/src/gdal/gcore/rasterio.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: GDAL Core |
4 | | * Purpose: Contains default implementation of GDALRasterBand::IRasterIO() |
5 | | * and supporting functions of broader utility. |
6 | | * Author: Frank Warmerdam, warmerdam@pobox.com |
7 | | * |
8 | | ****************************************************************************** |
9 | | * Copyright (c) 1998, Frank Warmerdam |
10 | | * Copyright (c) 2007-2014, Even Rouault <even dot rouault at spatialys.com> |
11 | | * |
12 | | * SPDX-License-Identifier: MIT |
13 | | ****************************************************************************/ |
14 | | |
15 | | #include "cpl_port.h" |
16 | | #include "gdal.h" |
17 | | #include "gdal_priv.h" |
18 | | |
19 | | #include <cassert> |
20 | | #include <climits> |
21 | | #include <cmath> |
22 | | #include <cstddef> |
23 | | #include <cstdio> |
24 | | #include <cstdlib> |
25 | | #include <cstring> |
26 | | |
27 | | #include <algorithm> |
28 | | #include <limits> |
29 | | #include <stdexcept> |
30 | | #include <type_traits> |
31 | | |
32 | | #include "cpl_conv.h" |
33 | | #include "cpl_cpu_features.h" |
34 | | #include "cpl_error.h" |
35 | | #include "cpl_float.h" |
36 | | #include "cpl_progress.h" |
37 | | #include "cpl_string.h" |
38 | | #include "cpl_vsi.h" |
39 | | #include "gdal_priv_templates.hpp" |
40 | | #include "gdal_vrt.h" |
41 | | #include "gdalwarper.h" |
42 | | #include "memdataset.h" |
43 | | #include "vrtdataset.h" |
44 | | |
45 | | #if defined(__x86_64) || defined(_M_X64) |
46 | | #include <emmintrin.h> |
47 | | #define HAVE_SSE2 |
48 | | #elif defined(USE_NEON_OPTIMIZATIONS) |
49 | | #include "include_sse2neon.h" |
50 | | #define HAVE_SSE2 |
51 | | #endif |
52 | | |
53 | | #ifdef HAVE_SSSE3_AT_COMPILE_TIME |
54 | | #include "rasterio_ssse3.h" |
55 | | #ifdef __SSSE3__ |
56 | | #include <tmmintrin.h> |
57 | | #endif |
58 | | #endif |
59 | | |
60 | | static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData, |
61 | | int nSrcPixelStride, GByte *CPL_RESTRICT pDstData, |
62 | | int nDstPixelStride, GPtrDiff_t nWordCount); |
63 | | |
64 | | /************************************************************************/ |
65 | | /* DownsamplingIntegerXFactor() */ |
66 | | /************************************************************************/ |
67 | | |
68 | | template <bool bSameDataType, int DATA_TYPE_SIZE> |
69 | | static bool DownsamplingIntegerXFactor( |
70 | | GDALRasterBand *poBand, int iSrcX, int nSrcXInc, GPtrDiff_t iSrcOffsetCst, |
71 | | GByte *CPL_RESTRICT pabyDstData, int nPixelSpace, int nBufXSize, |
72 | | GDALDataType eDataType, GDALDataType eBufType, int &nStartBlockX, |
73 | | int nBlockXSize, GDALRasterBlock *&poBlock, int nLBlockY) |
74 | 0 | { |
75 | 0 | const int nBandDataSize = |
76 | 0 | bSameDataType ? DATA_TYPE_SIZE : GDALGetDataTypeSizeBytes(eDataType); |
77 | 0 | int nOuterLoopIters = nBufXSize - 1; |
78 | 0 | const int nIncSrcOffset = nSrcXInc * nBandDataSize; |
79 | 0 | const GByte *CPL_RESTRICT pabySrcData; |
80 | 0 | int nEndBlockX = nBlockXSize + nStartBlockX; |
81 | |
|
82 | 0 | if (iSrcX < nEndBlockX) |
83 | 0 | { |
84 | 0 | CPLAssert(poBlock); |
85 | 0 | goto no_reload_block; |
86 | 0 | } |
87 | 0 | goto reload_block; |
88 | | |
89 | | // Don't do the last iteration in the loop, as iSrcX might go beyond |
90 | | // nRasterXSize - 1 |
91 | 0 | while (--nOuterLoopIters >= 1) |
92 | 0 | { |
93 | 0 | iSrcX += nSrcXInc; |
94 | 0 | pabySrcData += nIncSrcOffset; |
95 | 0 | pabyDstData += nPixelSpace; |
96 | | |
97 | | /* -------------------------------------------------------------------- |
98 | | */ |
99 | | /* Ensure we have the appropriate block loaded. */ |
100 | | /* -------------------------------------------------------------------- |
101 | | */ |
102 | 0 | if (iSrcX >= nEndBlockX) |
103 | 0 | { |
104 | 0 | reload_block: |
105 | 0 | { |
106 | 0 | const int nLBlockX = iSrcX / nBlockXSize; |
107 | 0 | nStartBlockX = nLBlockX * nBlockXSize; |
108 | 0 | nEndBlockX = nStartBlockX + nBlockXSize; |
109 | |
|
110 | 0 | if (poBlock != nullptr) |
111 | 0 | poBlock->DropLock(); |
112 | |
|
113 | 0 | poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, FALSE); |
114 | 0 | if (poBlock == nullptr) |
115 | 0 | { |
116 | 0 | return false; |
117 | 0 | } |
118 | 0 | } |
119 | | |
120 | 0 | no_reload_block: |
121 | 0 | const GByte *pabySrcBlock = |
122 | 0 | static_cast<const GByte *>(poBlock->GetDataRef()); |
123 | 0 | GPtrDiff_t iSrcOffset = |
124 | 0 | (iSrcX - nStartBlockX + iSrcOffsetCst) * nBandDataSize; |
125 | 0 | pabySrcData = pabySrcBlock + iSrcOffset; |
126 | 0 | } |
127 | | |
128 | | /* -------------------------------------------------------------------- |
129 | | */ |
130 | | /* Copy the maximum run of pixels. */ |
131 | | /* -------------------------------------------------------------------- |
132 | | */ |
133 | | |
134 | 0 | const int nIters = std::min( |
135 | 0 | (nEndBlockX - iSrcX + (nSrcXInc - 1)) / nSrcXInc, nOuterLoopIters); |
136 | 0 | if (bSameDataType) |
137 | 0 | { |
138 | 0 | memcpy(pabyDstData, pabySrcData, nBandDataSize); |
139 | 0 | if (nIters > 1) |
140 | 0 | { |
141 | 0 | if (DATA_TYPE_SIZE == 1) |
142 | 0 | { |
143 | 0 | pabySrcData += nIncSrcOffset; |
144 | 0 | pabyDstData += nPixelSpace; |
145 | 0 | GDALFastCopyByte(pabySrcData, nIncSrcOffset, pabyDstData, |
146 | 0 | nPixelSpace, nIters - 1); |
147 | 0 | pabySrcData += |
148 | 0 | static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 2); |
149 | 0 | pabyDstData += |
150 | 0 | static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 2); |
151 | 0 | } |
152 | 0 | else |
153 | 0 | { |
154 | 0 | for (int i = 0; i < nIters - 1; i++) |
155 | 0 | { |
156 | 0 | pabySrcData += nIncSrcOffset; |
157 | 0 | pabyDstData += nPixelSpace; |
158 | 0 | memcpy(pabyDstData, pabySrcData, nBandDataSize); |
159 | 0 | } |
160 | 0 | } |
161 | 0 | iSrcX += nSrcXInc * (nIters - 1); |
162 | 0 | nOuterLoopIters -= nIters - 1; |
163 | 0 | } |
164 | 0 | } |
165 | 0 | else |
166 | 0 | { |
167 | | // Type to type conversion ... |
168 | 0 | GDALCopyWords64(pabySrcData, eDataType, nIncSrcOffset, pabyDstData, |
169 | 0 | eBufType, nPixelSpace, std::max(1, nIters)); |
170 | 0 | if (nIters > 1) |
171 | 0 | { |
172 | 0 | pabySrcData += |
173 | 0 | static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 1); |
174 | 0 | pabyDstData += |
175 | 0 | static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 1); |
176 | 0 | iSrcX += nSrcXInc * (nIters - 1); |
177 | 0 | nOuterLoopIters -= nIters - 1; |
178 | 0 | } |
179 | 0 | } |
180 | 0 | } |
181 | | |
182 | | // Deal with last iteration to avoid iSrcX to go beyond nRasterXSize - 1 |
183 | 0 | if (nOuterLoopIters == 0) |
184 | 0 | { |
185 | 0 | const int nRasterXSize = poBand->GetXSize(); |
186 | 0 | iSrcX = |
187 | 0 | static_cast<int>(std::min(static_cast<GInt64>(iSrcX) + nSrcXInc, |
188 | 0 | static_cast<GInt64>(nRasterXSize - 1))); |
189 | 0 | pabyDstData += nPixelSpace; |
190 | 0 | if (iSrcX < nEndBlockX) |
191 | 0 | { |
192 | 0 | goto no_reload_block; |
193 | 0 | } |
194 | 0 | goto reload_block; |
195 | 0 | } |
196 | 0 | return true; |
197 | 0 | } Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 1>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 2>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 4>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 8>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 16>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<false, 0>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) |
198 | | |
199 | | template <class A, class B> |
200 | | CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW inline auto CPLUnsanitizedMul(A a, B b) |
201 | 0 | { |
202 | 0 | return a * b; |
203 | 0 | } |
204 | | |
205 | | /************************************************************************/ |
206 | | /* IRasterIO() */ |
207 | | /* */ |
208 | | /* Default internal implementation of RasterIO() ... utilizes */ |
209 | | /* the Block access methods to satisfy the request. This would */ |
210 | | /* normally only be overridden by formats with overviews. */ |
211 | | /************************************************************************/ |
212 | | |
213 | | CPLErr GDALRasterBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff, |
214 | | int nXSize, int nYSize, void *pData, |
215 | | int nBufXSize, int nBufYSize, |
216 | | GDALDataType eBufType, GSpacing nPixelSpace, |
217 | | GSpacing nLineSpace, |
218 | | GDALRasterIOExtraArg *psExtraArg) |
219 | | |
220 | 0 | { |
221 | 0 | if (eRWFlag == GF_Write && eFlushBlockErr != CE_None) |
222 | 0 | { |
223 | 0 | CPLError(eFlushBlockErr, CPLE_AppDefined, |
224 | 0 | "An error occurred while writing a dirty block " |
225 | 0 | "from GDALRasterBand::IRasterIO"); |
226 | 0 | CPLErr eErr = eFlushBlockErr; |
227 | 0 | eFlushBlockErr = CE_None; |
228 | 0 | return eErr; |
229 | 0 | } |
230 | 0 | if (nBlockXSize <= 0 || nBlockYSize <= 0) |
231 | 0 | { |
232 | 0 | CPLError(CE_Failure, CPLE_AppDefined, "Invalid block size"); |
233 | 0 | return CE_Failure; |
234 | 0 | } |
235 | | |
236 | 0 | const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType); |
237 | 0 | const int nBufDataSize = GDALGetDataTypeSizeBytes(eBufType); |
238 | 0 | GByte dummyBlock[2] = {0, 0}; |
239 | 0 | GByte *pabySrcBlock = |
240 | 0 | dummyBlock; /* to avoid Coverity warning about nullptr dereference */ |
241 | 0 | GDALRasterBlock *poBlock = nullptr; |
242 | 0 | const bool bUseIntegerRequestCoords = |
243 | 0 | (!psExtraArg->bFloatingPointWindowValidity || |
244 | 0 | (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff && |
245 | 0 | nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize)); |
246 | | |
247 | | /* ==================================================================== */ |
248 | | /* A common case is the data requested with the destination */ |
249 | | /* is packed, and the block width is the raster width. */ |
250 | | /* ==================================================================== */ |
251 | 0 | if (nPixelSpace == nBufDataSize && nLineSpace == nPixelSpace * nXSize && |
252 | 0 | nBlockXSize == GetXSize() && nBufXSize == nXSize && |
253 | 0 | nBufYSize == nYSize && bUseIntegerRequestCoords) |
254 | 0 | { |
255 | 0 | CPLErr eErr = CE_None; |
256 | 0 | int nLBlockY = -1; |
257 | |
|
258 | 0 | for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++) |
259 | 0 | { |
260 | 0 | const int iSrcY = iBufYOff + nYOff; |
261 | |
|
262 | 0 | if (iSrcY < nLBlockY * nBlockYSize || |
263 | 0 | iSrcY - nBlockYSize >= nLBlockY * nBlockYSize) |
264 | 0 | { |
265 | 0 | nLBlockY = iSrcY / nBlockYSize; |
266 | 0 | bool bJustInitialize = |
267 | 0 | eRWFlag == GF_Write && nXOff == 0 && |
268 | 0 | nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize && |
269 | 0 | nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize; |
270 | | |
271 | | // Is this a partial tile at right and/or bottom edges of |
272 | | // the raster, and that is going to be completely written? |
273 | | // If so, do not load it from storage, but zero it so that |
274 | | // the content outside of the validity area is initialized. |
275 | 0 | bool bMemZeroBuffer = false; |
276 | 0 | if (eRWFlag == GF_Write && !bJustInitialize && nXOff == 0 && |
277 | 0 | nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize && |
278 | 0 | nYOff + nYSize == GetYSize() && |
279 | 0 | nLBlockY * nBlockYSize > GetYSize() - nBlockYSize) |
280 | 0 | { |
281 | 0 | bJustInitialize = true; |
282 | 0 | bMemZeroBuffer = true; |
283 | 0 | } |
284 | |
|
285 | 0 | if (poBlock) |
286 | 0 | poBlock->DropLock(); |
287 | |
|
288 | 0 | const GUInt32 nErrorCounter = CPLGetErrorCounter(); |
289 | 0 | poBlock = GetLockedBlockRef(0, nLBlockY, bJustInitialize); |
290 | 0 | if (poBlock == nullptr) |
291 | 0 | { |
292 | 0 | if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") == |
293 | 0 | nullptr) |
294 | 0 | { |
295 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
296 | 0 | "GetBlockRef failed at X block offset %d, " |
297 | 0 | "Y block offset %d%s", |
298 | 0 | 0, nLBlockY, |
299 | 0 | (nErrorCounter != CPLGetErrorCounter()) |
300 | 0 | ? CPLSPrintf(": %s", CPLGetLastErrorMsg()) |
301 | 0 | : ""); |
302 | 0 | } |
303 | 0 | eErr = CE_Failure; |
304 | 0 | break; |
305 | 0 | } |
306 | | |
307 | 0 | if (eRWFlag == GF_Write) |
308 | 0 | poBlock->MarkDirty(); |
309 | |
|
310 | 0 | pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef()); |
311 | 0 | if (bMemZeroBuffer) |
312 | 0 | { |
313 | 0 | memset(pabySrcBlock, 0, |
314 | 0 | static_cast<GPtrDiff_t>(nBandDataSize) * |
315 | 0 | nBlockXSize * nBlockYSize); |
316 | 0 | } |
317 | 0 | } |
318 | | |
319 | 0 | const auto nSrcByteOffset = |
320 | 0 | (static_cast<GPtrDiff_t>(iSrcY - nLBlockY * nBlockYSize) * |
321 | 0 | nBlockXSize + |
322 | 0 | nXOff) * |
323 | 0 | nBandDataSize; |
324 | |
|
325 | 0 | if (eDataType == eBufType) |
326 | 0 | { |
327 | 0 | if (eRWFlag == GF_Read) |
328 | 0 | memcpy(static_cast<GByte *>(pData) + |
329 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace, |
330 | 0 | pabySrcBlock + nSrcByteOffset, |
331 | 0 | static_cast<size_t>(nLineSpace)); |
332 | 0 | else |
333 | 0 | memcpy(pabySrcBlock + nSrcByteOffset, |
334 | 0 | static_cast<GByte *>(pData) + |
335 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace, |
336 | 0 | static_cast<size_t>(nLineSpace)); |
337 | 0 | } |
338 | 0 | else |
339 | 0 | { |
340 | | // Type to type conversion. |
341 | 0 | if (eRWFlag == GF_Read) |
342 | 0 | GDALCopyWords64( |
343 | 0 | pabySrcBlock + nSrcByteOffset, eDataType, nBandDataSize, |
344 | 0 | static_cast<GByte *>(pData) + |
345 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace, |
346 | 0 | eBufType, static_cast<int>(nPixelSpace), nBufXSize); |
347 | 0 | else |
348 | 0 | GDALCopyWords64(static_cast<GByte *>(pData) + |
349 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * |
350 | 0 | nLineSpace, |
351 | 0 | eBufType, static_cast<int>(nPixelSpace), |
352 | 0 | pabySrcBlock + nSrcByteOffset, eDataType, |
353 | 0 | nBandDataSize, nBufXSize); |
354 | 0 | } |
355 | |
|
356 | 0 | if (psExtraArg->pfnProgress != nullptr && |
357 | 0 | !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "", |
358 | 0 | psExtraArg->pProgressData)) |
359 | 0 | { |
360 | 0 | eErr = CE_Failure; |
361 | 0 | break; |
362 | 0 | } |
363 | 0 | } |
364 | |
|
365 | 0 | if (poBlock) |
366 | 0 | poBlock->DropLock(); |
367 | |
|
368 | 0 | return eErr; |
369 | 0 | } |
370 | | |
371 | | /* ==================================================================== */ |
372 | | /* Do we have overviews that would be appropriate to satisfy */ |
373 | | /* this request? */ |
374 | | /* ==================================================================== */ |
375 | 0 | if ((nBufXSize < nXSize || nBufYSize < nYSize) && GetOverviewCount() > 0 && |
376 | 0 | eRWFlag == GF_Read) |
377 | 0 | { |
378 | 0 | GDALRasterIOExtraArg sExtraArg; |
379 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
380 | |
|
381 | 0 | const int nOverview = |
382 | 0 | GDALBandGetBestOverviewLevel2(this, nXOff, nYOff, nXSize, nYSize, |
383 | 0 | nBufXSize, nBufYSize, &sExtraArg); |
384 | 0 | if (nOverview >= 0) |
385 | 0 | { |
386 | 0 | GDALRasterBand *poOverviewBand = GetOverview(nOverview); |
387 | 0 | if (poOverviewBand == nullptr) |
388 | 0 | return CE_Failure; |
389 | | |
390 | 0 | return poOverviewBand->RasterIO( |
391 | 0 | eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize, |
392 | 0 | nBufYSize, eBufType, nPixelSpace, nLineSpace, &sExtraArg); |
393 | 0 | } |
394 | 0 | } |
395 | | |
396 | 0 | if (eRWFlag == GF_Read && nBufXSize < nXSize / 100 && |
397 | 0 | nBufYSize < nYSize / 100 && nPixelSpace == nBufDataSize && |
398 | 0 | nLineSpace == nPixelSpace * nBufXSize && |
399 | 0 | CPLTestBool(CPLGetConfigOption("GDAL_NO_COSTLY_OVERVIEW", "NO"))) |
400 | 0 | { |
401 | 0 | memset(pData, 0, static_cast<size_t>(nLineSpace * nBufYSize)); |
402 | 0 | return CE_None; |
403 | 0 | } |
404 | | |
405 | | /* ==================================================================== */ |
406 | | /* The second case when we don't need subsample data but likely */ |
407 | | /* need data type conversion. */ |
408 | | /* ==================================================================== */ |
409 | 0 | if ( // nPixelSpace == nBufDataSize && |
410 | 0 | nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords) |
411 | 0 | { |
412 | | #if DEBUG_VERBOSE |
413 | | printf("IRasterIO(%d,%d,%d,%d) rw=%d case 2\n", /*ok*/ |
414 | | nXOff, nYOff, nXSize, nYSize, static_cast<int>(eRWFlag)); |
415 | | #endif |
416 | | |
417 | | /* -------------------------------------------------------------------- |
418 | | */ |
419 | | /* Loop over buffer computing source locations. */ |
420 | | /* -------------------------------------------------------------------- |
421 | | */ |
422 | | // Calculate starting values out of loop |
423 | 0 | const int nLBlockXStart = nXOff / nBlockXSize; |
424 | 0 | const int nXSpanEnd = nBufXSize + nXOff; |
425 | |
|
426 | 0 | int nYInc = 0; |
427 | 0 | for (int iBufYOff = 0, iSrcY = nYOff; iBufYOff < nBufYSize; |
428 | 0 | iBufYOff += nYInc, iSrcY += nYInc) |
429 | 0 | { |
430 | 0 | GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) * |
431 | 0 | static_cast<GPtrDiff_t>(nLineSpace); |
432 | 0 | int nLBlockY = iSrcY / nBlockYSize; |
433 | 0 | int nLBlockX = nLBlockXStart; |
434 | 0 | int iSrcX = nXOff; |
435 | 0 | while (iSrcX < nXSpanEnd) |
436 | 0 | { |
437 | 0 | int nXSpan = nLBlockX * nBlockXSize; |
438 | 0 | if (nXSpan < INT_MAX - nBlockXSize) |
439 | 0 | nXSpan += nBlockXSize; |
440 | 0 | else |
441 | 0 | nXSpan = INT_MAX; |
442 | 0 | const int nXRight = nXSpan; |
443 | 0 | nXSpan = (nXSpan < nXSpanEnd ? nXSpan : nXSpanEnd) - iSrcX; |
444 | |
|
445 | 0 | const size_t nXSpanSize = |
446 | 0 | CPLUnsanitizedMul(nXSpan, static_cast<size_t>(nPixelSpace)); |
447 | |
|
448 | 0 | bool bJustInitialize = |
449 | 0 | eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize && |
450 | 0 | nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize && |
451 | 0 | nXOff <= nLBlockX * nBlockXSize && |
452 | 0 | nXOff + nXSize >= nXRight; |
453 | | |
454 | | // Is this a partial tile at right and/or bottom edges of |
455 | | // the raster, and that is going to be completely written? |
456 | | // If so, do not load it from storage, but zero it so that |
457 | | // the content outside of the validity area is initialized. |
458 | 0 | bool bMemZeroBuffer = false; |
459 | 0 | if (eRWFlag == GF_Write && !bJustInitialize && |
460 | 0 | nXOff <= nLBlockX * nBlockXSize && |
461 | 0 | nYOff <= nLBlockY * nBlockYSize && |
462 | 0 | (nXOff + nXSize >= nXRight || |
463 | | // cppcheck-suppress knownConditionTrueFalse |
464 | 0 | (nXOff + nXSize == GetXSize() && nXRight > GetXSize())) && |
465 | 0 | (nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize || |
466 | 0 | (nYOff + nYSize == GetYSize() && |
467 | 0 | nLBlockY * nBlockYSize > GetYSize() - nBlockYSize))) |
468 | 0 | { |
469 | 0 | bJustInitialize = true; |
470 | 0 | bMemZeroBuffer = true; |
471 | 0 | } |
472 | | |
473 | | /* -------------------------------------------------------------------- |
474 | | */ |
475 | | /* Ensure we have the appropriate block loaded. */ |
476 | | /* -------------------------------------------------------------------- |
477 | | */ |
478 | 0 | const GUInt32 nErrorCounter = CPLGetErrorCounter(); |
479 | 0 | poBlock = |
480 | 0 | GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize); |
481 | 0 | if (!poBlock) |
482 | 0 | { |
483 | 0 | if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") == |
484 | 0 | nullptr) |
485 | 0 | { |
486 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
487 | 0 | "GetBlockRef failed at X block offset %d, " |
488 | 0 | "Y block offset %d%s", |
489 | 0 | nLBlockX, nLBlockY, |
490 | 0 | (nErrorCounter != CPLGetErrorCounter()) |
491 | 0 | ? CPLSPrintf(": %s", CPLGetLastErrorMsg()) |
492 | 0 | : ""); |
493 | 0 | } |
494 | 0 | return (CE_Failure); |
495 | 0 | } |
496 | | |
497 | 0 | if (eRWFlag == GF_Write) |
498 | 0 | poBlock->MarkDirty(); |
499 | |
|
500 | 0 | pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef()); |
501 | 0 | if (bMemZeroBuffer) |
502 | 0 | { |
503 | 0 | memset(pabySrcBlock, 0, |
504 | 0 | static_cast<GPtrDiff_t>(nBandDataSize) * |
505 | 0 | nBlockXSize * nBlockYSize); |
506 | 0 | } |
507 | | /* -------------------------------------------------------------------- |
508 | | */ |
509 | | /* Copy over this chunk of data. */ |
510 | | /* -------------------------------------------------------------------- |
511 | | */ |
512 | 0 | GPtrDiff_t iSrcOffset = |
513 | 0 | (static_cast<GPtrDiff_t>(iSrcX) - |
514 | 0 | static_cast<GPtrDiff_t>(nLBlockX * nBlockXSize) + |
515 | 0 | (static_cast<GPtrDiff_t>(iSrcY) - |
516 | 0 | static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) * |
517 | 0 | nBlockXSize) * |
518 | 0 | nBandDataSize; |
519 | | // Fill up as many rows as possible for the loaded block. |
520 | 0 | const int kmax = std::min(nBlockYSize - (iSrcY % nBlockYSize), |
521 | 0 | nBufYSize - iBufYOff); |
522 | 0 | for (int k = 0; k < kmax; k++) |
523 | 0 | { |
524 | 0 | if (eDataType == eBufType && nPixelSpace == nBufDataSize) |
525 | 0 | { |
526 | 0 | if (eRWFlag == GF_Read) |
527 | 0 | memcpy(static_cast<GByte *>(pData) + iBufOffset + |
528 | 0 | static_cast<GPtrDiff_t>(k) * nLineSpace, |
529 | 0 | pabySrcBlock + iSrcOffset, nXSpanSize); |
530 | 0 | else |
531 | 0 | memcpy(pabySrcBlock + iSrcOffset, |
532 | 0 | static_cast<GByte *>(pData) + iBufOffset + |
533 | 0 | static_cast<GPtrDiff_t>(k) * nLineSpace, |
534 | 0 | nXSpanSize); |
535 | 0 | } |
536 | 0 | else |
537 | 0 | { |
538 | | /* type to type conversion */ |
539 | 0 | if (eRWFlag == GF_Read) |
540 | 0 | GDALCopyWords64( |
541 | 0 | pabySrcBlock + iSrcOffset, eDataType, |
542 | 0 | nBandDataSize, |
543 | 0 | static_cast<GByte *>(pData) + iBufOffset + |
544 | 0 | static_cast<GPtrDiff_t>(k) * nLineSpace, |
545 | 0 | eBufType, static_cast<int>(nPixelSpace), |
546 | 0 | nXSpan); |
547 | 0 | else |
548 | 0 | GDALCopyWords64( |
549 | 0 | static_cast<GByte *>(pData) + iBufOffset + |
550 | 0 | static_cast<GPtrDiff_t>(k) * nLineSpace, |
551 | 0 | eBufType, static_cast<int>(nPixelSpace), |
552 | 0 | pabySrcBlock + iSrcOffset, eDataType, |
553 | 0 | nBandDataSize, nXSpan); |
554 | 0 | } |
555 | |
|
556 | 0 | iSrcOffset += |
557 | 0 | static_cast<GPtrDiff_t>(nBlockXSize) * nBandDataSize; |
558 | 0 | } |
559 | |
|
560 | 0 | iBufOffset = |
561 | 0 | CPLUnsanitizedAdd<GPtrDiff_t>(iBufOffset, nXSpanSize); |
562 | 0 | nLBlockX++; |
563 | 0 | iSrcX += nXSpan; |
564 | |
|
565 | 0 | poBlock->DropLock(); |
566 | 0 | poBlock = nullptr; |
567 | 0 | } |
568 | | |
569 | | /* Compute the increment to go on a block boundary */ |
570 | 0 | nYInc = nBlockYSize - (iSrcY % nBlockYSize); |
571 | |
|
572 | 0 | if (psExtraArg->pfnProgress != nullptr && |
573 | 0 | !psExtraArg->pfnProgress( |
574 | 0 | 1.0 * std::min(nBufYSize, iBufYOff + nYInc) / nBufYSize, "", |
575 | 0 | psExtraArg->pProgressData)) |
576 | 0 | { |
577 | 0 | return CE_Failure; |
578 | 0 | } |
579 | 0 | } |
580 | | |
581 | 0 | return CE_None; |
582 | 0 | } |
583 | | |
584 | | /* ==================================================================== */ |
585 | | /* Loop reading required source blocks to satisfy output */ |
586 | | /* request. This is the most general implementation. */ |
587 | | /* ==================================================================== */ |
588 | | |
589 | 0 | double dfXOff = nXOff; |
590 | 0 | double dfYOff = nYOff; |
591 | 0 | double dfXSize = nXSize; |
592 | 0 | double dfYSize = nYSize; |
593 | 0 | if (psExtraArg->bFloatingPointWindowValidity) |
594 | 0 | { |
595 | 0 | dfXOff = psExtraArg->dfXOff; |
596 | 0 | dfYOff = psExtraArg->dfYOff; |
597 | 0 | dfXSize = psExtraArg->dfXSize; |
598 | 0 | dfYSize = psExtraArg->dfYSize; |
599 | 0 | } |
600 | | |
601 | | /* -------------------------------------------------------------------- */ |
602 | | /* Compute stepping increment. */ |
603 | | /* -------------------------------------------------------------------- */ |
604 | 0 | const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize); |
605 | 0 | const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize); |
606 | 0 | CPLErr eErr = CE_None; |
607 | |
|
608 | 0 | if (eRWFlag == GF_Write) |
609 | 0 | { |
610 | | /* -------------------------------------------------------------------- |
611 | | */ |
612 | | /* Write case */ |
613 | | /* Loop over raster window computing source locations in the buffer. |
614 | | */ |
615 | | /* -------------------------------------------------------------------- |
616 | | */ |
617 | 0 | GByte *pabyDstBlock = nullptr; |
618 | 0 | int nLBlockX = -1; |
619 | 0 | int nLBlockY = -1; |
620 | |
|
621 | 0 | for (int iDstY = nYOff; iDstY < nYOff + nYSize; iDstY++) |
622 | 0 | { |
623 | 0 | const int iBufYOff = static_cast<int>((iDstY - nYOff) / dfSrcYInc); |
624 | |
|
625 | 0 | for (int iDstX = nXOff; iDstX < nXOff + nXSize; iDstX++) |
626 | 0 | { |
627 | 0 | const int iBufXOff = |
628 | 0 | static_cast<int>((iDstX - nXOff) / dfSrcXInc); |
629 | 0 | GPtrDiff_t iBufOffset = |
630 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * |
631 | 0 | static_cast<GPtrDiff_t>(nLineSpace) + |
632 | 0 | iBufXOff * static_cast<GPtrDiff_t>(nPixelSpace); |
633 | | |
634 | | // FIXME: this code likely doesn't work if the dirty block gets |
635 | | // flushed to disk before being completely written. |
636 | | // In the meantime, bJustInitialize should probably be set to |
637 | | // FALSE even if it is not ideal performance wise, and for |
638 | | // lossy compression. |
639 | | |
640 | | /* -------------------------------------------------------------------- |
641 | | */ |
642 | | /* Ensure we have the appropriate block loaded. */ |
643 | | /* -------------------------------------------------------------------- |
644 | | */ |
645 | 0 | if (iDstX < nLBlockX * nBlockXSize || |
646 | 0 | iDstX - nBlockXSize >= nLBlockX * nBlockXSize || |
647 | 0 | iDstY < nLBlockY * nBlockYSize || |
648 | 0 | iDstY - nBlockYSize >= nLBlockY * nBlockYSize) |
649 | 0 | { |
650 | 0 | nLBlockX = iDstX / nBlockXSize; |
651 | 0 | nLBlockY = iDstY / nBlockYSize; |
652 | |
|
653 | 0 | const bool bJustInitialize = |
654 | 0 | nYOff <= nLBlockY * nBlockYSize && |
655 | 0 | nYOff + nYSize - nBlockYSize >= |
656 | 0 | nLBlockY * nBlockYSize && |
657 | 0 | nXOff <= nLBlockX * nBlockXSize && |
658 | 0 | nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize; |
659 | | /*bool bMemZeroBuffer = FALSE; |
660 | | if( !bJustInitialize && |
661 | | nXOff <= nLBlockX * nBlockXSize && |
662 | | nYOff <= nLBlockY * nBlockYSize && |
663 | | (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize || |
664 | | (nXOff + nXSize == GetXSize() && |
665 | | (nLBlockX+1) * nBlockXSize > GetXSize())) && |
666 | | (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize || |
667 | | (nYOff + nYSize == GetYSize() && |
668 | | (nLBlockY+1) * nBlockYSize > GetYSize())) ) |
669 | | { |
670 | | bJustInitialize = TRUE; |
671 | | bMemZeroBuffer = TRUE; |
672 | | }*/ |
673 | 0 | if (poBlock != nullptr) |
674 | 0 | poBlock->DropLock(); |
675 | |
|
676 | 0 | poBlock = |
677 | 0 | GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize); |
678 | 0 | if (poBlock == nullptr) |
679 | 0 | { |
680 | 0 | return (CE_Failure); |
681 | 0 | } |
682 | | |
683 | 0 | poBlock->MarkDirty(); |
684 | |
|
685 | 0 | pabyDstBlock = static_cast<GByte *>(poBlock->GetDataRef()); |
686 | | /*if( bMemZeroBuffer ) |
687 | | { |
688 | | memset(pabyDstBlock, 0, |
689 | | static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize |
690 | | * nBlockYSize); |
691 | | }*/ |
692 | 0 | } |
693 | | |
694 | | // To make Coverity happy. Should not happen by design. |
695 | 0 | if (pabyDstBlock == nullptr) |
696 | 0 | { |
697 | 0 | CPLAssert(false); |
698 | 0 | eErr = CE_Failure; |
699 | 0 | break; |
700 | 0 | } |
701 | | |
702 | | /* -------------------------------------------------------------------- |
703 | | */ |
704 | | /* Copy over this pixel of data. */ |
705 | | /* -------------------------------------------------------------------- |
706 | | */ |
707 | 0 | GPtrDiff_t iDstOffset = |
708 | 0 | (static_cast<GPtrDiff_t>(iDstX) - |
709 | 0 | static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize + |
710 | 0 | (static_cast<GPtrDiff_t>(iDstY) - |
711 | 0 | static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) * |
712 | 0 | nBlockXSize) * |
713 | 0 | nBandDataSize; |
714 | |
|
715 | 0 | if (eDataType == eBufType) |
716 | 0 | { |
717 | 0 | memcpy(pabyDstBlock + iDstOffset, |
718 | 0 | static_cast<GByte *>(pData) + iBufOffset, |
719 | 0 | nBandDataSize); |
720 | 0 | } |
721 | 0 | else |
722 | 0 | { |
723 | | /* type to type conversion ... ouch, this is expensive way |
724 | | of handling single words */ |
725 | 0 | GDALCopyWords64(static_cast<GByte *>(pData) + iBufOffset, |
726 | 0 | eBufType, 0, pabyDstBlock + iDstOffset, |
727 | 0 | eDataType, 0, 1); |
728 | 0 | } |
729 | 0 | } |
730 | | |
731 | 0 | if (psExtraArg->pfnProgress != nullptr && |
732 | 0 | !psExtraArg->pfnProgress(1.0 * (iDstY - nYOff + 1) / nYSize, "", |
733 | 0 | psExtraArg->pProgressData)) |
734 | 0 | { |
735 | 0 | eErr = CE_Failure; |
736 | 0 | break; |
737 | 0 | } |
738 | 0 | } |
739 | 0 | } |
740 | 0 | else |
741 | 0 | { |
742 | 0 | if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour) |
743 | 0 | { |
744 | 0 | if ((psExtraArg->eResampleAlg == GRIORA_Cubic || |
745 | 0 | psExtraArg->eResampleAlg == GRIORA_CubicSpline || |
746 | 0 | psExtraArg->eResampleAlg == GRIORA_Bilinear || |
747 | 0 | psExtraArg->eResampleAlg == GRIORA_Lanczos) && |
748 | 0 | GetColorTable() != nullptr) |
749 | 0 | { |
750 | 0 | CPLError(CE_Warning, CPLE_NotSupported, |
751 | 0 | "Resampling method not supported on paletted band. " |
752 | 0 | "Falling back to nearest neighbour"); |
753 | 0 | } |
754 | 0 | else if (psExtraArg->eResampleAlg == GRIORA_Gauss && |
755 | 0 | GDALDataTypeIsComplex(eDataType)) |
756 | 0 | { |
757 | 0 | CPLError(CE_Warning, CPLE_NotSupported, |
758 | 0 | "Resampling method not supported on complex data type " |
759 | 0 | "band. Falling back to nearest neighbour"); |
760 | 0 | } |
761 | 0 | else |
762 | 0 | { |
763 | 0 | return RasterIOResampled(eRWFlag, nXOff, nYOff, nXSize, nYSize, |
764 | 0 | pData, nBufXSize, nBufYSize, eBufType, |
765 | 0 | nPixelSpace, nLineSpace, psExtraArg); |
766 | 0 | } |
767 | 0 | } |
768 | | |
769 | 0 | int nLimitBlockY = 0; |
770 | 0 | const bool bByteCopy = eDataType == eBufType && nBandDataSize == 1; |
771 | 0 | int nStartBlockX = -nBlockXSize; |
772 | 0 | const double EPS = 1e-10; |
773 | 0 | int nLBlockY = -1; |
774 | 0 | const double dfSrcXStart = 0.5 * dfSrcXInc + dfXOff + EPS; |
775 | 0 | const bool bIntegerXFactor = |
776 | 0 | bUseIntegerRequestCoords && |
777 | 0 | static_cast<int>(dfSrcXInc) == dfSrcXInc && |
778 | 0 | static_cast<int>(dfSrcXInc) < INT_MAX / nBandDataSize; |
779 | | |
780 | | /* -------------------------------------------------------------------- |
781 | | */ |
782 | | /* Read case */ |
783 | | /* Loop over buffer computing source locations. */ |
784 | | /* -------------------------------------------------------------------- |
785 | | */ |
786 | 0 | for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++) |
787 | 0 | { |
788 | | // Add small epsilon to avoid some numeric precision issues. |
789 | 0 | const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS; |
790 | 0 | const int iSrcY = static_cast<int>(std::min( |
791 | 0 | std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1))); |
792 | |
|
793 | 0 | GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) * |
794 | 0 | static_cast<GPtrDiff_t>(nLineSpace); |
795 | |
|
796 | 0 | if (iSrcY >= nLimitBlockY) |
797 | 0 | { |
798 | 0 | nLBlockY = iSrcY / nBlockYSize; |
799 | 0 | nLimitBlockY = nLBlockY * nBlockYSize; |
800 | 0 | if (nLimitBlockY < INT_MAX - nBlockYSize) |
801 | 0 | nLimitBlockY += nBlockYSize; |
802 | 0 | else |
803 | 0 | nLimitBlockY = INT_MAX; |
804 | | // Make sure a new block is loaded. |
805 | 0 | nStartBlockX = -nBlockXSize; |
806 | 0 | } |
807 | 0 | else if (static_cast<int>(dfSrcXStart) < nStartBlockX) |
808 | 0 | { |
809 | | // Make sure a new block is loaded. |
810 | 0 | nStartBlockX = -nBlockXSize; |
811 | 0 | } |
812 | |
|
813 | 0 | GPtrDiff_t iSrcOffsetCst = (iSrcY - nLBlockY * nBlockYSize) * |
814 | 0 | static_cast<GPtrDiff_t>(nBlockXSize); |
815 | |
|
816 | 0 | if (bIntegerXFactor) |
817 | 0 | { |
818 | 0 | int iSrcX = static_cast<int>(dfSrcXStart); |
819 | 0 | const int nSrcXInc = static_cast<int>(dfSrcXInc); |
820 | 0 | GByte *pabyDstData = static_cast<GByte *>(pData) + iBufOffset; |
821 | 0 | bool bRet = false; |
822 | 0 | if (bByteCopy) |
823 | 0 | { |
824 | 0 | bRet = DownsamplingIntegerXFactor<true, 1>( |
825 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData, |
826 | 0 | static_cast<int>(nPixelSpace), nBufXSize, GDT_Byte, |
827 | 0 | GDT_Byte, nStartBlockX, nBlockXSize, poBlock, nLBlockY); |
828 | 0 | } |
829 | 0 | else if (eDataType == eBufType) |
830 | 0 | { |
831 | 0 | switch (nBandDataSize) |
832 | 0 | { |
833 | 0 | case 2: |
834 | 0 | bRet = DownsamplingIntegerXFactor<true, 2>( |
835 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, |
836 | 0 | pabyDstData, static_cast<int>(nPixelSpace), |
837 | 0 | nBufXSize, eDataType, eDataType, nStartBlockX, |
838 | 0 | nBlockXSize, poBlock, nLBlockY); |
839 | 0 | break; |
840 | 0 | case 4: |
841 | 0 | bRet = DownsamplingIntegerXFactor<true, 4>( |
842 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, |
843 | 0 | pabyDstData, static_cast<int>(nPixelSpace), |
844 | 0 | nBufXSize, eDataType, eDataType, nStartBlockX, |
845 | 0 | nBlockXSize, poBlock, nLBlockY); |
846 | 0 | break; |
847 | 0 | case 8: |
848 | 0 | bRet = DownsamplingIntegerXFactor<true, 8>( |
849 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, |
850 | 0 | pabyDstData, static_cast<int>(nPixelSpace), |
851 | 0 | nBufXSize, eDataType, eDataType, nStartBlockX, |
852 | 0 | nBlockXSize, poBlock, nLBlockY); |
853 | 0 | break; |
854 | 0 | case 16: |
855 | 0 | bRet = DownsamplingIntegerXFactor<true, 16>( |
856 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, |
857 | 0 | pabyDstData, static_cast<int>(nPixelSpace), |
858 | 0 | nBufXSize, eDataType, eDataType, nStartBlockX, |
859 | 0 | nBlockXSize, poBlock, nLBlockY); |
860 | 0 | break; |
861 | 0 | default: |
862 | 0 | CPLAssert(false); |
863 | 0 | break; |
864 | 0 | } |
865 | 0 | } |
866 | 0 | else |
867 | 0 | { |
868 | 0 | bRet = DownsamplingIntegerXFactor<false, 0>( |
869 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData, |
870 | 0 | static_cast<int>(nPixelSpace), nBufXSize, eDataType, |
871 | 0 | eBufType, nStartBlockX, nBlockXSize, poBlock, nLBlockY); |
872 | 0 | } |
873 | 0 | if (!bRet) |
874 | 0 | eErr = CE_Failure; |
875 | 0 | } |
876 | 0 | else |
877 | 0 | { |
878 | 0 | double dfSrcX = dfSrcXStart; |
879 | 0 | for (int iBufXOff = 0; iBufXOff < nBufXSize; |
880 | 0 | iBufXOff++, dfSrcX += dfSrcXInc) |
881 | 0 | { |
882 | | // TODO?: try to avoid the clamping for most iterations |
883 | 0 | const int iSrcX = static_cast<int>( |
884 | 0 | std::min(std::max(0.0, dfSrcX), |
885 | 0 | static_cast<double>(nRasterXSize - 1))); |
886 | | |
887 | | /* -------------------------------------------------------------------- |
888 | | */ |
889 | | /* Ensure we have the appropriate block loaded. */ |
890 | | /* -------------------------------------------------------------------- |
891 | | */ |
892 | 0 | if (iSrcX >= nBlockXSize + nStartBlockX) |
893 | 0 | { |
894 | 0 | const int nLBlockX = iSrcX / nBlockXSize; |
895 | 0 | nStartBlockX = nLBlockX * nBlockXSize; |
896 | |
|
897 | 0 | if (poBlock != nullptr) |
898 | 0 | poBlock->DropLock(); |
899 | |
|
900 | 0 | poBlock = GetLockedBlockRef(nLBlockX, nLBlockY, FALSE); |
901 | 0 | if (poBlock == nullptr) |
902 | 0 | { |
903 | 0 | eErr = CE_Failure; |
904 | 0 | break; |
905 | 0 | } |
906 | | |
907 | 0 | pabySrcBlock = |
908 | 0 | static_cast<GByte *>(poBlock->GetDataRef()); |
909 | 0 | } |
910 | 0 | const GPtrDiff_t nDiffX = |
911 | 0 | static_cast<GPtrDiff_t>(iSrcX - nStartBlockX); |
912 | | |
913 | | /* -------------------------------------------------------------------- |
914 | | */ |
915 | | /* Copy over this pixel of data. */ |
916 | | /* -------------------------------------------------------------------- |
917 | | */ |
918 | |
|
919 | 0 | if (bByteCopy) |
920 | 0 | { |
921 | 0 | GPtrDiff_t iSrcOffset = nDiffX + iSrcOffsetCst; |
922 | 0 | static_cast<GByte *>(pData)[iBufOffset] = |
923 | 0 | pabySrcBlock[iSrcOffset]; |
924 | 0 | } |
925 | 0 | else if (eDataType == eBufType) |
926 | 0 | { |
927 | 0 | GPtrDiff_t iSrcOffset = |
928 | 0 | (nDiffX + iSrcOffsetCst) * nBandDataSize; |
929 | 0 | memcpy(static_cast<GByte *>(pData) + iBufOffset, |
930 | 0 | pabySrcBlock + iSrcOffset, nBandDataSize); |
931 | 0 | } |
932 | 0 | else |
933 | 0 | { |
934 | | // Type to type conversion ... |
935 | 0 | GPtrDiff_t iSrcOffset = |
936 | 0 | (nDiffX + iSrcOffsetCst) * nBandDataSize; |
937 | 0 | GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0, |
938 | 0 | static_cast<GByte *>(pData) + |
939 | 0 | iBufOffset, |
940 | 0 | eBufType, 0, 1); |
941 | 0 | } |
942 | |
|
943 | 0 | iBufOffset += static_cast<int>(nPixelSpace); |
944 | 0 | } |
945 | 0 | } |
946 | 0 | if (eErr == CE_Failure) |
947 | 0 | break; |
948 | | |
949 | 0 | if (psExtraArg->pfnProgress != nullptr && |
950 | 0 | !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "", |
951 | 0 | psExtraArg->pProgressData)) |
952 | 0 | { |
953 | 0 | eErr = CE_Failure; |
954 | 0 | break; |
955 | 0 | } |
956 | 0 | } |
957 | 0 | } |
958 | | |
959 | 0 | if (poBlock != nullptr) |
960 | 0 | poBlock->DropLock(); |
961 | |
|
962 | 0 | return eErr; |
963 | 0 | } |
964 | | |
965 | | /************************************************************************/ |
966 | | /* GDALRasterIOTransformer() */ |
967 | | /************************************************************************/ |
968 | | |
/** Parameters of the affine (scale + offset, no rotation) mapping applied by
 * GDALRasterIOTransformer().
 */
struct GDALRasterIOTransformerStruct
{
    double dfXOff;            /**< Offset added to X after scaling. */
    double dfYOff;            /**< Offset added to Y after scaling. */
    double dfXRatioDstToSrc;  /**< X scale applied in the dst -> src sense. */
    double dfYRatioDstToSrc;  /**< Y scale applied in the dst -> src sense. */
};

/** Transformer callback mapping points through a per-axis scale and offset.
 *
 * When bDstToSrc is non-zero each point becomes
 * (x * dfXRatioDstToSrc + dfXOff, y * dfYRatioDstToSrc + dfYOff); otherwise
 * the inverse mapping ((x - dfXOff) / dfXRatioDstToSrc, ...) is applied.
 * The Z array is ignored.
 *
 * @param pTransformerArg pointer to a GDALRasterIOTransformerStruct.
 * @param bDstToSrc       direction of the transformation.
 * @param nPointCount     number of entries in x, y and panSuccess.
 * @param x               X coordinates, modified in place.
 * @param y               Y coordinates, modified in place.
 * @param panSuccess      per-point status, always set to TRUE.
 * @return always TRUE.
 */
static int GDALRasterIOTransformer(void *pTransformerArg, int bDstToSrc,
                                   int nPointCount, double *x, double *y,
                                   double * /* z */, int *panSuccess)
{
    const GDALRasterIOTransformerStruct &sParams =
        *static_cast<GDALRasterIOTransformerStruct *>(pTransformerArg);
    const double dfOffX = sParams.dfXOff;
    const double dfOffY = sParams.dfYOff;
    const double dfScaleX = sParams.dfXRatioDstToSrc;
    const double dfScaleY = sParams.dfYRatioDstToSrc;

    for (int iPt = 0; iPt < nPointCount; ++iPt)
    {
        if (bDstToSrc)
        {
            // Destination pixel space -> source pixel space.
            x[iPt] = x[iPt] * dfScaleX + dfOffX;
            y[iPt] = y[iPt] * dfScaleY + dfOffY;
        }
        else
        {
            // Source pixel space -> destination pixel space.
            x[iPt] = (x[iPt] - dfOffX) / dfScaleX;
            y[iPt] = (y[iPt] - dfOffY) / dfScaleY;
        }
        panSuccess[iPt] = TRUE;
    }

    return TRUE;
}
1003 | | |
1004 | | /************************************************************************/ |
1005 | | /* RasterIOResampled() */ |
1006 | | /************************************************************************/ |
1007 | | |
1008 | | //! @cond Doxygen_Suppress |
1009 | | CPLErr GDALRasterBand::RasterIOResampled( |
1010 | | GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize, |
1011 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
1012 | | GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg) |
1013 | 0 | { |
1014 | | // Determine if we use warping resampling or overview resampling |
1015 | 0 | const bool bUseWarp = |
1016 | 0 | (GDALDataTypeIsComplex(eDataType) && |
1017 | 0 | psExtraArg->eResampleAlg != GRIORA_NearestNeighbour && |
1018 | 0 | psExtraArg->eResampleAlg != GRIORA_Mode); |
1019 | |
|
1020 | 0 | double dfXOff = nXOff; |
1021 | 0 | double dfYOff = nYOff; |
1022 | 0 | double dfXSize = nXSize; |
1023 | 0 | double dfYSize = nYSize; |
1024 | 0 | if (psExtraArg->bFloatingPointWindowValidity) |
1025 | 0 | { |
1026 | 0 | dfXOff = psExtraArg->dfXOff; |
1027 | 0 | dfYOff = psExtraArg->dfYOff; |
1028 | 0 | dfXSize = psExtraArg->dfXSize; |
1029 | 0 | dfYSize = psExtraArg->dfYSize; |
1030 | 0 | } |
1031 | |
|
1032 | 0 | const double dfXRatioDstToSrc = dfXSize / nBufXSize; |
1033 | 0 | const double dfYRatioDstToSrc = dfYSize / nBufYSize; |
1034 | | |
1035 | | // Determine the coordinates in the "virtual" output raster to see |
1036 | | // if there are not integers, in which case we will use them as a shift |
1037 | | // so that subwindow extracts give the exact same results as entire raster |
1038 | | // scaling. |
1039 | 0 | double dfDestXOff = dfXOff / dfXRatioDstToSrc; |
1040 | 0 | bool bHasXOffVirtual = false; |
1041 | 0 | int nDestXOffVirtual = 0; |
1042 | 0 | if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8) |
1043 | 0 | { |
1044 | 0 | bHasXOffVirtual = true; |
1045 | 0 | dfXOff = nXOff; |
1046 | 0 | nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5); |
1047 | 0 | } |
1048 | |
|
1049 | 0 | double dfDestYOff = dfYOff / dfYRatioDstToSrc; |
1050 | 0 | bool bHasYOffVirtual = false; |
1051 | 0 | int nDestYOffVirtual = 0; |
1052 | 0 | if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8) |
1053 | 0 | { |
1054 | 0 | bHasYOffVirtual = true; |
1055 | 0 | dfYOff = nYOff; |
1056 | 0 | nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5); |
1057 | 0 | } |
1058 | | |
1059 | | // Create a MEM dataset that wraps the output buffer. |
1060 | 0 | GDALDataset *poMEMDS; |
1061 | 0 | void *pTempBuffer = nullptr; |
1062 | 0 | GSpacing nPSMem = nPixelSpace; |
1063 | 0 | GSpacing nLSMem = nLineSpace; |
1064 | 0 | void *pDataMem = pData; |
1065 | 0 | GDALDataType eDTMem = eBufType; |
1066 | 0 | if (eBufType != eDataType) |
1067 | 0 | { |
1068 | 0 | nPSMem = GDALGetDataTypeSizeBytes(eDataType); |
1069 | 0 | nLSMem = nPSMem * nBufXSize; |
1070 | 0 | pTempBuffer = |
1071 | 0 | VSI_MALLOC2_VERBOSE(nBufYSize, static_cast<size_t>(nLSMem)); |
1072 | 0 | if (pTempBuffer == nullptr) |
1073 | 0 | return CE_Failure; |
1074 | 0 | pDataMem = pTempBuffer; |
1075 | 0 | eDTMem = eDataType; |
1076 | 0 | } |
1077 | | |
1078 | 0 | poMEMDS = |
1079 | 0 | MEMDataset::Create("", nDestXOffVirtual + nBufXSize, |
1080 | 0 | nDestYOffVirtual + nBufYSize, 0, eDTMem, nullptr); |
1081 | 0 | GByte *pabyData = static_cast<GByte *>(pDataMem) - |
1082 | 0 | nPSMem * nDestXOffVirtual - nLSMem * nDestYOffVirtual; |
1083 | 0 | GDALRasterBandH hMEMBand = MEMCreateRasterBandEx( |
1084 | 0 | poMEMDS, 1, pabyData, eDTMem, nPSMem, nLSMem, false); |
1085 | 0 | poMEMDS->SetBand(1, GDALRasterBand::FromHandle(hMEMBand)); |
1086 | |
|
1087 | 0 | const char *pszNBITS = GetMetadataItem("NBITS", "IMAGE_STRUCTURE"); |
1088 | 0 | const int nNBITS = pszNBITS ? atoi(pszNBITS) : 0; |
1089 | 0 | if (pszNBITS) |
1090 | 0 | GDALRasterBand::FromHandle(hMEMBand)->SetMetadataItem( |
1091 | 0 | "NBITS", pszNBITS, "IMAGE_STRUCTURE"); |
1092 | |
|
1093 | 0 | CPLErr eErr = CE_None; |
1094 | | |
1095 | | // Do the resampling. |
1096 | 0 | if (bUseWarp) |
1097 | 0 | { |
1098 | 0 | int bHasNoData = FALSE; |
1099 | 0 | double dfNoDataValue = GetNoDataValue(&bHasNoData); |
1100 | |
|
1101 | 0 | VRTDatasetH hVRTDS = nullptr; |
1102 | 0 | GDALRasterBandH hVRTBand = nullptr; |
1103 | 0 | if (GetDataset() == nullptr) |
1104 | 0 | { |
1105 | | /* Create VRT dataset that wraps the whole dataset */ |
1106 | 0 | hVRTDS = VRTCreate(nRasterXSize, nRasterYSize); |
1107 | 0 | VRTAddBand(hVRTDS, eDataType, nullptr); |
1108 | 0 | hVRTBand = GDALGetRasterBand(hVRTDS, 1); |
1109 | 0 | VRTAddSimpleSource(hVRTBand, this, 0, 0, nRasterXSize, nRasterYSize, |
1110 | 0 | 0, 0, nRasterXSize, nRasterYSize, nullptr, |
1111 | 0 | VRT_NODATA_UNSET); |
1112 | | |
1113 | | /* Add a mask band if needed */ |
1114 | 0 | if (GetMaskFlags() != GMF_ALL_VALID) |
1115 | 0 | { |
1116 | 0 | GDALDataset::FromHandle(hVRTDS)->CreateMaskBand(0); |
1117 | 0 | VRTSourcedRasterBand *poVRTMaskBand = |
1118 | 0 | reinterpret_cast<VRTSourcedRasterBand *>( |
1119 | 0 | reinterpret_cast<GDALRasterBand *>(hVRTBand) |
1120 | 0 | ->GetMaskBand()); |
1121 | 0 | poVRTMaskBand->AddMaskBandSource(this, 0, 0, nRasterXSize, |
1122 | 0 | nRasterYSize, 0, 0, |
1123 | 0 | nRasterXSize, nRasterYSize); |
1124 | 0 | } |
1125 | 0 | } |
1126 | |
|
1127 | 0 | GDALWarpOptions *psWarpOptions = GDALCreateWarpOptions(); |
1128 | 0 | switch (psExtraArg->eResampleAlg) |
1129 | 0 | { |
1130 | 0 | case GRIORA_NearestNeighbour: |
1131 | 0 | psWarpOptions->eResampleAlg = GRA_NearestNeighbour; |
1132 | 0 | break; |
1133 | 0 | case GRIORA_Bilinear: |
1134 | 0 | psWarpOptions->eResampleAlg = GRA_Bilinear; |
1135 | 0 | break; |
1136 | 0 | case GRIORA_Cubic: |
1137 | 0 | psWarpOptions->eResampleAlg = GRA_Cubic; |
1138 | 0 | break; |
1139 | 0 | case GRIORA_CubicSpline: |
1140 | 0 | psWarpOptions->eResampleAlg = GRA_CubicSpline; |
1141 | 0 | break; |
1142 | 0 | case GRIORA_Lanczos: |
1143 | 0 | psWarpOptions->eResampleAlg = GRA_Lanczos; |
1144 | 0 | break; |
1145 | 0 | case GRIORA_Average: |
1146 | 0 | psWarpOptions->eResampleAlg = GRA_Average; |
1147 | 0 | break; |
1148 | 0 | case GRIORA_RMS: |
1149 | 0 | psWarpOptions->eResampleAlg = GRA_RMS; |
1150 | 0 | break; |
1151 | 0 | case GRIORA_Mode: |
1152 | 0 | psWarpOptions->eResampleAlg = GRA_Mode; |
1153 | 0 | break; |
1154 | 0 | default: |
1155 | 0 | CPLAssert(false); |
1156 | 0 | psWarpOptions->eResampleAlg = GRA_NearestNeighbour; |
1157 | 0 | break; |
1158 | 0 | } |
1159 | 0 | psWarpOptions->hSrcDS = hVRTDS ? hVRTDS : GetDataset(); |
1160 | 0 | psWarpOptions->hDstDS = poMEMDS; |
1161 | 0 | psWarpOptions->nBandCount = 1; |
1162 | 0 | int nSrcBandNumber = hVRTDS ? 1 : nBand; |
1163 | 0 | int nDstBandNumber = 1; |
1164 | 0 | psWarpOptions->panSrcBands = &nSrcBandNumber; |
1165 | 0 | psWarpOptions->panDstBands = &nDstBandNumber; |
1166 | 0 | psWarpOptions->pfnProgress = psExtraArg->pfnProgress |
1167 | 0 | ? psExtraArg->pfnProgress |
1168 | 0 | : GDALDummyProgress; |
1169 | 0 | psWarpOptions->pProgressArg = psExtraArg->pProgressData; |
1170 | 0 | psWarpOptions->pfnTransformer = GDALRasterIOTransformer; |
1171 | 0 | if (bHasNoData) |
1172 | 0 | { |
1173 | 0 | psWarpOptions->papszWarpOptions = CSLSetNameValue( |
1174 | 0 | psWarpOptions->papszWarpOptions, "INIT_DEST", "NO_DATA"); |
1175 | 0 | if (psWarpOptions->padfSrcNoDataReal == nullptr) |
1176 | 0 | { |
1177 | 0 | psWarpOptions->padfSrcNoDataReal = |
1178 | 0 | static_cast<double *>(CPLMalloc(sizeof(double))); |
1179 | 0 | psWarpOptions->padfSrcNoDataReal[0] = dfNoDataValue; |
1180 | 0 | } |
1181 | |
|
1182 | 0 | if (psWarpOptions->padfDstNoDataReal == nullptr) |
1183 | 0 | { |
1184 | 0 | psWarpOptions->padfDstNoDataReal = |
1185 | 0 | static_cast<double *>(CPLMalloc(sizeof(double))); |
1186 | 0 | psWarpOptions->padfDstNoDataReal[0] = dfNoDataValue; |
1187 | 0 | } |
1188 | 0 | } |
1189 | |
|
1190 | 0 | GDALRasterIOTransformerStruct sTransformer; |
1191 | 0 | sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff; |
1192 | 0 | sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff; |
1193 | 0 | sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc; |
1194 | 0 | sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc; |
1195 | 0 | psWarpOptions->pTransformerArg = &sTransformer; |
1196 | |
|
1197 | 0 | GDALWarpOperationH hWarpOperation = |
1198 | 0 | GDALCreateWarpOperation(psWarpOptions); |
1199 | 0 | eErr = GDALChunkAndWarpImage(hWarpOperation, nDestXOffVirtual, |
1200 | 0 | nDestYOffVirtual, nBufXSize, nBufYSize); |
1201 | 0 | GDALDestroyWarpOperation(hWarpOperation); |
1202 | |
|
1203 | 0 | psWarpOptions->panSrcBands = nullptr; |
1204 | 0 | psWarpOptions->panDstBands = nullptr; |
1205 | 0 | GDALDestroyWarpOptions(psWarpOptions); |
1206 | |
|
1207 | 0 | if (hVRTDS) |
1208 | 0 | GDALClose(hVRTDS); |
1209 | 0 | } |
1210 | 0 | else |
1211 | 0 | { |
1212 | 0 | const char *pszResampling = |
1213 | 0 | (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR" |
1214 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC" |
1215 | 0 | : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE" |
1216 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS" |
1217 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE" |
1218 | 0 | : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS" |
1219 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE" |
1220 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS" |
1221 | 0 | : "UNKNOWN"; |
1222 | |
|
1223 | 0 | int nKernelRadius = 0; |
1224 | 0 | GDALResampleFunction pfnResampleFunc = |
1225 | 0 | GDALGetResampleFunction(pszResampling, &nKernelRadius); |
1226 | 0 | CPLAssert(pfnResampleFunc); |
1227 | 0 | GDALDataType eWrkDataType = |
1228 | 0 | GDALGetOvrWorkDataType(pszResampling, eDataType); |
1229 | 0 | int nHasNoData = 0; |
1230 | 0 | double dfNoDataValue = GetNoDataValue(&nHasNoData); |
1231 | 0 | const bool bHasNoData = CPL_TO_BOOL(nHasNoData); |
1232 | 0 | if (!bHasNoData) |
1233 | 0 | dfNoDataValue = 0.0; |
1234 | |
|
1235 | 0 | int nDstBlockXSize = nBufXSize; |
1236 | 0 | int nDstBlockYSize = nBufYSize; |
1237 | 0 | int nFullResXChunk = 0; |
1238 | 0 | int nFullResYChunk = 0; |
1239 | 0 | while (true) |
1240 | 0 | { |
1241 | 0 | nFullResXChunk = |
1242 | 0 | 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc); |
1243 | 0 | nFullResYChunk = |
1244 | 0 | 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc); |
1245 | 0 | if (nFullResXChunk > nRasterXSize) |
1246 | 0 | nFullResXChunk = nRasterXSize; |
1247 | 0 | if (nFullResYChunk > nRasterYSize) |
1248 | 0 | nFullResYChunk = nRasterYSize; |
1249 | 0 | if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) || |
1250 | 0 | (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <= |
1251 | 0 | 1024 * 1024)) |
1252 | 0 | break; |
1253 | | // When operating on the full width of a raster whose block width is |
1254 | | // the raster width, prefer doing chunks in height. |
1255 | 0 | if (nFullResXChunk >= nXSize && nXSize == nBlockXSize && |
1256 | 0 | nDstBlockYSize > 1) |
1257 | 0 | nDstBlockYSize /= 2; |
1258 | | /* Otherwise cut the maximal dimension */ |
1259 | 0 | else if (nDstBlockXSize > 1 && |
1260 | 0 | (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1)) |
1261 | 0 | nDstBlockXSize /= 2; |
1262 | 0 | else |
1263 | 0 | nDstBlockYSize /= 2; |
1264 | 0 | } |
1265 | |
|
1266 | 0 | int nOvrXFactor = static_cast<int>(0.5 + dfXRatioDstToSrc); |
1267 | 0 | int nOvrYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc); |
1268 | 0 | if (nOvrXFactor == 0) |
1269 | 0 | nOvrXFactor = 1; |
1270 | 0 | if (nOvrYFactor == 0) |
1271 | 0 | nOvrYFactor = 1; |
1272 | 0 | int nFullResXSizeQueried = |
1273 | 0 | nFullResXChunk + 2 * nKernelRadius * nOvrXFactor; |
1274 | 0 | int nFullResYSizeQueried = |
1275 | 0 | nFullResYChunk + 2 * nKernelRadius * nOvrYFactor; |
1276 | |
|
1277 | 0 | if (nFullResXSizeQueried > nRasterXSize) |
1278 | 0 | nFullResXSizeQueried = nRasterXSize; |
1279 | 0 | if (nFullResYSizeQueried > nRasterYSize) |
1280 | 0 | nFullResYSizeQueried = nRasterYSize; |
1281 | |
|
1282 | 0 | void *pChunk = |
1283 | 0 | VSI_MALLOC3_VERBOSE(GDALGetDataTypeSizeBytes(eWrkDataType), |
1284 | 0 | nFullResXSizeQueried, nFullResYSizeQueried); |
1285 | 0 | GByte *pabyChunkNoDataMask = nullptr; |
1286 | |
|
1287 | 0 | GDALRasterBand *poMaskBand = GetMaskBand(); |
1288 | 0 | int l_nMaskFlags = GetMaskFlags(); |
1289 | |
|
1290 | 0 | bool bUseNoDataMask = ((l_nMaskFlags & GMF_ALL_VALID) == 0); |
1291 | 0 | if (bUseNoDataMask) |
1292 | 0 | { |
1293 | 0 | pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE( |
1294 | 0 | nFullResXSizeQueried, nFullResYSizeQueried)); |
1295 | 0 | } |
1296 | 0 | if (pChunk == nullptr || |
1297 | 0 | (bUseNoDataMask && pabyChunkNoDataMask == nullptr)) |
1298 | 0 | { |
1299 | 0 | GDALClose(poMEMDS); |
1300 | 0 | CPLFree(pChunk); |
1301 | 0 | CPLFree(pabyChunkNoDataMask); |
1302 | 0 | VSIFree(pTempBuffer); |
1303 | 0 | return CE_Failure; |
1304 | 0 | } |
1305 | | |
1306 | 0 | const int nTotalBlocks = DIV_ROUND_UP(nBufXSize, nDstBlockXSize) * |
1307 | 0 | DIV_ROUND_UP(nBufYSize, nDstBlockYSize); |
1308 | 0 | int nBlocksDone = 0; |
1309 | |
|
1310 | 0 | int nDstYOff; |
1311 | 0 | for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None; |
1312 | 0 | nDstYOff += nDstBlockYSize) |
1313 | 0 | { |
1314 | 0 | int nDstYCount; |
1315 | 0 | if (nDstYOff + nDstBlockYSize <= nBufYSize) |
1316 | 0 | nDstYCount = nDstBlockYSize; |
1317 | 0 | else |
1318 | 0 | nDstYCount = nBufYSize - nDstYOff; |
1319 | |
|
1320 | 0 | int nChunkYOff = |
1321 | 0 | nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc); |
1322 | 0 | int nChunkYOff2 = nYOff + 1 + |
1323 | 0 | static_cast<int>(ceil((nDstYOff + nDstYCount) * |
1324 | 0 | dfYRatioDstToSrc)); |
1325 | 0 | if (nChunkYOff2 > nRasterYSize) |
1326 | 0 | nChunkYOff2 = nRasterYSize; |
1327 | 0 | int nYCount = nChunkYOff2 - nChunkYOff; |
1328 | 0 | CPLAssert(nYCount <= nFullResYChunk); |
1329 | | |
1330 | 0 | int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrYFactor; |
1331 | 0 | int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrYFactor; |
1332 | 0 | if (nChunkYOffQueried < 0) |
1333 | 0 | { |
1334 | 0 | nChunkYSizeQueried += nChunkYOffQueried; |
1335 | 0 | nChunkYOffQueried = 0; |
1336 | 0 | } |
1337 | 0 | if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize) |
1338 | 0 | nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried; |
1339 | 0 | CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried); |
1340 | | |
1341 | 0 | int nDstXOff = 0; |
1342 | 0 | for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None; |
1343 | 0 | nDstXOff += nDstBlockXSize) |
1344 | 0 | { |
1345 | 0 | int nDstXCount = 0; |
1346 | 0 | if (nDstXOff + nDstBlockXSize <= nBufXSize) |
1347 | 0 | nDstXCount = nDstBlockXSize; |
1348 | 0 | else |
1349 | 0 | nDstXCount = nBufXSize - nDstXOff; |
1350 | |
|
1351 | 0 | int nChunkXOff = |
1352 | 0 | nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc); |
1353 | 0 | int nChunkXOff2 = |
1354 | 0 | nXOff + 1 + |
1355 | 0 | static_cast<int>( |
1356 | 0 | ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc)); |
1357 | 0 | if (nChunkXOff2 > nRasterXSize) |
1358 | 0 | nChunkXOff2 = nRasterXSize; |
1359 | 0 | int nXCount = nChunkXOff2 - nChunkXOff; |
1360 | 0 | CPLAssert(nXCount <= nFullResXChunk); |
1361 | | |
1362 | 0 | int nChunkXOffQueried = |
1363 | 0 | nChunkXOff - nKernelRadius * nOvrXFactor; |
1364 | 0 | int nChunkXSizeQueried = |
1365 | 0 | nXCount + 2 * nKernelRadius * nOvrXFactor; |
1366 | 0 | if (nChunkXOffQueried < 0) |
1367 | 0 | { |
1368 | 0 | nChunkXSizeQueried += nChunkXOffQueried; |
1369 | 0 | nChunkXOffQueried = 0; |
1370 | 0 | } |
1371 | 0 | if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize) |
1372 | 0 | nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried; |
1373 | 0 | CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried); |
1374 | | |
1375 | | // Read the source buffers. |
1376 | 0 | eErr = RasterIO(GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
1377 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, pChunk, |
1378 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
1379 | 0 | eWrkDataType, 0, 0, nullptr); |
1380 | |
|
1381 | 0 | bool bSkipResample = false; |
1382 | 0 | bool bNoDataMaskFullyOpaque = false; |
1383 | 0 | if (eErr == CE_None && bUseNoDataMask) |
1384 | 0 | { |
1385 | 0 | eErr = poMaskBand->RasterIO( |
1386 | 0 | GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
1387 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
1388 | 0 | pabyChunkNoDataMask, nChunkXSizeQueried, |
1389 | 0 | nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr); |
1390 | | |
1391 | | /* Optimizations if mask if fully opaque or transparent */ |
1392 | 0 | int nPixels = nChunkXSizeQueried * nChunkYSizeQueried; |
1393 | 0 | GByte bVal = pabyChunkNoDataMask[0]; |
1394 | 0 | int i = 1; |
1395 | 0 | for (; i < nPixels; i++) |
1396 | 0 | { |
1397 | 0 | if (pabyChunkNoDataMask[i] != bVal) |
1398 | 0 | break; |
1399 | 0 | } |
1400 | 0 | if (i == nPixels) |
1401 | 0 | { |
1402 | 0 | if (bVal == 0) |
1403 | 0 | { |
1404 | 0 | for (int j = 0; j < nDstYCount; j++) |
1405 | 0 | { |
1406 | 0 | GDALCopyWords64(&dfNoDataValue, GDT_Float64, 0, |
1407 | 0 | static_cast<GByte *>(pDataMem) + |
1408 | 0 | nLSMem * (j + nDstYOff) + |
1409 | 0 | nDstXOff * nPSMem, |
1410 | 0 | eDTMem, |
1411 | 0 | static_cast<int>(nPSMem), |
1412 | 0 | nDstXCount); |
1413 | 0 | } |
1414 | 0 | bSkipResample = true; |
1415 | 0 | } |
1416 | 0 | else |
1417 | 0 | { |
1418 | 0 | bNoDataMaskFullyOpaque = true; |
1419 | 0 | } |
1420 | 0 | } |
1421 | 0 | } |
1422 | |
|
1423 | 0 | if (!bSkipResample && eErr == CE_None) |
1424 | 0 | { |
1425 | 0 | const bool bPropagateNoData = false; |
1426 | 0 | void *pDstBuffer = nullptr; |
1427 | 0 | GDALDataType eDstBufferDataType = GDT_Unknown; |
1428 | 0 | GDALRasterBand *poMEMBand = |
1429 | 0 | GDALRasterBand::FromHandle(hMEMBand); |
1430 | 0 | GDALOverviewResampleArgs args; |
1431 | 0 | args.eSrcDataType = eDataType; |
1432 | 0 | args.eOvrDataType = poMEMBand->GetRasterDataType(); |
1433 | 0 | args.nOvrXSize = poMEMBand->GetXSize(); |
1434 | 0 | args.nOvrYSize = poMEMBand->GetYSize(); |
1435 | 0 | args.nOvrNBITS = nNBITS; |
1436 | 0 | args.dfXRatioDstToSrc = dfXRatioDstToSrc; |
1437 | 0 | args.dfYRatioDstToSrc = dfYRatioDstToSrc; |
1438 | 0 | args.dfSrcXDelta = |
1439 | 0 | dfXOff - nXOff; /* == 0 if bHasXOffVirtual */ |
1440 | 0 | args.dfSrcYDelta = |
1441 | 0 | dfYOff - nYOff; /* == 0 if bHasYOffVirtual */ |
1442 | 0 | args.eWrkDataType = eWrkDataType; |
1443 | 0 | args.pabyChunkNodataMask = |
1444 | 0 | bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask; |
1445 | 0 | args.nChunkXOff = |
1446 | 0 | nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff); |
1447 | 0 | args.nChunkXSize = nChunkXSizeQueried; |
1448 | 0 | args.nChunkYOff = |
1449 | 0 | nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff); |
1450 | 0 | args.nChunkYSize = nChunkYSizeQueried; |
1451 | 0 | args.nDstXOff = nDstXOff + nDestXOffVirtual; |
1452 | 0 | args.nDstXOff2 = nDstXOff + nDestXOffVirtual + nDstXCount; |
1453 | 0 | args.nDstYOff = nDstYOff + nDestYOffVirtual; |
1454 | 0 | args.nDstYOff2 = nDstYOff + nDestYOffVirtual + nDstYCount; |
1455 | 0 | args.pszResampling = pszResampling; |
1456 | 0 | args.bHasNoData = bHasNoData; |
1457 | 0 | args.dfNoDataValue = dfNoDataValue; |
1458 | 0 | args.poColorTable = GetColorTable(); |
1459 | 0 | args.bPropagateNoData = bPropagateNoData; |
1460 | 0 | eErr = pfnResampleFunc(args, pChunk, &pDstBuffer, |
1461 | 0 | &eDstBufferDataType); |
1462 | 0 | if (eErr == CE_None) |
1463 | 0 | { |
1464 | 0 | eErr = poMEMBand->RasterIO( |
1465 | 0 | GF_Write, nDstXOff + nDestXOffVirtual, |
1466 | 0 | nDstYOff + nDestYOffVirtual, nDstXCount, nDstYCount, |
1467 | 0 | pDstBuffer, nDstXCount, nDstYCount, |
1468 | 0 | eDstBufferDataType, 0, 0, nullptr); |
1469 | 0 | } |
1470 | 0 | CPLFree(pDstBuffer); |
1471 | 0 | } |
1472 | |
|
1473 | 0 | nBlocksDone++; |
1474 | 0 | if (eErr == CE_None && psExtraArg->pfnProgress != nullptr && |
1475 | 0 | !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks, |
1476 | 0 | "", psExtraArg->pProgressData)) |
1477 | 0 | { |
1478 | 0 | eErr = CE_Failure; |
1479 | 0 | } |
1480 | 0 | } |
1481 | 0 | } |
1482 | | |
1483 | 0 | CPLFree(pChunk); |
1484 | 0 | CPLFree(pabyChunkNoDataMask); |
1485 | 0 | } |
1486 | | |
1487 | 0 | if (eBufType != eDataType) |
1488 | 0 | { |
1489 | 0 | CPL_IGNORE_RET_VAL(poMEMDS->GetRasterBand(1)->RasterIO( |
1490 | 0 | GF_Read, nDestXOffVirtual, nDestYOffVirtual, nBufXSize, nBufYSize, |
1491 | 0 | pData, nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace, |
1492 | 0 | nullptr)); |
1493 | 0 | } |
1494 | 0 | GDALClose(poMEMDS); |
1495 | 0 | VSIFree(pTempBuffer); |
1496 | |
|
1497 | 0 | return eErr; |
1498 | 0 | } |
1499 | | |
1500 | | /************************************************************************/ |
1501 | | /* RasterIOResampled() */ |
1502 | | /************************************************************************/ |
1503 | | |
1504 | | CPLErr GDALDataset::RasterIOResampled( |
1505 | | GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize, |
1506 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
1507 | | int nBandCount, const int *panBandMap, GSpacing nPixelSpace, |
1508 | | GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg) |
1509 | | |
1510 | 0 | { |
1511 | | #if 0 |
1512 | | // Determine if we use warping resampling or overview resampling |
1513 | | bool bUseWarp = false; |
1514 | | if( GDALDataTypeIsComplex( eDataType ) ) |
1515 | | bUseWarp = true; |
1516 | | #endif |
1517 | |
|
1518 | 0 | double dfXOff = nXOff; |
1519 | 0 | double dfYOff = nYOff; |
1520 | 0 | double dfXSize = nXSize; |
1521 | 0 | double dfYSize = nYSize; |
1522 | 0 | if (psExtraArg->bFloatingPointWindowValidity) |
1523 | 0 | { |
1524 | 0 | dfXOff = psExtraArg->dfXOff; |
1525 | 0 | dfYOff = psExtraArg->dfYOff; |
1526 | 0 | dfXSize = psExtraArg->dfXSize; |
1527 | 0 | dfYSize = psExtraArg->dfYSize; |
1528 | 0 | } |
1529 | |
|
1530 | 0 | const double dfXRatioDstToSrc = dfXSize / nBufXSize; |
1531 | 0 | const double dfYRatioDstToSrc = dfYSize / nBufYSize; |
1532 | | |
1533 | | // Determine the coordinates in the "virtual" output raster to see |
1534 | | // if there are not integers, in which case we will use them as a shift |
1535 | | // so that subwindow extracts give the exact same results as entire raster |
1536 | | // scaling. |
1537 | 0 | double dfDestXOff = dfXOff / dfXRatioDstToSrc; |
1538 | 0 | bool bHasXOffVirtual = false; |
1539 | 0 | int nDestXOffVirtual = 0; |
1540 | 0 | if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8) |
1541 | 0 | { |
1542 | 0 | bHasXOffVirtual = true; |
1543 | 0 | dfXOff = nXOff; |
1544 | 0 | nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5); |
1545 | 0 | } |
1546 | |
|
1547 | 0 | double dfDestYOff = dfYOff / dfYRatioDstToSrc; |
1548 | 0 | bool bHasYOffVirtual = false; |
1549 | 0 | int nDestYOffVirtual = 0; |
1550 | 0 | if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8) |
1551 | 0 | { |
1552 | 0 | bHasYOffVirtual = true; |
1553 | 0 | dfYOff = nYOff; |
1554 | 0 | nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5); |
1555 | 0 | } |
1556 | | |
1557 | | // Create a MEM dataset that wraps the output buffer. |
1558 | 0 | GDALDataset *poMEMDS = |
1559 | 0 | MEMDataset::Create("", nDestXOffVirtual + nBufXSize, |
1560 | 0 | nDestYOffVirtual + nBufYSize, 0, eBufType, nullptr); |
1561 | 0 | GDALRasterBand **papoDstBands = static_cast<GDALRasterBand **>( |
1562 | 0 | CPLMalloc(nBandCount * sizeof(GDALRasterBand *))); |
1563 | 0 | int nNBITS = 0; |
1564 | 0 | for (int i = 0; i < nBandCount; i++) |
1565 | 0 | { |
1566 | 0 | char szBuffer[32] = {'\0'}; |
1567 | 0 | int nRet = CPLPrintPointer( |
1568 | 0 | szBuffer, |
1569 | 0 | static_cast<GByte *>(pData) - nPixelSpace * nDestXOffVirtual - |
1570 | 0 | nLineSpace * nDestYOffVirtual + nBandSpace * i, |
1571 | 0 | sizeof(szBuffer)); |
1572 | 0 | szBuffer[nRet] = 0; |
1573 | |
|
1574 | 0 | char szBuffer0[64] = {'\0'}; |
1575 | 0 | snprintf(szBuffer0, sizeof(szBuffer0), "DATAPOINTER=%s", szBuffer); |
1576 | |
|
1577 | 0 | char szBuffer1[64] = {'\0'}; |
1578 | 0 | snprintf(szBuffer1, sizeof(szBuffer1), "PIXELOFFSET=" CPL_FRMT_GIB, |
1579 | 0 | static_cast<GIntBig>(nPixelSpace)); |
1580 | |
|
1581 | 0 | char szBuffer2[64] = {'\0'}; |
1582 | 0 | snprintf(szBuffer2, sizeof(szBuffer2), "LINEOFFSET=" CPL_FRMT_GIB, |
1583 | 0 | static_cast<GIntBig>(nLineSpace)); |
1584 | |
|
1585 | 0 | char *apszOptions[4] = {szBuffer0, szBuffer1, szBuffer2, nullptr}; |
1586 | |
|
1587 | 0 | poMEMDS->AddBand(eBufType, apszOptions); |
1588 | |
|
1589 | 0 | GDALRasterBand *poSrcBand = GetRasterBand(panBandMap[i]); |
1590 | 0 | papoDstBands[i] = poMEMDS->GetRasterBand(i + 1); |
1591 | 0 | const char *pszNBITS = |
1592 | 0 | poSrcBand->GetMetadataItem("NBITS", "IMAGE_STRUCTURE"); |
1593 | 0 | if (pszNBITS) |
1594 | 0 | { |
1595 | 0 | nNBITS = atoi(pszNBITS); |
1596 | 0 | poMEMDS->GetRasterBand(i + 1)->SetMetadataItem("NBITS", pszNBITS, |
1597 | 0 | "IMAGE_STRUCTURE"); |
1598 | 0 | } |
1599 | 0 | } |
1600 | |
|
1601 | 0 | CPLErr eErr = CE_None; |
1602 | | |
1603 | | // TODO(schwehr): Why disabled? Why not just delete? |
1604 | | // Looks like this code was initially added as disabled by copying |
1605 | | // from RasterIO here: |
1606 | | // https://trac.osgeo.org/gdal/changeset/29572 |
1607 | | #if 0 |
1608 | | // Do the resampling. |
1609 | | if( bUseWarp ) |
1610 | | { |
1611 | | VRTDatasetH hVRTDS = nullptr; |
1612 | | GDALRasterBandH hVRTBand = nullptr; |
1613 | | if( GetDataset() == nullptr ) |
1614 | | { |
1615 | | /* Create VRT dataset that wraps the whole dataset */ |
1616 | | hVRTDS = VRTCreate(nRasterXSize, nRasterYSize); |
1617 | | VRTAddBand( hVRTDS, eDataType, nullptr ); |
1618 | | hVRTBand = GDALGetRasterBand(hVRTDS, 1); |
1619 | | VRTAddSimpleSource( (VRTSourcedRasterBandH)hVRTBand, |
1620 | | (GDALRasterBandH)this, |
1621 | | 0, 0, |
1622 | | nRasterXSize, nRasterYSize, |
1623 | | 0, 0, |
1624 | | nRasterXSize, nRasterYSize, |
1625 | | nullptr, VRT_NODATA_UNSET ); |
1626 | | |
1627 | | /* Add a mask band if needed */ |
1628 | | if( GetMaskFlags() != GMF_ALL_VALID ) |
1629 | | { |
1630 | | ((GDALDataset*)hVRTDS)->CreateMaskBand(0); |
1631 | | VRTSourcedRasterBand* poVRTMaskBand = |
1632 | | (VRTSourcedRasterBand*)(((GDALRasterBand*)hVRTBand)->GetMaskBand()); |
1633 | | poVRTMaskBand-> |
1634 | | AddMaskBandSource( this, |
1635 | | 0, 0, |
1636 | | nRasterXSize, nRasterYSize, |
1637 | | 0, 0, |
1638 | | nRasterXSize, nRasterYSize); |
1639 | | } |
1640 | | } |
1641 | | |
1642 | | GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions(); |
1643 | | psWarpOptions->eResampleAlg = (GDALResampleAlg)psExtraArg->eResampleAlg; |
1644 | | psWarpOptions->hSrcDS = (GDALDatasetH) (hVRTDS ? hVRTDS : GetDataset()); |
1645 | | psWarpOptions->hDstDS = (GDALDatasetH) poMEMDS; |
1646 | | psWarpOptions->nBandCount = 1; |
1647 | | int nSrcBandNumber = (hVRTDS ? 1 : nBand); |
1648 | | int nDstBandNumber = 1; |
1649 | | psWarpOptions->panSrcBands = &nSrcBandNumber; |
1650 | | psWarpOptions->panDstBands = &nDstBandNumber; |
1651 | | psWarpOptions->pfnProgress = psExtraArg->pfnProgress ? |
1652 | | psExtraArg->pfnProgress : GDALDummyProgress; |
1653 | | psWarpOptions->pProgressArg = psExtraArg->pProgressData; |
1654 | | psWarpOptions->pfnTransformer = GDALRasterIOTransformer; |
1655 | | GDALRasterIOTransformerStruct sTransformer; |
1656 | | sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff; |
1657 | | sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff; |
1658 | | sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc; |
1659 | | sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc; |
1660 | | psWarpOptions->pTransformerArg = &sTransformer; |
1661 | | |
1662 | | GDALWarpOperationH hWarpOperation = GDALCreateWarpOperation(psWarpOptions); |
1663 | | eErr = GDALChunkAndWarpImage( hWarpOperation, |
1664 | | nDestXOffVirtual, nDestYOffVirtual, |
1665 | | nBufXSize, nBufYSize ); |
1666 | | GDALDestroyWarpOperation( hWarpOperation ); |
1667 | | |
1668 | | psWarpOptions->panSrcBands = nullptr; |
1669 | | psWarpOptions->panDstBands = nullptr; |
1670 | | GDALDestroyWarpOptions( psWarpOptions ); |
1671 | | |
1672 | | if( hVRTDS ) |
1673 | | GDALClose(hVRTDS); |
1674 | | } |
1675 | | else |
1676 | | #endif |
1677 | 0 | { |
1678 | 0 | const char *pszResampling = |
1679 | 0 | (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR" |
1680 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC" |
1681 | 0 | : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE" |
1682 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS" |
1683 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE" |
1684 | 0 | : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS" |
1685 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE" |
1686 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS" |
1687 | 0 | : "UNKNOWN"; |
1688 | |
|
1689 | 0 | GDALRasterBand *poFirstSrcBand = GetRasterBand(panBandMap[0]); |
1690 | 0 | GDALDataType eDataType = poFirstSrcBand->GetRasterDataType(); |
1691 | 0 | int nBlockXSize, nBlockYSize; |
1692 | 0 | poFirstSrcBand->GetBlockSize(&nBlockXSize, &nBlockYSize); |
1693 | |
|
1694 | 0 | int nKernelRadius; |
1695 | 0 | GDALResampleFunction pfnResampleFunc = |
1696 | 0 | GDALGetResampleFunction(pszResampling, &nKernelRadius); |
1697 | 0 | CPLAssert(pfnResampleFunc); |
1698 | | #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND |
1699 | | GDALResampleFunctionMultiBands pfnResampleFuncMultiBands = |
1700 | | GDALGetResampleFunctionMultiBands(pszResampling, &nKernelRadius); |
1701 | | #endif |
1702 | 0 | GDALDataType eWrkDataType = |
1703 | 0 | GDALGetOvrWorkDataType(pszResampling, eDataType); |
1704 | |
|
1705 | 0 | int nDstBlockXSize = nBufXSize; |
1706 | 0 | int nDstBlockYSize = nBufYSize; |
1707 | 0 | int nFullResXChunk, nFullResYChunk; |
1708 | 0 | while (true) |
1709 | 0 | { |
1710 | 0 | nFullResXChunk = |
1711 | 0 | 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc); |
1712 | 0 | nFullResYChunk = |
1713 | 0 | 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc); |
1714 | 0 | if (nFullResXChunk > nRasterXSize) |
1715 | 0 | nFullResXChunk = nRasterXSize; |
1716 | 0 | if (nFullResYChunk > nRasterYSize) |
1717 | 0 | nFullResYChunk = nRasterYSize; |
1718 | 0 | if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) || |
1719 | 0 | (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <= |
1720 | 0 | 1024 * 1024)) |
1721 | 0 | break; |
1722 | | // When operating on the full width of a raster whose block width is |
1723 | | // the raster width, prefer doing chunks in height. |
1724 | 0 | if (nFullResXChunk >= nXSize && nXSize == nBlockXSize && |
1725 | 0 | nDstBlockYSize > 1) |
1726 | 0 | nDstBlockYSize /= 2; |
1727 | | /* Otherwise cut the maximal dimension */ |
1728 | 0 | else if (nDstBlockXSize > 1 && |
1729 | 0 | (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1)) |
1730 | 0 | nDstBlockXSize /= 2; |
1731 | 0 | else |
1732 | 0 | nDstBlockYSize /= 2; |
1733 | 0 | } |
1734 | |
|
1735 | 0 | int nOvrFactor = std::max(static_cast<int>(0.5 + dfXRatioDstToSrc), |
1736 | 0 | static_cast<int>(0.5 + dfYRatioDstToSrc)); |
1737 | 0 | if (nOvrFactor == 0) |
1738 | 0 | nOvrFactor = 1; |
1739 | 0 | int nFullResXSizeQueried = |
1740 | 0 | nFullResXChunk + 2 * nKernelRadius * nOvrFactor; |
1741 | 0 | int nFullResYSizeQueried = |
1742 | 0 | nFullResYChunk + 2 * nKernelRadius * nOvrFactor; |
1743 | |
|
1744 | 0 | if (nFullResXSizeQueried > nRasterXSize) |
1745 | 0 | nFullResXSizeQueried = nRasterXSize; |
1746 | 0 | if (nFullResYSizeQueried > nRasterYSize) |
1747 | 0 | nFullResYSizeQueried = nRasterYSize; |
1748 | |
|
1749 | 0 | void *pChunk = VSI_MALLOC3_VERBOSE( |
1750 | 0 | cpl::fits_on<int>(GDALGetDataTypeSizeBytes(eWrkDataType) * |
1751 | 0 | nBandCount), |
1752 | 0 | nFullResXSizeQueried, nFullResYSizeQueried); |
1753 | 0 | GByte *pabyChunkNoDataMask = nullptr; |
1754 | |
|
1755 | 0 | GDALRasterBand *poMaskBand = poFirstSrcBand->GetMaskBand(); |
1756 | 0 | int nMaskFlags = poFirstSrcBand->GetMaskFlags(); |
1757 | |
|
1758 | 0 | bool bUseNoDataMask = ((nMaskFlags & GMF_ALL_VALID) == 0); |
1759 | 0 | if (bUseNoDataMask) |
1760 | 0 | { |
1761 | 0 | pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE( |
1762 | 0 | nFullResXSizeQueried, nFullResYSizeQueried)); |
1763 | 0 | } |
1764 | 0 | if (pChunk == nullptr || |
1765 | 0 | (bUseNoDataMask && pabyChunkNoDataMask == nullptr)) |
1766 | 0 | { |
1767 | 0 | GDALClose(poMEMDS); |
1768 | 0 | CPLFree(pChunk); |
1769 | 0 | CPLFree(pabyChunkNoDataMask); |
1770 | 0 | CPLFree(papoDstBands); |
1771 | 0 | return CE_Failure; |
1772 | 0 | } |
1773 | | |
1774 | 0 | const int nTotalBlocks = DIV_ROUND_UP(nBufXSize, nDstBlockXSize) * |
1775 | 0 | DIV_ROUND_UP(nBufYSize, nDstBlockYSize); |
1776 | 0 | int nBlocksDone = 0; |
1777 | |
|
1778 | 0 | int nDstYOff; |
1779 | 0 | for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None; |
1780 | 0 | nDstYOff += nDstBlockYSize) |
1781 | 0 | { |
1782 | 0 | int nDstYCount; |
1783 | 0 | if (nDstYOff + nDstBlockYSize <= nBufYSize) |
1784 | 0 | nDstYCount = nDstBlockYSize; |
1785 | 0 | else |
1786 | 0 | nDstYCount = nBufYSize - nDstYOff; |
1787 | |
|
1788 | 0 | int nChunkYOff = |
1789 | 0 | nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc); |
1790 | 0 | int nChunkYOff2 = nYOff + 1 + |
1791 | 0 | static_cast<int>(ceil((nDstYOff + nDstYCount) * |
1792 | 0 | dfYRatioDstToSrc)); |
1793 | 0 | if (nChunkYOff2 > nRasterYSize) |
1794 | 0 | nChunkYOff2 = nRasterYSize; |
1795 | 0 | int nYCount = nChunkYOff2 - nChunkYOff; |
1796 | 0 | CPLAssert(nYCount <= nFullResYChunk); |
1797 | | |
1798 | 0 | int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor; |
1799 | 0 | int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrFactor; |
1800 | 0 | if (nChunkYOffQueried < 0) |
1801 | 0 | { |
1802 | 0 | nChunkYSizeQueried += nChunkYOffQueried; |
1803 | 0 | nChunkYOffQueried = 0; |
1804 | 0 | } |
1805 | 0 | if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize) |
1806 | 0 | nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried; |
1807 | 0 | CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried); |
1808 | | |
1809 | 0 | int nDstXOff; |
1810 | 0 | for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None; |
1811 | 0 | nDstXOff += nDstBlockXSize) |
1812 | 0 | { |
1813 | 0 | int nDstXCount; |
1814 | 0 | if (nDstXOff + nDstBlockXSize <= nBufXSize) |
1815 | 0 | nDstXCount = nDstBlockXSize; |
1816 | 0 | else |
1817 | 0 | nDstXCount = nBufXSize - nDstXOff; |
1818 | |
|
1819 | 0 | int nChunkXOff = |
1820 | 0 | nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc); |
1821 | 0 | int nChunkXOff2 = |
1822 | 0 | nXOff + 1 + |
1823 | 0 | static_cast<int>( |
1824 | 0 | ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc)); |
1825 | 0 | if (nChunkXOff2 > nRasterXSize) |
1826 | 0 | nChunkXOff2 = nRasterXSize; |
1827 | 0 | int nXCount = nChunkXOff2 - nChunkXOff; |
1828 | 0 | CPLAssert(nXCount <= nFullResXChunk); |
1829 | | |
1830 | 0 | int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor; |
1831 | 0 | int nChunkXSizeQueried = |
1832 | 0 | nXCount + 2 * nKernelRadius * nOvrFactor; |
1833 | 0 | if (nChunkXOffQueried < 0) |
1834 | 0 | { |
1835 | 0 | nChunkXSizeQueried += nChunkXOffQueried; |
1836 | 0 | nChunkXOffQueried = 0; |
1837 | 0 | } |
1838 | 0 | if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize) |
1839 | 0 | nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried; |
1840 | 0 | CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried); |
1841 | | |
1842 | 0 | bool bSkipResample = false; |
1843 | 0 | bool bNoDataMaskFullyOpaque = false; |
1844 | 0 | if (eErr == CE_None && bUseNoDataMask) |
1845 | 0 | { |
1846 | 0 | eErr = poMaskBand->RasterIO( |
1847 | 0 | GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
1848 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
1849 | 0 | pabyChunkNoDataMask, nChunkXSizeQueried, |
1850 | 0 | nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr); |
1851 | | |
1852 | | /* Optimizations if mask is fully opaque or transparent */ |
1853 | 0 | const int nPixels = nChunkXSizeQueried * nChunkYSizeQueried; |
1854 | 0 | const GByte bVal = pabyChunkNoDataMask[0]; |
1855 | 0 | int i = 1; // Used after for. |
1856 | 0 | for (; i < nPixels; i++) |
1857 | 0 | { |
1858 | 0 | if (pabyChunkNoDataMask[i] != bVal) |
1859 | 0 | break; |
1860 | 0 | } |
1861 | 0 | if (i == nPixels) |
1862 | 0 | { |
1863 | 0 | if (bVal == 0) |
1864 | 0 | { |
1865 | 0 | GByte abyZero[16] = {0}; |
1866 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
1867 | 0 | { |
1868 | 0 | for (int j = 0; j < nDstYCount; j++) |
1869 | 0 | { |
1870 | 0 | GDALCopyWords64( |
1871 | 0 | abyZero, GDT_Byte, 0, |
1872 | 0 | static_cast<GByte *>(pData) + |
1873 | 0 | iBand * nBandSpace + |
1874 | 0 | nLineSpace * (j + nDstYOff) + |
1875 | 0 | nDstXOff * nPixelSpace, |
1876 | 0 | eBufType, static_cast<int>(nPixelSpace), |
1877 | 0 | nDstXCount); |
1878 | 0 | } |
1879 | 0 | } |
1880 | 0 | bSkipResample = true; |
1881 | 0 | } |
1882 | 0 | else |
1883 | 0 | { |
1884 | 0 | bNoDataMaskFullyOpaque = true; |
1885 | 0 | } |
1886 | 0 | } |
1887 | 0 | } |
1888 | |
|
1889 | 0 | if (!bSkipResample && eErr == CE_None) |
1890 | 0 | { |
1891 | | /* Read the source buffers */ |
1892 | 0 | eErr = RasterIO( |
1893 | 0 | GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
1894 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, pChunk, |
1895 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, eWrkDataType, |
1896 | 0 | nBandCount, panBandMap, 0, 0, 0, nullptr); |
1897 | 0 | } |
1898 | |
|
1899 | | #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND |
1900 | | if (pfnResampleFuncMultiBands && !bSkipResample && |
1901 | | eErr == CE_None) |
1902 | | { |
1903 | | eErr = pfnResampleFuncMultiBands( |
1904 | | dfXRatioDstToSrc, dfYRatioDstToSrc, |
1905 | | dfXOff - nXOff, /* == 0 if bHasXOffVirtual */ |
1906 | | dfYOff - nYOff, /* == 0 if bHasYOffVirtual */ |
1907 | | eWrkDataType, (GByte *)pChunk, nBandCount, |
1908 | | bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask, |
1909 | | nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff), |
1910 | | nChunkXSizeQueried, |
1911 | | nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff), |
1912 | | nChunkYSizeQueried, nDstXOff + nDestXOffVirtual, |
1913 | | nDstXOff + nDestXOffVirtual + nDstXCount, |
1914 | | nDstYOff + nDestYOffVirtual, |
1915 | | nDstYOff + nDestYOffVirtual + nDstYCount, papoDstBands, |
1916 | | pszResampling, FALSE /*bHasNoData*/, |
1917 | | 0.0 /* dfNoDataValue */, nullptr /* color table*/, |
1918 | | eDataType); |
1919 | | } |
1920 | | else |
1921 | | #endif |
1922 | 0 | { |
1923 | 0 | size_t nChunkBandOffset = |
1924 | 0 | static_cast<size_t>(nChunkXSizeQueried) * |
1925 | 0 | nChunkYSizeQueried * |
1926 | 0 | GDALGetDataTypeSizeBytes(eWrkDataType); |
1927 | 0 | for (int i = 0; |
1928 | 0 | i < nBandCount && !bSkipResample && eErr == CE_None; |
1929 | 0 | i++) |
1930 | 0 | { |
1931 | 0 | const bool bPropagateNoData = false; |
1932 | 0 | void *pDstBuffer = nullptr; |
1933 | 0 | GDALDataType eDstBufferDataType = GDT_Unknown; |
1934 | 0 | GDALRasterBand *poMEMBand = |
1935 | 0 | poMEMDS->GetRasterBand(i + 1); |
1936 | 0 | GDALOverviewResampleArgs args; |
1937 | 0 | args.eSrcDataType = eDataType; |
1938 | 0 | args.eOvrDataType = poMEMBand->GetRasterDataType(); |
1939 | 0 | args.nOvrXSize = poMEMBand->GetXSize(); |
1940 | 0 | args.nOvrYSize = poMEMBand->GetYSize(); |
1941 | 0 | args.nOvrNBITS = nNBITS; |
1942 | 0 | args.dfXRatioDstToSrc = dfXRatioDstToSrc; |
1943 | 0 | args.dfYRatioDstToSrc = dfYRatioDstToSrc; |
1944 | 0 | args.dfSrcXDelta = |
1945 | 0 | dfXOff - nXOff; /* == 0 if bHasXOffVirtual */ |
1946 | 0 | args.dfSrcYDelta = |
1947 | 0 | dfYOff - nYOff; /* == 0 if bHasYOffVirtual */ |
1948 | 0 | args.eWrkDataType = eWrkDataType; |
1949 | 0 | args.pabyChunkNodataMask = bNoDataMaskFullyOpaque |
1950 | 0 | ? nullptr |
1951 | 0 | : pabyChunkNoDataMask; |
1952 | 0 | args.nChunkXOff = |
1953 | 0 | nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff); |
1954 | 0 | args.nChunkXSize = nChunkXSizeQueried; |
1955 | 0 | args.nChunkYOff = |
1956 | 0 | nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff); |
1957 | 0 | args.nChunkYSize = nChunkYSizeQueried; |
1958 | 0 | args.nDstXOff = nDstXOff + nDestXOffVirtual; |
1959 | 0 | args.nDstXOff2 = |
1960 | 0 | nDstXOff + nDestXOffVirtual + nDstXCount; |
1961 | 0 | args.nDstYOff = nDstYOff + nDestYOffVirtual; |
1962 | 0 | args.nDstYOff2 = |
1963 | 0 | nDstYOff + nDestYOffVirtual + nDstYCount; |
1964 | 0 | args.pszResampling = pszResampling; |
1965 | 0 | args.bHasNoData = false; |
1966 | 0 | args.dfNoDataValue = 0.0; |
1967 | 0 | args.poColorTable = nullptr; |
1968 | 0 | args.bPropagateNoData = bPropagateNoData; |
1969 | |
|
1970 | 0 | eErr = |
1971 | 0 | pfnResampleFunc(args, |
1972 | 0 | reinterpret_cast<GByte *>(pChunk) + |
1973 | 0 | i * nChunkBandOffset, |
1974 | 0 | &pDstBuffer, &eDstBufferDataType); |
1975 | 0 | if (eErr == CE_None) |
1976 | 0 | { |
1977 | 0 | eErr = poMEMBand->RasterIO( |
1978 | 0 | GF_Write, nDstXOff + nDestXOffVirtual, |
1979 | 0 | nDstYOff + nDestYOffVirtual, nDstXCount, |
1980 | 0 | nDstYCount, pDstBuffer, nDstXCount, nDstYCount, |
1981 | 0 | eDstBufferDataType, 0, 0, nullptr); |
1982 | 0 | } |
1983 | 0 | CPLFree(pDstBuffer); |
1984 | 0 | } |
1985 | 0 | } |
1986 | |
|
1987 | 0 | nBlocksDone++; |
1988 | 0 | if (eErr == CE_None && psExtraArg->pfnProgress != nullptr && |
1989 | 0 | !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks, |
1990 | 0 | "", psExtraArg->pProgressData)) |
1991 | 0 | { |
1992 | 0 | eErr = CE_Failure; |
1993 | 0 | } |
1994 | 0 | } |
1995 | 0 | } |
1996 | | |
1997 | 0 | CPLFree(pChunk); |
1998 | 0 | CPLFree(pabyChunkNoDataMask); |
1999 | 0 | } |
2000 | | |
2001 | 0 | CPLFree(papoDstBands); |
2002 | 0 | GDALClose(poMEMDS); |
2003 | |
|
2004 | 0 | return eErr; |
2005 | 0 | } |
2006 | | |
2007 | | //! @endcond |
2008 | | |
2009 | | /************************************************************************/ |
2010 | | /* GDALSwapWords() */ |
2011 | | /************************************************************************/ |
2012 | | |
2013 | | /** |
2014 | | * Byte swap words in-place. |
2015 | | * |
2016 | | * This function will byte swap a set of 2, 4 or 8 byte words "in place" in |
2017 | | * a memory array. No assumption is made that the words being swapped are |
2018 | | * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h |
2019 | | * to determine if the current platform is big endian or little endian. Use |
2020 | | * The macros like CPL_SWAP32() to byte swap single values without the overhead |
2021 | | * of a function call. |
2022 | | * |
2023 | | * @param pData pointer to start of data buffer. |
2024 | | * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8. |
2025 | | * @param nWordCount the number of words to be swapped in this call. |
2026 | | * @param nWordSkip the byte offset from the start of one word to the start of |
2027 | | * the next. For packed buffers this is the same as nWordSize. |
2028 | | */ |
2029 | | |
2030 | | void CPL_STDCALL GDALSwapWords(void *pData, int nWordSize, int nWordCount, |
2031 | | int nWordSkip) |
2032 | | |
2033 | 0 | { |
2034 | 0 | if (nWordCount > 0) |
2035 | 0 | VALIDATE_POINTER0(pData, "GDALSwapWords"); |
2036 | | |
2037 | 0 | GByte *pabyData = static_cast<GByte *>(pData); |
2038 | |
|
2039 | 0 | switch (nWordSize) |
2040 | 0 | { |
2041 | 0 | case 1: |
2042 | 0 | break; |
2043 | | |
2044 | 0 | case 2: |
2045 | 0 | CPLAssert(nWordSkip >= 2 || nWordCount == 1); |
2046 | 0 | for (int i = 0; i < nWordCount; i++) |
2047 | 0 | { |
2048 | 0 | CPL_SWAP16PTR(pabyData); |
2049 | 0 | pabyData += nWordSkip; |
2050 | 0 | } |
2051 | 0 | break; |
2052 | | |
2053 | 0 | case 4: |
2054 | 0 | CPLAssert(nWordSkip >= 4 || nWordCount == 1); |
2055 | 0 | if (CPL_IS_ALIGNED(pabyData, 4) && (nWordSkip % 4) == 0) |
2056 | 0 | { |
2057 | 0 | for (int i = 0; i < nWordCount; i++) |
2058 | 0 | { |
2059 | 0 | *reinterpret_cast<GUInt32 *>(pabyData) = CPL_SWAP32( |
2060 | 0 | *reinterpret_cast<const GUInt32 *>(pabyData)); |
2061 | 0 | pabyData += nWordSkip; |
2062 | 0 | } |
2063 | 0 | } |
2064 | 0 | else |
2065 | 0 | { |
2066 | 0 | for (int i = 0; i < nWordCount; i++) |
2067 | 0 | { |
2068 | 0 | CPL_SWAP32PTR(pabyData); |
2069 | 0 | pabyData += nWordSkip; |
2070 | 0 | } |
2071 | 0 | } |
2072 | 0 | break; |
2073 | | |
2074 | 0 | case 8: |
2075 | 0 | CPLAssert(nWordSkip >= 8 || nWordCount == 1); |
2076 | 0 | if (CPL_IS_ALIGNED(pabyData, 8) && (nWordSkip % 8) == 0) |
2077 | 0 | { |
2078 | 0 | for (int i = 0; i < nWordCount; i++) |
2079 | 0 | { |
2080 | 0 | *reinterpret_cast<GUInt64 *>(pabyData) = CPL_SWAP64( |
2081 | 0 | *reinterpret_cast<const GUInt64 *>(pabyData)); |
2082 | 0 | pabyData += nWordSkip; |
2083 | 0 | } |
2084 | 0 | } |
2085 | 0 | else |
2086 | 0 | { |
2087 | 0 | for (int i = 0; i < nWordCount; i++) |
2088 | 0 | { |
2089 | 0 | CPL_SWAP64PTR(pabyData); |
2090 | 0 | pabyData += nWordSkip; |
2091 | 0 | } |
2092 | 0 | } |
2093 | 0 | break; |
2094 | | |
2095 | 0 | default: |
2096 | 0 | CPLAssert(false); |
2097 | 0 | } |
2098 | 0 | } |
2099 | | |
2100 | | /************************************************************************/ |
2101 | | /* GDALSwapWordsEx() */ |
2102 | | /************************************************************************/ |
2103 | | |
2104 | | /** |
2105 | | * Byte swap words in-place. |
2106 | | * |
2107 | | * This function will byte swap a set of 2, 4 or 8 byte words "in place" in |
2108 | | * a memory array. No assumption is made that the words being swapped are |
2109 | | * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h |
2110 | | * to determine if the current platform is big endian or little endian. Use |
2111 | | * The macros like CPL_SWAP32() to byte swap single values without the overhead |
2112 | | * of a function call. |
2113 | | * |
2114 | | * @param pData pointer to start of data buffer. |
2115 | | * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8. |
2116 | | * @param nWordCount the number of words to be swapped in this call. |
2117 | | * @param nWordSkip the byte offset from the start of one word to the start of |
2118 | | * the next. For packed buffers this is the same as nWordSize. |
2119 | | * @since GDAL 2.1 |
2120 | | */ |
2121 | | void CPL_STDCALL GDALSwapWordsEx(void *pData, int nWordSize, size_t nWordCount, |
2122 | | int nWordSkip) |
2123 | 0 | { |
2124 | 0 | GByte *pabyData = static_cast<GByte *>(pData); |
2125 | 0 | while (nWordCount) |
2126 | 0 | { |
2127 | | // Pick-up a multiple of 8 as max chunk size. |
2128 | 0 | const int nWordCountSmall = |
2129 | 0 | (nWordCount > (1 << 30)) ? (1 << 30) : static_cast<int>(nWordCount); |
2130 | 0 | GDALSwapWords(pabyData, nWordSize, nWordCountSmall, nWordSkip); |
2131 | 0 | pabyData += static_cast<size_t>(nWordSkip) * nWordCountSmall; |
2132 | 0 | nWordCount -= nWordCountSmall; |
2133 | 0 | } |
2134 | 0 | } |
2135 | | |
2136 | | // Place the new GDALCopyWords helpers in an anonymous namespace |
2137 | | namespace |
2138 | | { |
2139 | | |
2140 | | /************************************************************************/ |
2141 | | /* GDALCopyWordsT() */ |
2142 | | /************************************************************************/ |
2143 | | /** |
2144 | | * Template function, used to copy data from pSrcData into buffer |
2145 | | * pDstData, with stride nSrcPixelStride in the source data and |
2146 | | * stride nDstPixelStride in the destination data. This template can |
2147 | | * deal with the case where the input data type is real or complex and |
2148 | | * the output is real. |
2149 | | * |
2150 | | * @param pSrcData the source data buffer |
2151 | | * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels |
2152 | | * of interest. |
2153 | | * @param pDstData the destination buffer. |
2154 | | * @param nDstPixelStride the stride in the buffer pDstData for pixels of |
2155 | | * interest. |
2156 | | * @param nWordCount the total number of pixel words to copy |
2157 | | * |
2158 | | * @code |
2159 | | * // Assume an input buffer of type GUInt16 named pBufferIn |
2160 | | * GByte *pBufferOut = new GByte[numBytesOut]; |
2161 | | * GDALCopyWordsT<GUInt16, GByte>(pBufferIn, 2, pBufferOut, 1, numBytesOut); |
2162 | | * @endcode |
2163 | | * @note |
2164 | | * This is a private function, and should not be exposed outside of |
2165 | | * rasterio.cpp. External users should call the GDALCopyWords driver function. |
2166 | | */ |
2167 | | |
2168 | | template <class Tin, class Tout> |
2169 | | static void inline GDALCopyWordsGenericT(const Tin *const CPL_RESTRICT pSrcData, |
2170 | | int nSrcPixelStride, |
2171 | | Tout *const CPL_RESTRICT pDstData, |
2172 | | int nDstPixelStride, |
2173 | | GPtrDiff_t nWordCount) |
2174 | 0 | { |
2175 | 0 | decltype(nWordCount) nDstOffset = 0; |
2176 | |
|
2177 | 0 | const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData); |
2178 | 0 | char *const pDstDataPtr = reinterpret_cast<char *>(pDstData); |
2179 | 0 | for (decltype(nWordCount) n = 0; n < nWordCount; n++) |
2180 | 0 | { |
2181 | 0 | const Tin tValue = |
2182 | 0 | *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride)); |
2183 | 0 | Tout *const pOutPixel = |
2184 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset); |
2185 | |
|
2186 | 0 | GDALCopyWord(tValue, *pOutPixel); |
2187 | |
|
2188 | 0 | nDstOffset += nDstPixelStride; |
2189 | 0 | } |
2190 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned char>(unsigned char const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, signed char>(unsigned char const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned short>(unsigned char const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, short>(unsigned char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned int>(unsigned char const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, int>(unsigned char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned long>(unsigned char const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, long>(unsigned char const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, float>(unsigned char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, double>(unsigned char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned 
char>(signed char const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, signed char>(signed char const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned short>(signed char const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, short>(signed char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned int>(signed char const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, int>(signed char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned long>(signed char const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, long>(signed char const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, float>(signed char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, double>(signed char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned char>(unsigned short const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, 
signed char>(unsigned short const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned short>(unsigned short const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, short>(unsigned short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned int>(unsigned short const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, int>(unsigned short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned long>(unsigned short const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, long>(unsigned short const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, float>(unsigned short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, double>(unsigned short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned char>(short const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, signed char>(short const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsGenericT<short, unsigned short>(short const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, short>(short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned int>(short const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, int>(short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned long>(short const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, long>(short const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, float>(short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, double>(short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned char>(unsigned int const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, signed char>(unsigned int const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned short>(unsigned int const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, short>(unsigned int const*, int, short*, int, long 
long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned int>(unsigned int const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, int>(unsigned int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned long>(unsigned int const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, long>(unsigned int const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, float>(unsigned int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, double>(unsigned int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned char>(int const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, signed char>(int const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned short>(int const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, short>(int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned int>(int const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsGenericT<int, int>(int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned long>(int const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, long>(int const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, float>(int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, double>(int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned char>(unsigned long const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, signed char>(unsigned long const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned short>(unsigned long const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, short>(unsigned long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned int>(unsigned long const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, int>(unsigned long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned long>(unsigned long const*, int, 
unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, long>(unsigned long const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, float>(unsigned long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, double>(unsigned long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned char>(long const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, signed char>(long const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned short>(long const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, short>(long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned int>(long const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, int>(long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned long>(long const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, long>(long const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsGenericT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, float>(long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, double>(long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned char>(cpl::Float16 const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, signed char>(cpl::Float16 const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned short>(cpl::Float16 const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned int>(cpl::Float16 const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned long>(cpl::Float16 const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, long>(cpl::Float16 const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsGenericT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, signed char>(float const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, unsigned int>(float const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, int>(float const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, unsigned long>(float const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, long>(float const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, float>(float const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, double>(float const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned char>(double const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, signed char>(double const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, short>(double const*, int, short*, int, long long) Unexecuted instantiation: 
rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned int>(double const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, int>(double const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned long>(double const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, long>(double const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, float>(double const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, double>(double const*, int, double*, int, long long) |
2191 | | |
// Primary GDALCopyWordsT template: copies nWordCount words from pSrcData to
// pDstData, converting from Tin to Tout.
//
// This default version does no work of its own; it forwards every argument
// unchanged to GDALCopyWordsGenericT(). Explicit specializations of this
// template exist in this file for particular (Tin, Tout) pairs.
//
// pSrcData / pDstData        - source and destination buffers (declared
//                              CPL_RESTRICT).
// nSrcPixelStride /
// nDstPixelStride            - distance between consecutive words; sibling
//                              routines in this file apply these strides as
//                              byte offsets.
// nWordCount                 - number of words to copy.
2192 | | template <class Tin, class Tout>
2193 | | static void inline GDALCopyWordsT(const Tin *const CPL_RESTRICT pSrcData,
2194 | | int nSrcPixelStride,
2195 | | Tout *const CPL_RESTRICT pDstData,
2196 | | int nDstPixelStride, GPtrDiff_t nWordCount)
2197 | 0 | {
2198 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, nDstPixelStride,
2199 | 0 | nWordCount);
2200 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, unsigned char>(unsigned char const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, signed char>(unsigned char const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, unsigned long>(unsigned char const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, long>(unsigned char const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned char>(signed char const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, signed char>(signed char const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned short>(signed char const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, short>(signed char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned int>(signed char const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, int>(signed char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned long>(signed char const*, int, unsigned long*, int, long long) Unexecuted instantiation: 
rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, long>(signed char const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, float>(signed char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, double>(signed char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, signed char>(unsigned short const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned short>(unsigned short const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned int>(unsigned short const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, int>(unsigned short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned long>(unsigned short const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, long>(unsigned short const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned char>(short const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsT<short, signed char>(short const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned short>(short const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, short>(short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned int>(short const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, int>(short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned long>(short const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, long>(short const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, float>(short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, double>(short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned char>(unsigned int const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, signed char>(unsigned int const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned short>(unsigned int const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsT<unsigned int, short>(unsigned int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned int>(unsigned int const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, int>(unsigned int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned long>(unsigned int const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, long>(unsigned int const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, float>(unsigned int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, double>(unsigned int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned char>(int const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, signed char>(int const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned short>(int const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, short>(int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned int>(int const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsT<int, int>(int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned long>(int const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, long>(int const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, float>(int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, double>(int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned char>(unsigned long const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, signed char>(unsigned long const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned short>(unsigned long const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, short>(unsigned long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned int>(unsigned long const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, int>(unsigned long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned long>(unsigned long const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsT<unsigned long, long>(unsigned long const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, float>(unsigned long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, double>(unsigned long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned char>(long const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, signed char>(long const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned short>(long const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, short>(long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned int>(long const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, int>(long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned long>(long const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, long>(long const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, float>(long 
const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, double>(long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned char>(cpl::Float16 const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, signed char>(cpl::Float16 const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned short>(cpl::Float16 const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned int>(cpl::Float16 const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned long>(cpl::Float16 const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, long>(cpl::Float16 const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsT<float, signed char>(float const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, unsigned int>(float const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, int>(float const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, unsigned long>(float const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, long>(float const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, float>(float const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, double>(float const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned char>(double const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, signed char>(double const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, short>(double const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned int>(double const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, int>(double const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned long>(double const*, int, unsigned long*, int, 
long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, long>(double const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, float>(double const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, double>(double const*, int, double*, int, long long) |
2201 | | |
// Copies nWordCount words from pSrcData to pDstData, converting Tin to Tout,
// handling eight words per iteration when both buffers are packed.
//
// "Packed" means nSrcPixelStride == sizeof(Tin) and
// nDstPixelStride == sizeof(Tout). In that case groups of 8 words are
// converted with GDALCopy8Words(); any remaining words, or every word when
// the buffers are not packed, are converted one by one with GDALCopyWord().
//
// Both strides are applied to char pointers, i.e. they are byte offsets.
2202 | | template <class Tin, class Tout>
2203 | | static void inline GDALCopyWordsT_8atatime(
2204 | | const Tin *const CPL_RESTRICT pSrcData, int nSrcPixelStride,
2205 | | Tout *const CPL_RESTRICT pDstData, int nDstPixelStride,
2206 | | GPtrDiff_t nWordCount)
2207 | 0 | {
           // Running byte offset into the destination; shared by both loops.
2208 | 0 | decltype(nWordCount) nDstOffset = 0;
2209 | 

2210 | 0 | const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2211 | 0 | char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
           // Word index; also shared by both loops so the scalar loop resumes
           // where the 8-at-a-time loop stopped.
2212 | 0 | decltype(nWordCount) n = 0;
2213 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(Tin)) &&
2214 | 0 | nDstPixelStride == static_cast<int>(sizeof(Tout)))
2215 | 0 | {
           // nWordCount has a signed type (long long in the instantiations
           // listed after this function), so for counts below 8 the bound
           // nWordCount - 7 is <= 0 and this loop is skipped.
2216 | 0 | for (; n < nWordCount - 7; n += 8)
2217 | 0 | {
2218 | 0 | const Tin *pInValues = reinterpret_cast<const Tin *>(
2219 | 0 | pSrcDataPtr + (n * nSrcPixelStride));
2220 | 0 | Tout *const pOutPixels =
2221 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2222 | 

2223 | 0 | GDALCopy8Words(pInValues, pOutPixels);
2224 | 

2225 | 0 | nDstOffset += 8 * nDstPixelStride;
2226 | 0 | }
2227 | 0 | }
           // Scalar path: the tail of the packed case, or the whole buffer
           // when the strides do not match the element sizes.
2228 | 0 | for (; n < nWordCount; n++)
2229 | 0 | {
2230 | 0 | const Tin tValue =
2231 | 0 | *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2232 | 0 | Tout *const pOutPixel =
2233 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2234 | 

2235 | 0 | GDALCopyWord(tValue, *pOutPixel);
2236 | 

2237 | 0 | nDstOffset += nDstPixelStride;
2238 | 0 | }
2239 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, unsigned char>(float const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, unsigned short>(float const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, short>(float const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<double, unsigned short>(double const*, int, unsigned short*, int, long long) |
2240 | | |
2241 | | #ifdef HAVE_SSE2 |
2242 | | |
// Widens GByte words to a 16-bit integer type Tout (compiled only when
// HAVE_SSE2 is defined).
//
// Tout must be an integral type of the same size as uint16_t; this is
// enforced by the static_assert.
//
// When both buffers are packed (source stride == sizeof(GByte), destination
// stride == sizeof(Tout)), 16 source bytes are processed per iteration with
// SSE2 intrinsics and the remaining words are copied with a scalar loop.
// For any other stride combination the work is delegated to
// GDALCopyWordsGenericT().
2243 | | template <class Tout>
2244 | | void GDALCopyWordsByteTo16Bit(const GByte *const CPL_RESTRICT pSrcData,
2245 | | int nSrcPixelStride,
2246 | | Tout *const CPL_RESTRICT pDstData,
2247 | | int nDstPixelStride, GPtrDiff_t nWordCount)
2248 | 0 | {
2249 | 0 | static_assert(std::is_integral<Tout>::value &&
2250 | 0 | sizeof(Tout) == sizeof(uint16_t),
2251 | 0 | "Bad Tout");
2252 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2253 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2254 | 0 | {
2255 | 0 | decltype(nWordCount) n = 0;
2256 | 0 | const __m128i xmm_zero = _mm_setzero_si128();
           // Byte-typed view of the destination so that store addresses can
           // be expressed as byte offsets (n * 2 bytes for word n).
2257 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr =
2258 | 0 | reinterpret_cast<GByte *>(pDstData);
           // Signed bound: for fewer than 16 words the vector loop is skipped.
2259 | 0 | for (; n < nWordCount - 15; n += 16)
2260 | 0 | {
2261 | 0 | __m128i xmm = _mm_loadu_si128(
2262 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n));
           // Interleave each source byte with a zero byte: the low 8 bytes go
           // to xmm0 and the high 8 bytes to xmm1, each as 16-bit lanes.
2263 | 0 | __m128i xmm0 = _mm_unpacklo_epi8(xmm, xmm_zero);
2264 | 0 | __m128i xmm1 = _mm_unpackhi_epi8(xmm, xmm_zero);
           // Two 16-byte stores cover the 16 output words (32 bytes).
2265 | 0 | _mm_storeu_si128(
2266 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2), xmm0);
2267 | 0 | _mm_storeu_si128(
2268 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2 + 16), xmm1);
2269 | 0 | }
           // Scalar tail for the words left over after the vector loop.
2270 | 0 | for (; n < nWordCount; n++)
2271 | 0 | {
2272 | 0 | pDstData[n] = pSrcData[n];
2273 | 0 | }
2274 | 0 | }
2275 | 0 | else
2276 | 0 | {
2277 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2278 | 0 | nDstPixelStride, nWordCount);
2279 | 0 | }
2280 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo16Bit<unsigned short>(unsigned char const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo16Bit<short>(unsigned char const*, int, short*, int, long long) |
2281 | | |
// Explicit specialization of GDALCopyWordsT for GByte -> GUInt16: forwards
// all arguments to GDALCopyWordsByteTo16Bit().
2282 | | template <>
2283 | | void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2284 | | int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2285 | | int nDstPixelStride, GPtrDiff_t nWordCount)
2286 | 0 | {
2287 | 0 | GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2288 | 0 | nDstPixelStride, nWordCount);
2289 | 0 | }
2290 | | |
// Explicit specialization of GDALCopyWordsT for GByte -> GInt16: forwards
// all arguments to GDALCopyWordsByteTo16Bit().
2291 | | template <>
2292 | | void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2293 | | int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2294 | | int nDstPixelStride, GPtrDiff_t nWordCount)
2295 | 0 | {
2296 | 0 | GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2297 | 0 | nDstPixelStride, nWordCount);
2298 | 0 | }
2299 | | |
// Widens GByte words to a 32-bit integer type Tout (compiled only when
// HAVE_SSE2 is defined).
//
// Tout must be an integral type of the same size as uint32_t; this is
// enforced by the static_assert.
//
// When both buffers are packed (source stride == sizeof(GByte), destination
// stride == sizeof(Tout)), 16 source bytes are processed per iteration with
// SSE2 intrinsics and the remaining words are copied with a scalar loop.
// For any other stride combination the work is delegated to
// GDALCopyWordsGenericT().
2300 | | template <class Tout>
2301 | | void GDALCopyWordsByteTo32Bit(const GByte *const CPL_RESTRICT pSrcData,
2302 | | int nSrcPixelStride,
2303 | | Tout *const CPL_RESTRICT pDstData,
2304 | | int nDstPixelStride, GPtrDiff_t nWordCount)
2305 | 0 | {
2306 | 0 | static_assert(std::is_integral<Tout>::value &&
2307 | 0 | sizeof(Tout) == sizeof(uint32_t),
2308 | 0 | "Bad Tout");
2309 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2310 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2311 | 0 | {
2312 | 0 | decltype(nWordCount) n = 0;
2313 | 0 | const __m128i xmm_zero = _mm_setzero_si128();
           // Byte-typed view of the destination so that store addresses can
           // be expressed as byte offsets (n * 4 bytes for word n).
2314 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr =
2315 | 0 | reinterpret_cast<GByte *>(pDstData);
           // Signed bound: for fewer than 16 words the vector loop is skipped.
2316 | 0 | for (; n < nWordCount - 15; n += 16)
2317 | 0 | {
2318 | 0 | __m128i xmm = _mm_loadu_si128(
2319 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n));
           // First stage: interleave bytes with zero to get 16-bit lanes
           // (low 8 bytes in xmm_low, high 8 bytes in xmm_high).
2320 | 0 | __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2321 | 0 | __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
           // Second stage: interleave the 16-bit lanes with zero to get
           // 32-bit lanes; xmm0..xmm3 hold source bytes 0-3, 4-7, 8-11, 12-15.
2322 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2323 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2324 | 0 | __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2325 | 0 | __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
           // Four 16-byte stores cover the 16 output words (64 bytes).
2326 | 0 | _mm_storeu_si128(
2327 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4), xmm0);
2328 | 0 | _mm_storeu_si128(
2329 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 16), xmm1);
2330 | 0 | _mm_storeu_si128(
2331 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 32), xmm2);
2332 | 0 | _mm_storeu_si128(
2333 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 48), xmm3);
2334 | 0 | }
           // Scalar tail for the words left over after the vector loop.
2335 | 0 | for (; n < nWordCount; n++)
2336 | 0 | {
2337 | 0 | pDstData[n] = pSrcData[n];
2338 | 0 | }
2339 | 0 | }
2340 | 0 | else
2341 | 0 | {
2342 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2343 | 0 | nDstPixelStride, nWordCount);
2344 | 0 | }
2345 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo32Bit<unsigned int>(unsigned char const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo32Bit<int>(unsigned char const*, int, int*, int, long long) |
2346 | | |
// Explicit specialization of GDALCopyWordsT for GByte -> GUInt32: forwards
// all arguments to GDALCopyWordsByteTo32Bit().
2347 | | template <>
2348 | | void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2349 | | int nSrcPixelStride, GUInt32 *const CPL_RESTRICT pDstData,
2350 | | int nDstPixelStride, GPtrDiff_t nWordCount)
2351 | 0 | {
2352 | 0 | GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2353 | 0 | nDstPixelStride, nWordCount);
2354 | 0 | }
2355 | | |
// Explicit specialization of GDALCopyWordsT for GByte -> GInt32: forwards
// all arguments to GDALCopyWordsByteTo32Bit().
2356 | | template <>
2357 | | void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2358 | | int nSrcPixelStride, GInt32 *const CPL_RESTRICT pDstData,
2359 | | int nDstPixelStride, GPtrDiff_t nWordCount)
2360 | 0 | {
2361 | 0 | GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2362 | 0 | nDstPixelStride, nWordCount);
2363 | 0 | }
2364 | | |
2365 | | template <> |
2366 | | void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData, |
2367 | | int nSrcPixelStride, float *const CPL_RESTRICT pDstData, |
2368 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2369 | 0 | { |
2370 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2371 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2372 | 0 | { |
2373 | 0 | decltype(nWordCount) n = 0; |
2374 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); |
2375 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2376 | 0 | reinterpret_cast<GByte *>(pDstData); |
2377 | 0 | for (; n < nWordCount - 15; n += 16) |
2378 | 0 | { |
2379 | 0 | __m128i xmm = _mm_loadu_si128( |
2380 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2381 | 0 | __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero); |
2382 | 0 | __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero); |
2383 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero); |
2384 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero); |
2385 | 0 | __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero); |
2386 | 0 | __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero); |
2387 | 0 | __m128 xmm0_f = _mm_cvtepi32_ps(xmm0); |
2388 | 0 | __m128 xmm1_f = _mm_cvtepi32_ps(xmm1); |
2389 | 0 | __m128 xmm2_f = _mm_cvtepi32_ps(xmm2); |
2390 | 0 | __m128 xmm3_f = _mm_cvtepi32_ps(xmm3); |
2391 | 0 | _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4), |
2392 | 0 | xmm0_f); |
2393 | 0 | _mm_storeu_ps( |
2394 | 0 | reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f); |
2395 | 0 | _mm_storeu_ps( |
2396 | 0 | reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 32), xmm2_f); |
2397 | 0 | _mm_storeu_ps( |
2398 | 0 | reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 48), xmm3_f); |
2399 | 0 | } |
2400 | 0 | for (; n < nWordCount; n++) |
2401 | 0 | { |
2402 | 0 | pDstData[n] = pSrcData[n]; |
2403 | 0 | } |
2404 | 0 | } |
2405 | 0 | else |
2406 | 0 | { |
2407 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2408 | 0 | nDstPixelStride, nWordCount); |
2409 | 0 | } |
2410 | 0 | } |
2411 | | |
2412 | | template <> |
2413 | | void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData, |
2414 | | int nSrcPixelStride, double *const CPL_RESTRICT pDstData, |
2415 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2416 | 0 | { |
2417 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2418 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2419 | 0 | { |
2420 | 0 | decltype(nWordCount) n = 0; |
2421 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); |
2422 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2423 | 0 | reinterpret_cast<GByte *>(pDstData); |
2424 | 0 | for (; n < nWordCount - 15; n += 16) |
2425 | 0 | { |
2426 | 0 | __m128i xmm = _mm_loadu_si128( |
2427 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2428 | 0 | __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero); |
2429 | 0 | __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero); |
2430 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero); |
2431 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero); |
2432 | 0 | __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero); |
2433 | 0 | __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero); |
2434 | |
|
2435 | 0 | __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0); |
2436 | 0 | __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1); |
2437 | 0 | __m128d xmm2_low_d = _mm_cvtepi32_pd(xmm2); |
2438 | 0 | __m128d xmm3_low_d = _mm_cvtepi32_pd(xmm3); |
2439 | 0 | xmm0 = _mm_srli_si128(xmm0, 8); |
2440 | 0 | xmm1 = _mm_srli_si128(xmm1, 8); |
2441 | 0 | xmm2 = _mm_srli_si128(xmm2, 8); |
2442 | 0 | xmm3 = _mm_srli_si128(xmm3, 8); |
2443 | 0 | __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0); |
2444 | 0 | __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1); |
2445 | 0 | __m128d xmm2_high_d = _mm_cvtepi32_pd(xmm2); |
2446 | 0 | __m128d xmm3_high_d = _mm_cvtepi32_pd(xmm3); |
2447 | |
|
2448 | 0 | _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8), |
2449 | 0 | xmm0_low_d); |
2450 | 0 | _mm_storeu_pd( |
2451 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16), |
2452 | 0 | xmm0_high_d); |
2453 | 0 | _mm_storeu_pd( |
2454 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32), |
2455 | 0 | xmm1_low_d); |
2456 | 0 | _mm_storeu_pd( |
2457 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48), |
2458 | 0 | xmm1_high_d); |
2459 | 0 | _mm_storeu_pd( |
2460 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64), |
2461 | 0 | xmm2_low_d); |
2462 | 0 | _mm_storeu_pd( |
2463 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 80), |
2464 | 0 | xmm2_high_d); |
2465 | 0 | _mm_storeu_pd( |
2466 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96), |
2467 | 0 | xmm3_low_d); |
2468 | 0 | _mm_storeu_pd( |
2469 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 112), |
2470 | 0 | xmm3_high_d); |
2471 | 0 | } |
2472 | 0 | for (; n < nWordCount; n++) |
2473 | 0 | { |
2474 | 0 | pDstData[n] = pSrcData[n]; |
2475 | 0 | } |
2476 | 0 | } |
2477 | 0 | else |
2478 | 0 | { |
2479 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2480 | 0 | nDstPixelStride, nWordCount); |
2481 | 0 | } |
2482 | 0 | } |
2483 | | |
2484 | | template <> |
2485 | | void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData, |
2486 | | int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData, |
2487 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2488 | 0 | { |
2489 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2490 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2491 | 0 | { |
2492 | 0 | decltype(nWordCount) n = 0; |
2493 | | // In SSE2, min_epu16 does not exist, so shift from |
2494 | | // UInt16 to SInt16 to be able to use min_epi16 |
2495 | 0 | const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768); |
2496 | 0 | const __m128i xmm_m255_shifted = _mm_set1_epi16(255 - 32768); |
2497 | 0 | for (; n < nWordCount - 7; n += 8) |
2498 | 0 | { |
2499 | 0 | __m128i xmm = _mm_loadu_si128( |
2500 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2501 | 0 | xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16); |
2502 | 0 | xmm = _mm_min_epi16(xmm, xmm_m255_shifted); |
2503 | 0 | xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16); |
2504 | 0 | xmm = _mm_packus_epi16(xmm, xmm); |
2505 | 0 | GDALCopyXMMToInt64(xmm, |
2506 | 0 | reinterpret_cast<GPtrDiff_t *>(pDstData + n)); |
2507 | 0 | } |
2508 | 0 | for (; n < nWordCount; n++) |
2509 | 0 | { |
2510 | 0 | pDstData[n] = |
2511 | 0 | pSrcData[n] >= 255 ? 255 : static_cast<GByte>(pSrcData[n]); |
2512 | 0 | } |
2513 | 0 | } |
2514 | 0 | else |
2515 | 0 | { |
2516 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2517 | 0 | nDstPixelStride, nWordCount); |
2518 | 0 | } |
2519 | 0 | } |
2520 | | |
2521 | | template <> |
2522 | | void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData, |
2523 | | int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData, |
2524 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2525 | 0 | { |
2526 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2527 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2528 | 0 | { |
2529 | 0 | decltype(nWordCount) n = 0; |
2530 | | // In SSE2, min_epu16 does not exist, so shift from |
2531 | | // UInt16 to SInt16 to be able to use min_epi16 |
2532 | 0 | const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768); |
2533 | 0 | const __m128i xmm_32767_shifted = _mm_set1_epi16(32767 - 32768); |
2534 | 0 | for (; n < nWordCount - 7; n += 8) |
2535 | 0 | { |
2536 | 0 | __m128i xmm = _mm_loadu_si128( |
2537 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2538 | 0 | xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16); |
2539 | 0 | xmm = _mm_min_epi16(xmm, xmm_32767_shifted); |
2540 | 0 | xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16); |
2541 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm); |
2542 | 0 | } |
2543 | 0 | for (; n < nWordCount; n++) |
2544 | 0 | { |
2545 | 0 | pDstData[n] = |
2546 | 0 | pSrcData[n] >= 32767 ? 32767 : static_cast<GInt16>(pSrcData[n]); |
2547 | 0 | } |
2548 | 0 | } |
2549 | 0 | else |
2550 | 0 | { |
2551 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2552 | 0 | nDstPixelStride, nWordCount); |
2553 | 0 | } |
2554 | 0 | } |
2555 | | |
2556 | | template <> |
2557 | | void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData, |
2558 | | int nSrcPixelStride, float *const CPL_RESTRICT pDstData, |
2559 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2560 | 0 | { |
2561 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2562 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2563 | 0 | { |
2564 | 0 | decltype(nWordCount) n = 0; |
2565 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); |
2566 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2567 | 0 | reinterpret_cast<GByte *>(pDstData); |
2568 | 0 | for (; n < nWordCount - 7; n += 8) |
2569 | 0 | { |
2570 | 0 | __m128i xmm = _mm_loadu_si128( |
2571 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2572 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero); |
2573 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero); |
2574 | 0 | __m128 xmm0_f = _mm_cvtepi32_ps(xmm0); |
2575 | 0 | __m128 xmm1_f = _mm_cvtepi32_ps(xmm1); |
2576 | 0 | _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4), |
2577 | 0 | xmm0_f); |
2578 | 0 | _mm_storeu_ps( |
2579 | 0 | reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f); |
2580 | 0 | } |
2581 | 0 | for (; n < nWordCount; n++) |
2582 | 0 | { |
2583 | 0 | pDstData[n] = pSrcData[n]; |
2584 | 0 | } |
2585 | 0 | } |
2586 | 0 | else |
2587 | 0 | { |
2588 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2589 | 0 | nDstPixelStride, nWordCount); |
2590 | 0 | } |
2591 | 0 | } |
2592 | | |
2593 | | template <> |
2594 | | void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData, |
2595 | | int nSrcPixelStride, double *const CPL_RESTRICT pDstData, |
2596 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2597 | 0 | { |
2598 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2599 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2600 | 0 | { |
2601 | 0 | decltype(nWordCount) n = 0; |
2602 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); |
2603 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2604 | 0 | reinterpret_cast<GByte *>(pDstData); |
2605 | 0 | for (; n < nWordCount - 7; n += 8) |
2606 | 0 | { |
2607 | 0 | __m128i xmm = _mm_loadu_si128( |
2608 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2609 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero); |
2610 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero); |
2611 | |
|
2612 | 0 | __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0); |
2613 | 0 | __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1); |
2614 | 0 | xmm0 = _mm_srli_si128(xmm0, 8); |
2615 | 0 | xmm1 = _mm_srli_si128(xmm1, 8); |
2616 | 0 | __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0); |
2617 | 0 | __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1); |
2618 | |
|
2619 | 0 | _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8), |
2620 | 0 | xmm0_low_d); |
2621 | 0 | _mm_storeu_pd( |
2622 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16), |
2623 | 0 | xmm0_high_d); |
2624 | 0 | _mm_storeu_pd( |
2625 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32), |
2626 | 0 | xmm1_low_d); |
2627 | 0 | _mm_storeu_pd( |
2628 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48), |
2629 | 0 | xmm1_high_d); |
2630 | 0 | } |
2631 | 0 | for (; n < nWordCount; n++) |
2632 | 0 | { |
2633 | 0 | pDstData[n] = pSrcData[n]; |
2634 | 0 | } |
2635 | 0 | } |
2636 | 0 | else |
2637 | 0 | { |
2638 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2639 | 0 | nDstPixelStride, nWordCount); |
2640 | 0 | } |
2641 | 0 | } |
2642 | | |
2643 | | template <> |
2644 | | void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData, |
2645 | | int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData, |
2646 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2647 | 0 | { |
2648 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
2649 | 0 | nDstPixelStride, nWordCount); |
2650 | 0 | } |
2651 | | |
2652 | | #endif // HAVE_SSE2 |
2653 | | |
2654 | | template <> |
2655 | | void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
2656 | | int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData, |
2657 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2658 | 0 | { |
2659 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
2660 | 0 | nDstPixelStride, nWordCount); |
2661 | 0 | } |
2662 | | |
2663 | | template <> |
2664 | | void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
2665 | | int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData, |
2666 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2667 | 0 | { |
2668 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
2669 | 0 | nDstPixelStride, nWordCount); |
2670 | 0 | } |
2671 | | |
2672 | | template <> |
2673 | | void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
2674 | | int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData, |
2675 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2676 | 0 | { |
2677 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
2678 | 0 | nDstPixelStride, nWordCount); |
2679 | 0 | } |
2680 | | |
2681 | | /************************************************************************/ |
2682 | | /* GDALCopyWordsComplexT() */ |
2683 | | /************************************************************************/ |
2684 | | /** |
2685 | | * Template function, used to copy data from pSrcData into buffer |
2686 | | * pDstData, with stride nSrcPixelStride in the source data and |
2687 | | * stride nDstPixelStride in the destination data. Deals with the |
2688 | | * complex case, where input is complex and output is complex. |
2689 | | * |
2690 | | * @param pSrcData the source data buffer |
2691 | | * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels |
2692 | | * of interest. |
2693 | | * @param pDstData the destination buffer. |
2694 | | * @param nDstPixelStride the stride in the buffer pDstData for pixels of |
2695 | | * interest. |
2696 | | * @param nWordCount the total number of pixel words to copy |
2697 | | * |
2698 | | */ |
2699 | | template <class Tin, class Tout> |
2700 | | inline void GDALCopyWordsComplexT(const Tin *const CPL_RESTRICT pSrcData, |
2701 | | int nSrcPixelStride, |
2702 | | Tout *const CPL_RESTRICT pDstData, |
2703 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2704 | 0 | { |
2705 | 0 | decltype(nWordCount) nDstOffset = 0; |
2706 | 0 | const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData); |
2707 | 0 | char *const pDstDataPtr = reinterpret_cast<char *>(pDstData); |
2708 | |
|
2709 | 0 | for (decltype(nWordCount) n = 0; n < nWordCount; n++) |
2710 | 0 | { |
2711 | 0 | const Tin *const pPixelIn = |
2712 | 0 | reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride); |
2713 | 0 | Tout *const pPixelOut = |
2714 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset); |
2715 | |
|
2716 | 0 | GDALCopyWord(pPixelIn[0], pPixelOut[0]); |
2717 | 0 | GDALCopyWord(pPixelIn[1], pPixelOut[1]); |
2718 | |
|
2719 | 0 | nDstOffset += nDstPixelStride; |
2720 | 0 | } |
2721 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, short>(unsigned char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, int>(unsigned char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, float>(unsigned char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, double>(unsigned char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, short>(signed char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, int>(signed char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, float>(signed char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, double>(signed char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, short>(unsigned short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, int>(unsigned short const*, int, int*, int, long long) Unexecuted instantiation: 
rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, float>(unsigned short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, double>(unsigned short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, short>(short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, int>(short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, float>(short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, double>(short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, short>(unsigned int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, int>(unsigned int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, float>(unsigned int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, double>(unsigned int 
const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, short>(int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, int>(int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, float>(int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, double>(int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, short>(unsigned long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, int>(unsigned long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, float>(unsigned long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, double>(unsigned long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, short>(long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, int>(long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, cpl::Float16>(long const*, int, 
cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, float>(long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, double>(long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, short>(float const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, int>(float const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, float>(float const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, double>(float const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, short>(double const*, int, 
short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, int>(double const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, float>(double const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, double>(double const*, int, double*, int, long long) |
2722 | | |
2723 | | /************************************************************************/ |
2724 | | /* GDALCopyWordsComplexOutT() */ |
2725 | | /************************************************************************/ |
2726 | | /** |
2727 | | * Template function, used to copy data from pSrcData into buffer |
2728 | | * pDstData, with stride nSrcPixelStride in the source data and |
2729 | | * stride nDstPixelStride in the destination data. Deals with the |
2730 | | * case where the value is real coming in, but complex going out. |
2731 | | * |
2732 | | * @param pSrcData the source data buffer |
2733 | | * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels |
2734 | | * of interest, in bytes. |
2735 | | * @param pDstData the destination buffer. |
2736 | | * @param nDstPixelStride the stride in the buffer pDstData for pixels of |
2737 | | * interest, in bytes. |
2738 | | * @param nWordCount the total number of pixel words to copy |
2739 | | * |
2740 | | */ |
2741 | | template <class Tin, class Tout> |
2742 | | inline void GDALCopyWordsComplexOutT(const Tin *const CPL_RESTRICT pSrcData, |
2743 | | int nSrcPixelStride, |
2744 | | Tout *const CPL_RESTRICT pDstData, |
2745 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2746 | 0 | { |
2747 | 0 | decltype(nWordCount) nDstOffset = 0; |
2748 | |
|
2749 | 0 | const Tout tOutZero = static_cast<Tout>(0); |
2750 | |
|
2751 | 0 | const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData); |
2752 | 0 | char *const pDstDataPtr = reinterpret_cast<char *>(pDstData); |
2753 | |
|
2754 | 0 | for (decltype(nWordCount) n = 0; n < nWordCount; n++) |
2755 | 0 | { |
2756 | 0 | const Tin tValue = |
2757 | 0 | *reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride); |
2758 | 0 | Tout *const pPixelOut = |
2759 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset); |
2760 | 0 | GDALCopyWord(tValue, *pPixelOut); |
2761 | |
|
2762 | 0 | pPixelOut[1] = tOutZero; |
2763 | |
|
2764 | 0 | nDstOffset += nDstPixelStride; |
2765 | 0 | } |
2766 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, short>(unsigned char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, int>(unsigned char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, float>(unsigned char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, double>(unsigned char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, short>(signed char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, int>(signed char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, float>(signed char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, double>(signed char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, short>(unsigned short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, int>(unsigned short const*, int, int*, int, long long) 
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, float>(unsigned short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, double>(unsigned short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, short>(short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, int>(short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, float>(short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, double>(short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, short>(unsigned int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, int>(unsigned int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, float>(unsigned int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsComplexOutT<unsigned int, double>(unsigned int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, short>(int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, int>(int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, float>(int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, double>(int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, short>(unsigned long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, int>(unsigned long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, float>(unsigned long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, double>(unsigned long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, short>(long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, int>(long const*, int, int*, int, long long) Unexecuted instantiation: 
rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, float>(long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, double>(long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, short>(float const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, int>(float const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, float>(float const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, double>(float const*, int, double*, 
int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, short>(double const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, int>(double const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, float>(double const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, double>(double const*, int, double*, int, long long) |
2767 | | |
2768 | | /************************************************************************/ |
2769 | | /* GDALCopyWordsFromT() */ |
2770 | | /************************************************************************/ |
2771 | | /**
2772 | | * Template driver function. Given the input type T, call the appropriate
2773 | | * GDALCopyWordsT function template for the desired output type. You should
2774 | | * never call this function directly (call GDALCopyWords instead).
2775 | | * For complex destination types, GDALCopyWordsComplexT is used when bInComplex is true and GDALCopyWordsComplexOutT otherwise.
2776 | | * @param pSrcData source data buffer
2777 | | * @param nSrcPixelStride pixel stride in input buffer, in pixel words
2778 | | * @param bInComplex input is complex (only consulted when eDstType is a complex type)
2779 | | * @param pDstData destination data buffer
2780 | | * @param eDstType destination data type (GDT_Unknown and GDT_TypeCount hit CPLAssert(false))
2781 | | * @param nDstPixelStride pixel stride in output buffer, in pixel words
2782 | | * @param nWordCount number of pixel words to be copied
2783 | | */
2784 | | template <class T>
2785 | | inline void GDALCopyWordsFromT(const T *const CPL_RESTRICT pSrcData,
2786 | | int nSrcPixelStride, bool bInComplex,
2787 | | void *CPL_RESTRICT pDstData,
2788 | | GDALDataType eDstType, int nDstPixelStride,
2789 | | GPtrDiff_t nWordCount)
2790 | 0 | {
2791 | 0 | switch (eDstType) // one case per destination GDALDataType; pDstData is cast to the matching C type
2792 | 0 | {
2793 | 0 | case GDT_Byte:
2794 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride,
2795 | 0 | static_cast<unsigned char *>(pDstData),
2796 | 0 | nDstPixelStride, nWordCount);
2797 | 0 | break;
2798 | 0 | case GDT_Int8:
2799 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride,
2800 | 0 | static_cast<signed char *>(pDstData),
2801 | 0 | nDstPixelStride, nWordCount);
2802 | 0 | break;
2803 | 0 | case GDT_UInt16:
2804 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride,
2805 | 0 | static_cast<unsigned short *>(pDstData),
2806 | 0 | nDstPixelStride, nWordCount);
2807 | 0 | break;
2808 | 0 | case GDT_Int16:
2809 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride,
2810 | 0 | static_cast<short *>(pDstData), nDstPixelStride,
2811 | 0 | nWordCount);
2812 | 0 | break;
2813 | 0 | case GDT_UInt32:
2814 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride,
2815 | 0 | static_cast<unsigned int *>(pDstData),
2816 | 0 | nDstPixelStride, nWordCount);
2817 | 0 | break;
2818 | 0 | case GDT_Int32:
2819 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride,
2820 | 0 | static_cast<int *>(pDstData), nDstPixelStride,
2821 | 0 | nWordCount);
2822 | 0 | break;
2823 | 0 | case GDT_UInt64:
2824 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride,
2825 | 0 | static_cast<std::uint64_t *>(pDstData),
2826 | 0 | nDstPixelStride, nWordCount);
2827 | 0 | break;
2828 | 0 | case GDT_Int64:
2829 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride,
2830 | 0 | static_cast<std::int64_t *>(pDstData),
2831 | 0 | nDstPixelStride, nWordCount);
2832 | 0 | break;
2833 | 0 | case GDT_Float16:
2834 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride,
2835 | 0 | static_cast<GFloat16 *>(pDstData), nDstPixelStride,
2836 | 0 | nWordCount);
2837 | 0 | break;
2838 | 0 | case GDT_Float32:
2839 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride,
2840 | 0 | static_cast<float *>(pDstData), nDstPixelStride,
2841 | 0 | nWordCount);
2842 | 0 | break;
2843 | 0 | case GDT_Float64:
2844 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride,
2845 | 0 | static_cast<double *>(pDstData), nDstPixelStride,
2846 | 0 | nWordCount);
2847 | 0 | break;
2848 | 0 | case GDT_CInt16: // from here on: complex destinations, the helper is selected by bInComplex
2849 | 0 | if (bInComplex)
2850 | 0 | {
2851 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2852 | 0 | static_cast<short *>(pDstData),
2853 | 0 | nDstPixelStride, nWordCount);
2854 | 0 | }
2855 | 0 | else // input is not complex, so we need to promote to a complex
2856 | | // buffer
2857 | 0 | {
2858 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2859 | 0 | static_cast<short *>(pDstData),
2860 | 0 | nDstPixelStride, nWordCount);
2861 | 0 | }
2862 | 0 | break;
2863 | 0 | case GDT_CInt32:
2864 | 0 | if (bInComplex)
2865 | 0 | {
2866 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2867 | 0 | static_cast<int *>(pDstData),
2868 | 0 | nDstPixelStride, nWordCount);
2869 | 0 | }
2870 | 0 | else // input is not complex, so we need to promote to a complex
2871 | | // buffer
2872 | 0 | {
2873 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2874 | 0 | static_cast<int *>(pDstData),
2875 | 0 | nDstPixelStride, nWordCount);
2876 | 0 | }
2877 | 0 | break;
2878 | 0 | case GDT_CFloat16:
2879 | 0 | if (bInComplex)
2880 | 0 | {
2881 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2882 | 0 | static_cast<GFloat16 *>(pDstData),
2883 | 0 | nDstPixelStride, nWordCount);
2884 | 0 | }
2885 | 0 | else // input is not complex, so we need to promote to a complex
2886 | | // buffer
2887 | 0 | {
2888 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2889 | 0 | static_cast<GFloat16 *>(pDstData),
2890 | 0 | nDstPixelStride, nWordCount);
2891 | 0 | }
2892 | 0 | break;
2893 | 0 | case GDT_CFloat32:
2894 | 0 | if (bInComplex)
2895 | 0 | {
2896 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2897 | 0 | static_cast<float *>(pDstData),
2898 | 0 | nDstPixelStride, nWordCount);
2899 | 0 | }
2900 | 0 | else // input is not complex, so we need to promote to a complex
2901 | | // buffer
2902 | 0 | {
2903 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2904 | 0 | static_cast<float *>(pDstData),
2905 | 0 | nDstPixelStride, nWordCount);
2906 | 0 | }
2907 | 0 | break;
2908 | 0 | case GDT_CFloat64:
2909 | 0 | if (bInComplex)
2910 | 0 | {
2911 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2912 | 0 | static_cast<double *>(pDstData),
2913 | 0 | nDstPixelStride, nWordCount);
2914 | 0 | }
2915 | 0 | else // input is not complex, so we need to promote to a complex
2916 | | // buffer
2917 | 0 | {
2918 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2919 | 0 | static_cast<double *>(pDstData),
2920 | 0 | nDstPixelStride, nWordCount);
2921 | 0 | }
2922 | 0 | break;
2923 | 0 | case GDT_Unknown:
2924 | 0 | case GDT_TypeCount:
2925 | 0 | CPLAssert(false); // these two enumerators are not valid destination types; nothing is copied
2926 | 0 | }
2927 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned char>(unsigned char const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<signed char>(signed char const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned short>(unsigned short const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<short>(short const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned int>(unsigned int const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<int>(int const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned long>(unsigned long const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<long>(long const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<cpl::Float16>(cpl::Float16 const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<float>(float const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<double>(double const*, int, bool, void*, GDALDataType, int, long long) |
2928 | | |
2929 | | } // end anonymous namespace |
2930 | | |
2931 | | /************************************************************************/ |
2932 | | /* GDALReplicateWord() */ |
2933 | | /************************************************************************/ |
2934 | | |
2935 | | template <class T> // Copies the word of type T already stored at pDstData into the nWordCount words that follow it
2936 | | inline void GDALReplicateWordT(void *pDstData, int nDstPixelStride,
2937 | | GPtrDiff_t nWordCount)
2938 | 0 | {
2939 | 0 | const T valSet = *static_cast<const T *>(pDstData); // value to replicate: the first destination word
2940 | 0 | if (nDstPixelStride == static_cast<int>(sizeof(T))) // stride equals sizeof(T): destination words are contiguous
2941 | 0 | {
2942 | 0 | T *pDstPtr = static_cast<T *>(pDstData) + 1; // start writing at the second word
2943 | 0 | while (nWordCount >= 4) // manually unrolled: four words per iteration
2944 | 0 | {
2945 | 0 | nWordCount -= 4;
2946 | 0 | pDstPtr[0] = valSet;
2947 | 0 | pDstPtr[1] = valSet;
2948 | 0 | pDstPtr[2] = valSet;
2949 | 0 | pDstPtr[3] = valSet;
2950 | 0 | pDstPtr += 4;
2951 | 0 | }
2952 | 0 | while (nWordCount > 0) // remaining 0 to 3 words
2953 | 0 | {
2954 | 0 | --nWordCount;
2955 | 0 | *pDstPtr = valSet;
2956 | 0 | pDstPtr++;
2957 | 0 | }
2958 | 0 | }
2959 | 0 | else // any other stride: step through the buffer nDstPixelStride bytes at a time
2960 | 0 | {
2961 | 0 | GByte *pabyDstPtr = static_cast<GByte *>(pDstData) + nDstPixelStride;
2962 | 0 | while (nWordCount > 0)
2963 | 0 | {
2964 | 0 | --nWordCount;
2965 | 0 | *reinterpret_cast<T *>(pabyDstPtr) = valSet; // NOTE(review): direct store through a cast byte pointer; presumably callers guarantee suitable alignment - confirm
2966 | 0 | pabyDstPtr += nDstPixelStride;
2967 | 0 | }
2968 | 0 | }
2969 | 0 | } Unexecuted instantiation: void GDALReplicateWordT<unsigned short>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<short>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<unsigned int>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<int>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<unsigned long>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<long>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<cpl::Float16>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<float>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<double>(void*, int, long long) |
2970 | | |
2971 | | static void GDALReplicateWord(const void *CPL_RESTRICT pSrcData, // Converts one source word into pDstData, then repeats it so that nWordCount destination words hold the same value
2972 | | GDALDataType eSrcType,
2973 | | void *CPL_RESTRICT pDstData,
2974 | | GDALDataType eDstType, int nDstPixelStride,
2975 | | GPtrDiff_t nWordCount)
2976 | 0 | {
2977 | | /* -----------------------------------------------------------------------
2978 | | */
2979 | | /* Special case when the source data is always the same value */
2980 | | /* (for VRTSourcedRasterBand::IRasterIO and
2981 | | * VRTDerivedRasterBand::IRasterIO*/
2982 | | /* for example) */
2983 | | /* -----------------------------------------------------------------------
2984 | | */
2985 | | // Let the general translation case do the necessary conversions
2986 | | // on the first destination element.
2987 | 0 | GDALCopyWords64(pSrcData, eSrcType, 0, pDstData, eDstType, 0, 1);
2988 | | 
2989 | | // Now copy the first element to the nWordCount - 1 following destination
2990 | | // elements.
2991 | 0 | nWordCount--; // the first destination word has just been written
2992 | 0 | GByte *pabyDstWord = reinterpret_cast<GByte *>(pDstData) + nDstPixelStride; // address of the second destination word
2993 | 

2994 | 0 | switch (eDstType)
2995 | 0 | {
2996 | 0 | case GDT_Byte:
2997 | 0 | case GDT_Int8:
2998 | 0 | {
2999 | 0 | if (nDstPixelStride == 1) // contiguous bytes: one memset fills the rest
3000 | 0 | {
3001 | 0 | if (nWordCount > 0)
3002 | 0 | memset(pabyDstWord,
3003 | 0 | *reinterpret_cast<const GByte *>(pDstData),
3004 | 0 | nWordCount);
3005 | 0 | }
3006 | 0 | else // strided bytes: write one byte every nDstPixelStride bytes
3007 | 0 | {
3008 | 0 | GByte valSet = *reinterpret_cast<const GByte *>(pDstData);
3009 | 0 | while (nWordCount > 0)
3010 | 0 | {
3011 | 0 | --nWordCount;
3012 | 0 | *pabyDstWord = valSet;
3013 | 0 | pabyDstWord += nDstPixelStride;
3014 | 0 | }
3015 | 0 | }
3016 | 0 | break;
3017 | 0 | }
3018 | | 
3019 | 0 | #define CASE_DUPLICATE_SIMPLE(enum_type, c_type) \
3020 | 0 | case enum_type: \
3021 | 0 | { \
3022 | 0 | GDALReplicateWordT<c_type>(pDstData, nDstPixelStride, nWordCount); \
3023 | 0 | break; \
3024 | 0 | }
3025 | | 
3026 | 0 | CASE_DUPLICATE_SIMPLE(GDT_UInt16, GUInt16) // non-complex types wider than one byte delegate to GDALReplicateWordT<c_type>
3027 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Int16, GInt16)
3028 | 0 | CASE_DUPLICATE_SIMPLE(GDT_UInt32, GUInt32)
3029 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Int32, GInt32)
3030 | 0 | CASE_DUPLICATE_SIMPLE(GDT_UInt64, std::uint64_t)
3031 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Int64, std::int64_t)
3032 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Float16, GFloat16)
3033 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Float32, float)
3034 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Float64, double)
3035 | | 
3036 | 0 | #define CASE_DUPLICATE_COMPLEX(enum_type, c_type) \
3037 | 0 | case enum_type: \
3038 | 0 | { \
3039 | 0 | c_type valSet1 = reinterpret_cast<const c_type *>(pDstData)[0]; \
3040 | 0 | c_type valSet2 = reinterpret_cast<const c_type *>(pDstData)[1]; \
3041 | 0 | while (nWordCount > 0) \
3042 | 0 | { \
3043 | 0 | --nWordCount; \
3044 | 0 | reinterpret_cast<c_type *>(pabyDstWord)[0] = valSet1; \
3045 | 0 | reinterpret_cast<c_type *>(pabyDstWord)[1] = valSet2; \
3046 | 0 | pabyDstWord += nDstPixelStride; \
3047 | 0 | } \
3048 | 0 | break; \
3049 | 0 | }
3050 | | 
3051 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CInt16, GInt16) // complex types: both components (index 0 and 1) of the first word are repeated
3052 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CInt32, GInt32)
3053 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CFloat16, GFloat16)
3054 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CFloat32, float)
3055 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CFloat64, double)
3056 | | 
3057 | 0 | case GDT_Unknown:
3058 | 0 | case GDT_TypeCount:
3059 | 0 | CPLAssert(false); // these two enumerators are not valid destination types
3060 | 0 | }
3061 | 0 | }
3062 | | |
3063 | | /************************************************************************/ |
3064 | | /* GDALUnrolledCopy() */ |
3065 | | /************************************************************************/ |
3066 | | |
3067 | | template <class T, int srcStride, int dstStride> // Copies nIters elements of type T; srcStride and dstStride are compile-time strides counted in elements of T
3068 | | static inline void GDALUnrolledCopyGeneric(T *CPL_RESTRICT pDest,
3069 | | const T *CPL_RESTRICT pSrc,
3070 | | GPtrDiff_t nIters)
3071 | 0 | {
3072 | 0 | if (nIters >= 16) // main part: 16 elements per loop iteration, manually unrolled
3073 | 0 | {
3074 | 0 | for (GPtrDiff_t i = nIters / 16; i != 0; i--)
3075 | 0 | {
3076 | 0 | pDest[0 * dstStride] = pSrc[0 * srcStride];
3077 | 0 | pDest[1 * dstStride] = pSrc[1 * srcStride];
3078 | 0 | pDest[2 * dstStride] = pSrc[2 * srcStride];
3079 | 0 | pDest[3 * dstStride] = pSrc[3 * srcStride];
3080 | 0 | pDest[4 * dstStride] = pSrc[4 * srcStride];
3081 | 0 | pDest[5 * dstStride] = pSrc[5 * srcStride];
3082 | 0 | pDest[6 * dstStride] = pSrc[6 * srcStride];
3083 | 0 | pDest[7 * dstStride] = pSrc[7 * srcStride];
3084 | 0 | pDest[8 * dstStride] = pSrc[8 * srcStride];
3085 | 0 | pDest[9 * dstStride] = pSrc[9 * srcStride];
3086 | 0 | pDest[10 * dstStride] = pSrc[10 * srcStride];
3087 | 0 | pDest[11 * dstStride] = pSrc[11 * srcStride];
3088 | 0 | pDest[12 * dstStride] = pSrc[12 * srcStride];
3089 | 0 | pDest[13 * dstStride] = pSrc[13 * srcStride];
3090 | 0 | pDest[14 * dstStride] = pSrc[14 * srcStride];
3091 | 0 | pDest[15 * dstStride] = pSrc[15 * srcStride];
3092 | 0 | pDest += 16 * dstStride;
3093 | 0 | pSrc += 16 * srcStride;
3094 | 0 | }
3095 | 0 | nIters = nIters % 16; // elements left over after the unrolled loop
3096 | 0 | }
3097 | 0 | for (GPtrDiff_t i = 0; i < nIters; i++) // tail: pDest is indexed while pSrc is advanced
3098 | 0 | {
3099 | 0 | pDest[i * dstStride] = *pSrc;
3100 | 0 | pSrc += srcStride;
3101 | 0 | }
3102 | 0 | } Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 3, 1>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 2>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 3>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 4>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 2, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 3, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 4, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 2>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 3>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 4>(short*, short const*, long long) |
3103 | | |
3104 | | template <class T, int srcStride, int dstStride> // Primary template: same contract as GDALUnrolledCopyGeneric, to which it forwards
3105 | | static inline void GDALUnrolledCopy(T *CPL_RESTRICT pDest,
3106 | | const T *CPL_RESTRICT pSrc,
3107 | | GPtrDiff_t nIters)
3108 | 0 | {
3109 | 0 | GDALUnrolledCopyGeneric<T, srcStride, dstStride>(pDest, pSrc, nIters); // explicit GByte specializations are provided when HAVE_SSE2 is defined
3110 | 0 | } Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 2>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 3>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 4>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 2, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 3, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 4, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 2>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 3>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 4>(short*, short const*, long long) |
3111 | | |
3112 | | #ifdef HAVE_SSE2 |
3113 | | |
3114 | | template <> // SSE2 specialization: copies every 2nd byte of pSrc into contiguous pDest, nIters bytes in total
3115 | | void GDALUnrolledCopy<GByte, 2, 1>(GByte *CPL_RESTRICT pDest,
3116 | | const GByte *CPL_RESTRICT pSrc,
3117 | | GPtrDiff_t nIters)
3118 | 0 | {
3119 | 0 | decltype(nIters) i = 0;
3120 | 0 | if (nIters > 16) // vector path only when more than 16 bytes are requested
3121 | 0 | {
3122 | 0 | const __m128i xmm_mask = _mm_set1_epi16(0xff); // 0x00ff in every 16-bit lane
3123 | | // If we were sure that there would always be 1 trailing byte, we could
3124 | | // check against nIters - 15
3125 | 0 | for (; i < nIters - 16; i += 16) // each iteration loads 32 source bytes and stores 16 destination bytes
3126 | 0 | {
3127 | 0 | __m128i xmm0 =
3128 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3129 | 0 | __m128i xmm1 =
3130 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3131 | | // Set higher 8bit of each int16 packed word to 0
3132 | 0 | xmm0 = _mm_and_si128(xmm0, xmm_mask);
3133 | 0 | xmm1 = _mm_and_si128(xmm1, xmm_mask);
3134 | | // Pack int16 to uint8 and merge back both vector
3135 | 0 | xmm0 = _mm_packus_epi16(xmm0, xmm1);
3136 | | 
3137 | | // Store result
3138 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3139 | 

3140 | 0 | pSrc += 2 * 16;
3141 | 0 | }
3142 | 0 | }
3143 | 0 | for (; i < nIters; i++) // scalar tail, also the only path when nIters <= 16
3144 | 0 | {
3145 | 0 | pDest[i] = *pSrc;
3146 | 0 | pSrc += 2;
3147 | 0 | }
3148 | 0 | }
3149 | | |
3150 | | #ifdef HAVE_SSSE3_AT_COMPILE_TIME |
3151 | | |
3152 | | template <> // Specialization for source stride 3, compiled only when HAVE_SSSE3_AT_COMPILE_TIME is defined
3153 | | void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest,
3154 | | const GByte *CPL_RESTRICT pSrc,
3155 | | GPtrDiff_t nIters)
3156 | 0 | {
3157 | 0 | if (nIters > 16 && CPLHaveRuntimeSSSE3()) // SSSE3 routine only for more than 16 items and when the runtime check succeeds
3158 | 0 | {
3159 | 0 | GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters); // defined outside this file section (presumably declared in rasterio_ssse3.h - confirm)
3160 | 0 | }
3161 | 0 | else
3162 | 0 | {
3163 | 0 | GDALUnrolledCopyGeneric<GByte, 3, 1>(pDest, pSrc, nIters); // portable fallback
3164 | 0 | }
3165 | 0 | }
3166 | | |
3167 | | #endif |
3168 | | |
3169 | | template <> // SSE2 specialization: copies every 4th byte of pSrc into contiguous pDest, nIters bytes in total
3170 | | void GDALUnrolledCopy<GByte, 4, 1>(GByte *CPL_RESTRICT pDest,
3171 | | const GByte *CPL_RESTRICT pSrc,
3172 | | GPtrDiff_t nIters)
3173 | 0 | {
3174 | 0 | decltype(nIters) i = 0;
3175 | 0 | if (nIters > 16) // vector path only when more than 16 bytes are requested
3176 | 0 | {
3177 | 0 | const __m128i xmm_mask = _mm_set1_epi32(0xff); // 0x000000ff in every 32-bit lane
3178 | | // If we were sure that there would always be 3 trailing bytes, we could
3179 | | // check against nIters - 15
3180 | 0 | for (; i < nIters - 16; i += 16) // each iteration loads 64 source bytes and stores 16 destination bytes
3181 | 0 | {
3182 | 0 | __m128i xmm0 =
3183 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3184 | 0 | __m128i xmm1 =
3185 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3186 | 0 | __m128i xmm2 =
3187 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 32));
3188 | 0 | __m128i xmm3 =
3189 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 48));
3190 | | // Set higher 24bit of each int32 packed word to 0
3191 | 0 | xmm0 = _mm_and_si128(xmm0, xmm_mask);
3192 | 0 | xmm1 = _mm_and_si128(xmm1, xmm_mask);
3193 | 0 | xmm2 = _mm_and_si128(xmm2, xmm_mask);
3194 | 0 | xmm3 = _mm_and_si128(xmm3, xmm_mask);
3195 | | // Pack int32 to int16
3196 | 0 | xmm0 = _mm_packs_epi32(xmm0, xmm1);
3197 | 0 | xmm2 = _mm_packs_epi32(xmm2, xmm3);
3198 | | // Pack int16 to uint8
3199 | 0 | xmm0 = _mm_packus_epi16(xmm0, xmm2);
3200 | | 
3201 | | // Store result
3202 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3203 | 

3204 | 0 | pSrc += 4 * 16;
3205 | 0 | }
3206 | 0 | }
3207 | 0 | for (; i < nIters; i++) // scalar tail, also the only path when nIters <= 16
3208 | 0 | {
3209 | 0 | pDest[i] = *pSrc;
3210 | 0 | pSrc += 4;
3211 | 0 | }
3212 | 0 | }
3213 | | #endif // HAVE_SSE2 |
3214 | | |
3215 | | /************************************************************************/ |
3216 | | /* GDALFastCopy() */ |
3217 | | /************************************************************************/ |
3218 | | |
3219 | | template <class T> // Same-type strided copy of nIters elements; picks memcpy or a GDALUnrolledCopy variant for common stride combinations
3220 | | static inline void GDALFastCopy(T *CPL_RESTRICT pDest, int nDestStride,
3221 | | const T *CPL_RESTRICT pSrc, int nSrcStride,
3222 | | GPtrDiff_t nIters)
3223 | 0 | {
3224 | 0 | constexpr int sizeofT = static_cast<int>(sizeof(T)); // strides are compared against multiples of sizeof(T) below
3225 | 0 | if (nIters == 1) // single element: strides are irrelevant
3226 | 0 | {
3227 | 0 | *pDest = *pSrc;
3228 | 0 | }
3229 | 0 | else if (nDestStride == sizeofT) // destination is contiguous
3230 | 0 | {
3231 | 0 | if (nSrcStride == sizeofT) // source is contiguous too: plain memcpy
3232 | 0 | {
3233 | 0 | memcpy(pDest, pSrc, nIters * sizeof(T));
3234 | 0 | }
3235 | 0 | else if (nSrcStride == 2 * sizeofT)
3236 | 0 | {
3237 | 0 | GDALUnrolledCopy<T, 2, 1>(pDest, pSrc, nIters);
3238 | 0 | }
3239 | 0 | else if (nSrcStride == 3 * sizeofT)
3240 | 0 | {
3241 | 0 | GDALUnrolledCopy<T, 3, 1>(pDest, pSrc, nIters);
3242 | 0 | }
3243 | 0 | else if (nSrcStride == 4 * sizeofT)
3244 | 0 | {
3245 | 0 | GDALUnrolledCopy<T, 4, 1>(pDest, pSrc, nIters);
3246 | 0 | }
3247 | 0 | else // any other source stride: element-by-element loop
3248 | 0 | {
3249 | 0 | while (nIters-- > 0)
3250 | 0 | {
3251 | 0 | *pDest = *pSrc;
3252 | 0 | pSrc += nSrcStride / sizeofT; // NOTE(review): integer division; presumably strides are multiples of sizeof(T) - confirm with callers
3253 | 0 | pDest++;
3254 | 0 | }
3255 | 0 | }
3256 | 0 | }
3257 | 0 | else if (nSrcStride == sizeofT) // source is contiguous, destination is strided
3258 | 0 | {
3259 | 0 | if (nDestStride == 2 * sizeofT)
3260 | 0 | {
3261 | 0 | GDALUnrolledCopy<T, 1, 2>(pDest, pSrc, nIters);
3262 | 0 | }
3263 | 0 | else if (nDestStride == 3 * sizeofT)
3264 | 0 | {
3265 | 0 | GDALUnrolledCopy<T, 1, 3>(pDest, pSrc, nIters);
3266 | 0 | }
3267 | 0 | else if (nDestStride == 4 * sizeofT)
3268 | 0 | {
3269 | 0 | GDALUnrolledCopy<T, 1, 4>(pDest, pSrc, nIters);
3270 | 0 | }
3271 | 0 | else // any other destination stride: element-by-element loop
3272 | 0 | {
3273 | 0 | while (nIters-- > 0)
3274 | 0 | {
3275 | 0 | *pDest = *pSrc;
3276 | 0 | pSrc++;
3277 | 0 | pDest += nDestStride / sizeofT;
3278 | 0 | }
3279 | 0 | }
3280 | 0 | }
3281 | 0 | else // both sides strided: element-by-element loop
3282 | 0 | {
3283 | 0 | while (nIters-- > 0)
3284 | 0 | {
3285 | 0 | *pDest = *pSrc;
3286 | 0 | pSrc += nSrcStride / sizeofT;
3287 | 0 | pDest += nDestStride / sizeofT;
3288 | 0 | }
3289 | 0 | }
3290 | 0 | } Unexecuted instantiation: rasterio.cpp:void GDALFastCopy<unsigned char>(unsigned char*, int, unsigned char const*, int, long long) Unexecuted instantiation: rasterio.cpp:void GDALFastCopy<short>(short*, int, short const*, int, long long) |
3291 | | |
3292 | | /************************************************************************/ |
3293 | | /* GDALFastCopyByte() */ |
3294 | | /************************************************************************/ |
3295 | | |
3296 | | static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData, // Byte-to-byte strided copy; note the source-first argument order, unlike GDALFastCopy
3297 | | int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
3298 | | int nDstPixelStride, GPtrDiff_t nWordCount)
3299 | 0 | {
3300 | 0 | GDALFastCopy(pDstData, nDstPixelStride, pSrcData, nSrcPixelStride, // GDALFastCopy takes destination first, then source
3301 | 0 | nWordCount);
3302 | 0 | }
3303 | | |
3304 | | /************************************************************************/ |
3305 | | /* GDALCopyWords() */ |
3306 | | /************************************************************************/ |
3307 | | |
3308 | | /**
3309 | | * Copy pixel words from buffer to buffer.
3310 | | * Thin wrapper: forwards every argument unchanged to GDALCopyWords64(), whose nWordCount parameter is a GPtrDiff_t instead of an int.
3311 | | * @see GDALCopyWords64()
3312 | | */
3313 | | void CPL_STDCALL GDALCopyWords(const void *CPL_RESTRICT pSrcData,
3314 | | GDALDataType eSrcType, int nSrcPixelStride,
3315 | | void *CPL_RESTRICT pDstData,
3316 | | GDALDataType eDstType, int nDstPixelStride,
3317 | | int nWordCount)
3318 | 0 | {
3319 | 0 | GDALCopyWords64(pSrcData, eSrcType, nSrcPixelStride, pDstData, eDstType,
3320 | 0 | nDstPixelStride, nWordCount);
3321 | 0 | }
3322 | | |
3323 | | /************************************************************************/ |
3324 | | /* GDALCopyWords64() */ |
3325 | | /************************************************************************/ |
3326 | | |
3327 | | /** |
3328 | | * Copy pixel words from buffer to buffer. |
3329 | | * |
3330 | | * This function is used to copy pixel word values from one memory buffer |
3331 | | * to another, with support for conversion between data types, and differing |
3332 | | * step factors. The data type conversion is done using the following |
3333 | | * rules: |
3334 | | * <ul> |
3335 | | * <li>Values assigned to a lower range integer type are clipped. For |
3336 | | * instance assigning GDT_Int16 values to a GDT_Byte buffer will cause values |
3337 | | * less than 0 to be set to 0, and values larger than 255 to be set to 255.
3338 | | * </li> |
3339 | | * <li> |
3340 | | * Assignment from floating point to integer rounds to closest integer. |
3341 | | * +Infinity is mapped to the largest integer. -Infinity is mapped to the |
3342 | | * smallest integer. NaN is mapped to 0. |
3343 | | * </li> |
3344 | | * <li> |
3345 | | * Assignment from non-complex to complex will result in the imaginary part |
3346 | | * being set to zero on output. |
3347 | | * </li> |
3348 | | * <li> Assignment from complex to
3349 | | * non-complex will result in the imaginary portion being lost and the real
3350 | | * component being preserved (<i>not magnitude!</i>).
3351 | | * </li> |
3352 | | * </ul> |
3353 | | * |
3354 | | * No assumptions are made about the source or destination words occurring |
3355 | | * on word boundaries. It is assumed that all values are in native machine |
3356 | | * byte order. |
3357 | | * |
3358 | | * @param pSrcData Pointer to source data to be converted. |
3359 | | * @param eSrcType the source data type (see GDALDataType enum) |
3360 | | * @param nSrcPixelStride Source pixel stride (i.e. distance between 2 words), |
3361 | | * in bytes |
3362 | | * @param pDstData Pointer to buffer where destination data should go |
3363 | | * @param eDstType the destination data type (see GDALDataType enum) |
3364 | | * @param nDstPixelStride Destination pixel stride (i.e. distance between 2 |
3365 | | * words), in bytes |
3366 | | * @param nWordCount number of words to be copied |
3367 | | * |
3368 | | * @note |
3369 | | * When adding a new data type to GDAL, you must do the following to |
3370 | | * support it properly within the GDALCopyWords function: |
3371 | | * 1. Add the data type to the switch on eSrcType in GDALCopyWords. |
3372 | | * This should invoke the appropriate GDALCopyWordsFromT wrapper. |
3373 | | * 2. Add the data type to the switch on eDstType in GDALCopyWordsFromT. |
3374 | | * This should call the appropriate GDALCopyWordsT template. |
3375 | | * 3. If appropriate, overload the appropriate CopyWord template in the |
3376 | | * above namespace. This will ensure that any conversion issues are |
3377 | | * handled (cases like the float -> int32 case, where the min/max
3378 | | * values are subject to roundoff error).
3379 | | */ |
3380 | | |
3381 | | void CPL_STDCALL GDALCopyWords64(const void *CPL_RESTRICT pSrcData, |
3382 | | GDALDataType eSrcType, int nSrcPixelStride, |
3383 | | void *CPL_RESTRICT pDstData, |
3384 | | GDALDataType eDstType, int nDstPixelStride, |
3385 | | GPtrDiff_t nWordCount) |
3386 | | |
3387 | 0 | { |
3388 | | // On platforms where alignment matters, be careful |
3389 | 0 | const int nSrcDataTypeSize = GDALGetDataTypeSizeBytes(eSrcType); |
3390 | 0 | const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstType); |
3391 | 0 | if (CPL_UNLIKELY(nSrcDataTypeSize == 0 || nDstDataTypeSize == 0)) |
3392 | 0 | { |
3393 | 0 | CPLError(CE_Failure, CPLE_NotSupported, |
3394 | 0 | "GDALCopyWords64(): unsupported GDT_Unknown/GDT_TypeCount " |
3395 | 0 | "argument"); |
3396 | 0 | return; |
3397 | 0 | } |
3398 | 0 | if (!(eSrcType == eDstType && nSrcPixelStride == nDstPixelStride) && |
3399 | 0 | ((reinterpret_cast<uintptr_t>(pSrcData) % nSrcDataTypeSize) != 0 || |
3400 | 0 | (reinterpret_cast<uintptr_t>(pDstData) % nDstDataTypeSize) != 0 || |
3401 | 0 | (nSrcPixelStride % nSrcDataTypeSize) != 0 || |
3402 | 0 | (nDstPixelStride % nDstDataTypeSize) != 0)) |
3403 | 0 | { |
3404 | 0 | if (eSrcType == eDstType) |
3405 | 0 | { |
3406 | 0 | for (decltype(nWordCount) i = 0; i < nWordCount; i++) |
3407 | 0 | { |
3408 | 0 | memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i, |
3409 | 0 | static_cast<const GByte *>(pSrcData) + |
3410 | 0 | nSrcPixelStride * i, |
3411 | 0 | nDstDataTypeSize); |
3412 | 0 | } |
3413 | 0 | } |
3414 | 0 | else |
3415 | 0 | { |
3416 | 0 | const auto getAlignedPtr = [](GByte *ptr, int align) |
3417 | 0 | { |
3418 | 0 | return ptr + |
3419 | 0 | ((align - (reinterpret_cast<uintptr_t>(ptr) % align)) % |
3420 | 0 | align); |
3421 | 0 | }; |
3422 | | |
3423 | | // The largest we need is for CFloat64 (16 bytes), so 32 bytes to |
3424 | | // be sure to get correctly aligned pointer. |
3425 | 0 | constexpr size_t SIZEOF_CFLOAT64 = 2 * sizeof(double); |
3426 | 0 | GByte abySrcBuffer[2 * SIZEOF_CFLOAT64]; |
3427 | 0 | GByte abyDstBuffer[2 * SIZEOF_CFLOAT64]; |
3428 | 0 | GByte *pabySrcBuffer = |
3429 | 0 | getAlignedPtr(abySrcBuffer, nSrcDataTypeSize); |
3430 | 0 | GByte *pabyDstBuffer = |
3431 | 0 | getAlignedPtr(abyDstBuffer, nDstDataTypeSize); |
3432 | 0 | for (decltype(nWordCount) i = 0; i < nWordCount; i++) |
3433 | 0 | { |
3434 | 0 | memcpy(pabySrcBuffer, |
3435 | 0 | static_cast<const GByte *>(pSrcData) + |
3436 | 0 | nSrcPixelStride * i, |
3437 | 0 | nSrcDataTypeSize); |
3438 | 0 | GDALCopyWords64(pabySrcBuffer, eSrcType, 0, pabyDstBuffer, |
3439 | 0 | eDstType, 0, 1); |
3440 | 0 | memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i, |
3441 | 0 | pabyDstBuffer, nDstDataTypeSize); |
3442 | 0 | } |
3443 | 0 | } |
3444 | 0 | return; |
3445 | 0 | } |
3446 | | |
3447 | | // Deal with the case where we're replicating a single word into the |
3448 | | // provided buffer |
3449 | 0 | if (nSrcPixelStride == 0 && nWordCount > 1) |
3450 | 0 | { |
3451 | 0 | GDALReplicateWord(pSrcData, eSrcType, pDstData, eDstType, |
3452 | 0 | nDstPixelStride, nWordCount); |
3453 | 0 | return; |
3454 | 0 | } |
3455 | | |
3456 | 0 | if (eSrcType == eDstType) |
3457 | 0 | { |
3458 | 0 | if (eSrcType == GDT_Byte || eSrcType == GDT_Int8) |
3459 | 0 | { |
3460 | 0 | GDALFastCopy(static_cast<GByte *>(pDstData), nDstPixelStride, |
3461 | 0 | static_cast<const GByte *>(pSrcData), nSrcPixelStride, |
3462 | 0 | nWordCount); |
3463 | 0 | return; |
3464 | 0 | } |
3465 | | |
3466 | 0 | if (nSrcDataTypeSize == 2 && (nSrcPixelStride % 2) == 0 && |
3467 | 0 | (nDstPixelStride % 2) == 0) |
3468 | 0 | { |
3469 | 0 | GDALFastCopy(static_cast<short *>(pDstData), nDstPixelStride, |
3470 | 0 | static_cast<const short *>(pSrcData), nSrcPixelStride, |
3471 | 0 | nWordCount); |
3472 | 0 | return; |
3473 | 0 | } |
3474 | | |
3475 | 0 | if (nWordCount == 1) |
3476 | 0 | { |
3477 | | #if defined(CSA_BUILD) || defined(__COVERITY__) |
3478 | | // Avoid false positives... |
3479 | | memcpy(pDstData, pSrcData, nSrcDataTypeSize); |
3480 | | #else |
3481 | 0 | if (nSrcDataTypeSize == 2) |
3482 | 0 | memcpy(pDstData, pSrcData, 2); |
3483 | 0 | else if (nSrcDataTypeSize == 4) |
3484 | 0 | memcpy(pDstData, pSrcData, 4); |
3485 | 0 | else if (nSrcDataTypeSize == 8) |
3486 | 0 | memcpy(pDstData, pSrcData, 8); |
3487 | 0 | else /* if( eSrcType == GDT_CFloat64 ) */ |
3488 | 0 | memcpy(pDstData, pSrcData, 16); |
3489 | 0 | #endif |
3490 | 0 | return; |
3491 | 0 | } |
3492 | | |
3493 | | // Let memcpy() handle the case where we're copying a packed buffer |
3494 | | // of pixels. |
3495 | 0 | if (nSrcPixelStride == nDstPixelStride) |
3496 | 0 | { |
3497 | 0 | if (nSrcPixelStride == nSrcDataTypeSize) |
3498 | 0 | { |
3499 | 0 | memcpy(pDstData, pSrcData, nWordCount * nSrcDataTypeSize); |
3500 | 0 | return; |
3501 | 0 | } |
3502 | 0 | } |
3503 | 0 | } |
3504 | | |
3505 | | // Handle the more general case -- deals with conversion of data types |
3506 | | // directly. |
3507 | 0 | switch (eSrcType) |
3508 | 0 | { |
3509 | 0 | case GDT_Byte: |
3510 | 0 | GDALCopyWordsFromT<unsigned char>( |
3511 | 0 | static_cast<const unsigned char *>(pSrcData), nSrcPixelStride, |
3512 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
3513 | 0 | break; |
3514 | 0 | case GDT_Int8: |
3515 | 0 | GDALCopyWordsFromT<signed char>( |
3516 | 0 | static_cast<const signed char *>(pSrcData), nSrcPixelStride, |
3517 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
3518 | 0 | break; |
3519 | 0 | case GDT_UInt16: |
3520 | 0 | GDALCopyWordsFromT<unsigned short>( |
3521 | 0 | static_cast<const unsigned short *>(pSrcData), nSrcPixelStride, |
3522 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
3523 | 0 | break; |
3524 | 0 | case GDT_Int16: |
3525 | 0 | GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData), |
3526 | 0 | nSrcPixelStride, false, pDstData, |
3527 | 0 | eDstType, nDstPixelStride, nWordCount); |
3528 | 0 | break; |
3529 | 0 | case GDT_UInt32: |
3530 | 0 | GDALCopyWordsFromT<unsigned int>( |
3531 | 0 | static_cast<const unsigned int *>(pSrcData), nSrcPixelStride, |
3532 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
3533 | 0 | break; |
3534 | 0 | case GDT_Int32: |
3535 | 0 | GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData), |
3536 | 0 | nSrcPixelStride, false, pDstData, eDstType, |
3537 | 0 | nDstPixelStride, nWordCount); |
3538 | 0 | break; |
3539 | 0 | case GDT_UInt64: |
3540 | 0 | GDALCopyWordsFromT<std::uint64_t>( |
3541 | 0 | static_cast<const std::uint64_t *>(pSrcData), nSrcPixelStride, |
3542 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
3543 | 0 | break; |
3544 | 0 | case GDT_Int64: |
3545 | 0 | GDALCopyWordsFromT<std::int64_t>( |
3546 | 0 | static_cast<const std::int64_t *>(pSrcData), nSrcPixelStride, |
3547 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
3548 | 0 | break; |
3549 | 0 | case GDT_Float16: |
3550 | 0 | GDALCopyWordsFromT<GFloat16>( |
3551 | 0 | static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride, false, |
3552 | 0 | pDstData, eDstType, nDstPixelStride, nWordCount); |
3553 | 0 | break; |
3554 | 0 | case GDT_Float32: |
3555 | 0 | GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData), |
3556 | 0 | nSrcPixelStride, false, pDstData, |
3557 | 0 | eDstType, nDstPixelStride, nWordCount); |
3558 | 0 | break; |
3559 | 0 | case GDT_Float64: |
3560 | 0 | GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData), |
3561 | 0 | nSrcPixelStride, false, pDstData, |
3562 | 0 | eDstType, nDstPixelStride, nWordCount); |
3563 | 0 | break; |
3564 | 0 | case GDT_CInt16: |
3565 | 0 | GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData), |
3566 | 0 | nSrcPixelStride, true, pDstData, eDstType, |
3567 | 0 | nDstPixelStride, nWordCount); |
3568 | 0 | break; |
3569 | 0 | case GDT_CInt32: |
3570 | 0 | GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData), |
3571 | 0 | nSrcPixelStride, true, pDstData, eDstType, |
3572 | 0 | nDstPixelStride, nWordCount); |
3573 | 0 | break; |
3574 | 0 | case GDT_CFloat16: |
3575 | 0 | GDALCopyWordsFromT<GFloat16>( |
3576 | 0 | static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride, true, |
3577 | 0 | pDstData, eDstType, nDstPixelStride, nWordCount); |
3578 | 0 | break; |
3579 | 0 | case GDT_CFloat32: |
3580 | 0 | GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData), |
3581 | 0 | nSrcPixelStride, true, pDstData, eDstType, |
3582 | 0 | nDstPixelStride, nWordCount); |
3583 | 0 | break; |
3584 | 0 | case GDT_CFloat64: |
3585 | 0 | GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData), |
3586 | 0 | nSrcPixelStride, true, pDstData, |
3587 | 0 | eDstType, nDstPixelStride, nWordCount); |
3588 | 0 | break; |
3589 | 0 | case GDT_Unknown: |
3590 | 0 | case GDT_TypeCount: |
3591 | 0 | CPLAssert(false); |
3592 | 0 | } |
3593 | 0 | } |
3594 | | |
3595 | | /************************************************************************/ |
3596 | | /* GDALCopyBits() */ |
3597 | | /************************************************************************/ |
3598 | | |
3599 | | /** |
3600 | | * Bitwise word copying. |
3601 | | * |
3602 | | * A function for moving sets of partial bytes around. Loosely |
3603 | | * speaking this is a bitwise analog to GDALCopyWords(). |
3604 | | * |
3605 | | * It copies nStepCount "words" where each word is nBitCount bits long. |
3606 | | * The nSrcStep and nDstStep are the number of bits from the start of one |
3607 | | * word to the next (same as nBitCount if they are packed). The nSrcOffset |
3608 | | * and nDstOffset are the offset into the source and destination buffers |
3609 | | * to start at, also measured in bits. |
3610 | | * |
3611 | | * All bit offsets are assumed to start from the high order bit in a byte |
3612 | | * (i.e. most significant bit first). Currently this function is not very |
3613 | | * optimized, but it may be improved for some common cases in the future |
3614 | | * as needed. |
3615 | | * |
3616 | | * @param pabySrcData the source data buffer. |
3617 | | * @param nSrcOffset the offset (in bits) in pabySrcData to the start of the |
3618 | | * first word to copy. |
3619 | | * @param nSrcStep the offset in bits from the start one source word to the |
3620 | | * start of the next. |
3621 | | * @param pabyDstData the destination data buffer. |
3622 | | * @param nDstOffset the offset (in bits) in pabyDstData to the start of the |
3623 | | * first word to copy over. |
3624 | | * @param nDstStep the offset in bits from the start one word to the |
3625 | | * start of the next. |
3626 | | * @param nBitCount the number of bits in a word to be copied. |
3627 | | * @param nStepCount the number of words to copy. |
3628 | | */ |
3629 | | |
3630 | | void GDALCopyBits(const GByte *pabySrcData, int nSrcOffset, int nSrcStep, |
3631 | | GByte *pabyDstData, int nDstOffset, int nDstStep, |
3632 | | int nBitCount, int nStepCount) |
3633 | | |
3634 | 0 | { |
3635 | 0 | VALIDATE_POINTER0(pabySrcData, "GDALCopyBits"); |
3636 | | |
3637 | 0 | for (int iStep = 0; iStep < nStepCount; iStep++) |
3638 | 0 | { |
3639 | 0 | for (int iBit = 0; iBit < nBitCount; iBit++) |
3640 | 0 | { |
3641 | 0 | if (pabySrcData[nSrcOffset >> 3] & (0x80 >> (nSrcOffset & 7))) |
3642 | 0 | pabyDstData[nDstOffset >> 3] |= (0x80 >> (nDstOffset & 7)); |
3643 | 0 | else |
3644 | 0 | pabyDstData[nDstOffset >> 3] &= ~(0x80 >> (nDstOffset & 7)); |
3645 | |
|
3646 | 0 | nSrcOffset++; |
3647 | 0 | nDstOffset++; |
3648 | 0 | } |
3649 | |
|
3650 | 0 | nSrcOffset += (nSrcStep - nBitCount); |
3651 | 0 | nDstOffset += (nDstStep - nBitCount); |
3652 | 0 | } |
3653 | 0 | } |
3654 | | |
3655 | | /************************************************************************/ |
3656 | | /* GDALBandGetBestOverviewLevel() */
3657 | | /* */ |
3658 | | /* Returns the best overview level to satisfy the query or -1 if none */ |
3659 | | /* Also updates nXOff, nYOff, nXSize, nYSize and psExtraArg when */ |
3660 | | /* returning a valid overview level */ |
3661 | | /************************************************************************/ |
3662 | | |
3663 | | int GDALBandGetBestOverviewLevel(GDALRasterBand *poBand, int &nXOff, int &nYOff, |
3664 | | int &nXSize, int &nYSize, int nBufXSize, |
3665 | | int nBufYSize) |
3666 | 0 | { |
3667 | 0 | return GDALBandGetBestOverviewLevel2(poBand, nXOff, nYOff, nXSize, nYSize, |
3668 | 0 | nBufXSize, nBufYSize, nullptr); |
3669 | 0 | } |
3670 | | |
3671 | | int GDALBandGetBestOverviewLevel2(GDALRasterBand *poBand, int &nXOff, |
3672 | | int &nYOff, int &nXSize, int &nYSize, |
3673 | | int nBufXSize, int nBufYSize, |
3674 | | GDALRasterIOExtraArg *psExtraArg) |
3675 | 0 | { |
3676 | 0 | if (psExtraArg != nullptr && psExtraArg->nVersion > 1 && |
3677 | 0 | psExtraArg->bUseOnlyThisScale) |
3678 | 0 | return -1; |
3679 | | /* -------------------------------------------------------------------- */ |
3680 | | /* Compute the desired downsampling factor. It is */ |
3681 | | /* based on the least reduced axis, and represents the number */ |
3682 | | /* of source pixels to one destination pixel. */ |
3683 | | /* -------------------------------------------------------------------- */ |
3684 | 0 | const double dfDesiredDownsamplingFactor = |
3685 | 0 | ((nXSize / static_cast<double>(nBufXSize)) < |
3686 | 0 | (nYSize / static_cast<double>(nBufYSize)) || |
3687 | 0 | nBufYSize == 1) |
3688 | 0 | ? nXSize / static_cast<double>(nBufXSize) |
3689 | 0 | : nYSize / static_cast<double>(nBufYSize); |
3690 | | |
3691 | | /* -------------------------------------------------------------------- */ |
3692 | | /* Find the overview level that largest downsampling factor (most */ |
3693 | | /* downsampled) that is still less than (or only a little more) */ |
3694 | | /* downsampled than the request. */ |
3695 | | /* -------------------------------------------------------------------- */ |
3696 | 0 | const int nOverviewCount = poBand->GetOverviewCount(); |
3697 | 0 | GDALRasterBand *poBestOverview = nullptr; |
3698 | 0 | double dfBestDownsamplingFactor = 0; |
3699 | 0 | int nBestOverviewLevel = -1; |
3700 | |
|
3701 | 0 | const char *pszOversampligThreshold = |
3702 | 0 | CPLGetConfigOption("GDAL_OVERVIEW_OVERSAMPLING_THRESHOLD", nullptr); |
3703 | | |
3704 | | // Note: keep this logic for overview selection in sync between |
3705 | | // gdalwarp_lib.cpp and rasterio.cpp |
3706 | | // Cf https://github.com/OSGeo/gdal/pull/9040#issuecomment-1898524693 |
3707 | 0 | const double dfOversamplingThreshold = |
3708 | 0 | pszOversampligThreshold ? CPLAtof(pszOversampligThreshold) |
3709 | 0 | : psExtraArg && psExtraArg->eResampleAlg != GRIORA_NearestNeighbour |
3710 | 0 | ? 1.0 |
3711 | 0 | : 1.2; |
3712 | 0 | for (int iOverview = 0; iOverview < nOverviewCount; iOverview++) |
3713 | 0 | { |
3714 | 0 | GDALRasterBand *poOverview = poBand->GetOverview(iOverview); |
3715 | 0 | if (poOverview == nullptr || |
3716 | 0 | poOverview->GetXSize() > poBand->GetXSize() || |
3717 | 0 | poOverview->GetYSize() > poBand->GetYSize()) |
3718 | 0 | { |
3719 | 0 | continue; |
3720 | 0 | } |
3721 | | |
3722 | | // Compute downsampling factor of this overview |
3723 | 0 | const double dfDownsamplingFactor = std::min( |
3724 | 0 | poBand->GetXSize() / static_cast<double>(poOverview->GetXSize()), |
3725 | 0 | poBand->GetYSize() / static_cast<double>(poOverview->GetYSize())); |
3726 | | |
3727 | | // Is it nearly the requested factor and better (lower) than |
3728 | | // the current best factor? |
3729 | | // Use an epsilon because of numerical instability. |
3730 | 0 | constexpr double EPSILON = 1e-1; |
3731 | 0 | if (dfDownsamplingFactor >= |
3732 | 0 | dfDesiredDownsamplingFactor * dfOversamplingThreshold + |
3733 | 0 | EPSILON || |
3734 | 0 | dfDownsamplingFactor <= dfBestDownsamplingFactor) |
3735 | 0 | { |
3736 | 0 | continue; |
3737 | 0 | } |
3738 | | |
3739 | | // Ignore AVERAGE_BIT2GRAYSCALE overviews for RasterIO purposes. |
3740 | 0 | const char *pszResampling = poOverview->GetMetadataItem("RESAMPLING"); |
3741 | |
|
3742 | 0 | if (pszResampling != nullptr && |
3743 | 0 | STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2")) |
3744 | 0 | continue; |
3745 | | |
3746 | | // OK, this is our new best overview. |
3747 | 0 | poBestOverview = poOverview; |
3748 | 0 | nBestOverviewLevel = iOverview; |
3749 | 0 | dfBestDownsamplingFactor = dfDownsamplingFactor; |
3750 | |
|
3751 | 0 | if (std::abs(dfDesiredDownsamplingFactor - dfDownsamplingFactor) < |
3752 | 0 | EPSILON) |
3753 | 0 | { |
3754 | 0 | break; |
3755 | 0 | } |
3756 | 0 | } |
3757 | | |
3758 | | /* -------------------------------------------------------------------- */ |
3759 | | /* If we didn't find an overview that helps us, just return */ |
3760 | | /* indicating failure and the full resolution image will be used. */ |
3761 | | /* -------------------------------------------------------------------- */ |
3762 | 0 | if (nBestOverviewLevel < 0) |
3763 | 0 | return -1; |
3764 | | |
3765 | | /* -------------------------------------------------------------------- */ |
3766 | | /* Recompute the source window in terms of the selected */ |
3767 | | /* overview. */ |
3768 | | /* -------------------------------------------------------------------- */ |
3769 | 0 | const double dfXFactor = |
3770 | 0 | poBand->GetXSize() / static_cast<double>(poBestOverview->GetXSize()); |
3771 | 0 | const double dfYFactor = |
3772 | 0 | poBand->GetYSize() / static_cast<double>(poBestOverview->GetYSize()); |
3773 | 0 | CPLDebug("GDAL", "Selecting overview %d x %d", poBestOverview->GetXSize(), |
3774 | 0 | poBestOverview->GetYSize()); |
3775 | |
|
3776 | 0 | const int nOXOff = std::min(poBestOverview->GetXSize() - 1, |
3777 | 0 | static_cast<int>(nXOff / dfXFactor + 0.5)); |
3778 | 0 | const int nOYOff = std::min(poBestOverview->GetYSize() - 1, |
3779 | 0 | static_cast<int>(nYOff / dfYFactor + 0.5)); |
3780 | 0 | int nOXSize = std::max(1, static_cast<int>(nXSize / dfXFactor + 0.5)); |
3781 | 0 | int nOYSize = std::max(1, static_cast<int>(nYSize / dfYFactor + 0.5)); |
3782 | 0 | if (nOXOff + nOXSize > poBestOverview->GetXSize()) |
3783 | 0 | nOXSize = poBestOverview->GetXSize() - nOXOff; |
3784 | 0 | if (nOYOff + nOYSize > poBestOverview->GetYSize()) |
3785 | 0 | nOYSize = poBestOverview->GetYSize() - nOYOff; |
3786 | |
|
3787 | 0 | if (psExtraArg) |
3788 | 0 | { |
3789 | 0 | if (psExtraArg->bFloatingPointWindowValidity) |
3790 | 0 | { |
3791 | 0 | psExtraArg->dfXOff /= dfXFactor; |
3792 | 0 | psExtraArg->dfXSize /= dfXFactor; |
3793 | 0 | psExtraArg->dfYOff /= dfYFactor; |
3794 | 0 | psExtraArg->dfYSize /= dfYFactor; |
3795 | 0 | } |
3796 | 0 | else if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour) |
3797 | 0 | { |
3798 | 0 | psExtraArg->bFloatingPointWindowValidity = true; |
3799 | 0 | psExtraArg->dfXOff = nXOff / dfXFactor; |
3800 | 0 | psExtraArg->dfXSize = nXSize / dfXFactor; |
3801 | 0 | psExtraArg->dfYOff = nYOff / dfYFactor; |
3802 | 0 | psExtraArg->dfYSize = nYSize / dfYFactor; |
3803 | 0 | } |
3804 | 0 | } |
3805 | |
|
3806 | 0 | nXOff = nOXOff; |
3807 | 0 | nYOff = nOYOff; |
3808 | 0 | nXSize = nOXSize; |
3809 | 0 | nYSize = nOYSize; |
3810 | |
|
3811 | 0 | return nBestOverviewLevel; |
3812 | 0 | } |
3813 | | |
3814 | | /************************************************************************/ |
3815 | | /* OverviewRasterIO() */ |
3816 | | /* */ |
3817 | | /* Special work function to utilize available overviews to */ |
3818 | | /* more efficiently satisfy downsampled requests. It will */ |
3819 | | /* return CE_Failure if there are no appropriate overviews */ |
3820 | | /* available but it doesn't emit any error messages. */ |
3821 | | /************************************************************************/ |
3822 | | |
3823 | | //! @cond Doxygen_Suppress |
3824 | | CPLErr GDALRasterBand::OverviewRasterIO( |
3825 | | GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize, |
3826 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
3827 | | GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg) |
3828 | | |
3829 | 0 | { |
3830 | 0 | GDALRasterIOExtraArg sExtraArg; |
3831 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
3832 | |
|
3833 | 0 | const int nOverview = GDALBandGetBestOverviewLevel2( |
3834 | 0 | this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, &sExtraArg); |
3835 | 0 | if (nOverview < 0) |
3836 | 0 | return CE_Failure; |
3837 | | |
3838 | | /* -------------------------------------------------------------------- */ |
3839 | | /* Recast the call in terms of the new raster layer. */ |
3840 | | /* -------------------------------------------------------------------- */ |
3841 | 0 | GDALRasterBand *poOverviewBand = GetOverview(nOverview); |
3842 | 0 | if (poOverviewBand == nullptr) |
3843 | 0 | return CE_Failure; |
3844 | | |
3845 | 0 | return poOverviewBand->RasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, |
3846 | 0 | pData, nBufXSize, nBufYSize, eBufType, |
3847 | 0 | nPixelSpace, nLineSpace, &sExtraArg); |
3848 | 0 | } |
3849 | | |
3850 | | /************************************************************************/ |
3851 | | /* TryOverviewRasterIO() */ |
3852 | | /************************************************************************/ |
3853 | | |
3854 | | CPLErr GDALRasterBand::TryOverviewRasterIO( |
3855 | | GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize, |
3856 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
3857 | | GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg, |
3858 | | int *pbTried) |
3859 | 0 | { |
3860 | 0 | int nXOffMod = nXOff; |
3861 | 0 | int nYOffMod = nYOff; |
3862 | 0 | int nXSizeMod = nXSize; |
3863 | 0 | int nYSizeMod = nYSize; |
3864 | 0 | GDALRasterIOExtraArg sExtraArg; |
3865 | |
|
3866 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
3867 | |
|
3868 | 0 | int iOvrLevel = GDALBandGetBestOverviewLevel2( |
3869 | 0 | this, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize, nBufYSize, |
3870 | 0 | &sExtraArg); |
3871 | |
|
3872 | 0 | if (iOvrLevel >= 0) |
3873 | 0 | { |
3874 | 0 | GDALRasterBand *poOverviewBand = GetOverview(iOvrLevel); |
3875 | 0 | if (poOverviewBand) |
3876 | 0 | { |
3877 | 0 | *pbTried = TRUE; |
3878 | 0 | return poOverviewBand->RasterIO( |
3879 | 0 | eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData, |
3880 | 0 | nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace, |
3881 | 0 | &sExtraArg); |
3882 | 0 | } |
3883 | 0 | } |
3884 | | |
3885 | 0 | *pbTried = FALSE; |
3886 | 0 | return CE_None; |
3887 | 0 | } |
3888 | | |
3889 | | /************************************************************************/ |
3890 | | /* TryOverviewRasterIO() */ |
3891 | | /************************************************************************/ |
3892 | | |
3893 | | CPLErr GDALDataset::TryOverviewRasterIO( |
3894 | | GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize, |
3895 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
3896 | | int nBandCount, const int *panBandMap, GSpacing nPixelSpace, |
3897 | | GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg, |
3898 | | int *pbTried) |
3899 | 0 | { |
3900 | 0 | int nXOffMod = nXOff; |
3901 | 0 | int nYOffMod = nYOff; |
3902 | 0 | int nXSizeMod = nXSize; |
3903 | 0 | int nYSizeMod = nYSize; |
3904 | 0 | GDALRasterIOExtraArg sExtraArg; |
3905 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
3906 | |
|
3907 | 0 | int iOvrLevel = GDALBandGetBestOverviewLevel2( |
3908 | 0 | papoBands[0], nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize, |
3909 | 0 | nBufYSize, &sExtraArg); |
3910 | |
|
3911 | 0 | if (iOvrLevel >= 0 && papoBands[0]->GetOverview(iOvrLevel) != nullptr && |
3912 | 0 | papoBands[0]->GetOverview(iOvrLevel)->GetDataset() != nullptr) |
3913 | 0 | { |
3914 | 0 | *pbTried = TRUE; |
3915 | 0 | return papoBands[0]->GetOverview(iOvrLevel)->GetDataset()->RasterIO( |
3916 | 0 | eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData, nBufXSize, |
3917 | 0 | nBufYSize, eBufType, nBandCount, panBandMap, nPixelSpace, |
3918 | 0 | nLineSpace, nBandSpace, &sExtraArg); |
3919 | 0 | } |
3920 | 0 | else |
3921 | 0 | { |
3922 | 0 | *pbTried = FALSE; |
3923 | 0 | return CE_None; |
3924 | 0 | } |
3925 | 0 | } |
3926 | | |
3927 | | /************************************************************************/ |
3928 | | /* GetBestOverviewLevel() */ |
3929 | | /* */ |
3930 | | /* Returns the best overview level to satisfy the query or -1 if none */ |
3931 | | /* Also updates nXOff, nYOff, nXSize, nYSize when returning a valid */ |
3932 | | /* overview level */ |
3933 | | /************************************************************************/ |
3934 | | |
3935 | | static int GDALDatasetGetBestOverviewLevel(GDALDataset *poDS, int &nXOff, |
3936 | | int &nYOff, int &nXSize, int &nYSize, |
3937 | | int nBufXSize, int nBufYSize, |
3938 | | int nBandCount, |
3939 | | const int *panBandMap, |
3940 | | GDALRasterIOExtraArg *psExtraArg) |
3941 | 0 | { |
3942 | 0 | int nOverviewCount = 0; |
3943 | 0 | GDALRasterBand *poFirstBand = nullptr; |
3944 | | |
3945 | | /* -------------------------------------------------------------------- */ |
3946 | | /* Check that all bands have the same number of overviews and */ |
3947 | | /* that they have all the same size and block dimensions */ |
3948 | | /* -------------------------------------------------------------------- */ |
3949 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
3950 | 0 | { |
3951 | 0 | GDALRasterBand *poBand = poDS->GetRasterBand(panBandMap[iBand]); |
3952 | 0 | if (poBand == nullptr) |
3953 | 0 | return -1; |
3954 | 0 | if (iBand == 0) |
3955 | 0 | { |
3956 | 0 | poFirstBand = poBand; |
3957 | 0 | nOverviewCount = poBand->GetOverviewCount(); |
3958 | 0 | } |
3959 | 0 | else if (nOverviewCount != poBand->GetOverviewCount()) |
3960 | 0 | { |
3961 | 0 | CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... " |
3962 | 0 | "mismatched overview count, use std method."); |
3963 | 0 | return -1; |
3964 | 0 | } |
3965 | 0 | else |
3966 | 0 | { |
3967 | 0 | for (int iOverview = 0; iOverview < nOverviewCount; iOverview++) |
3968 | 0 | { |
3969 | 0 | GDALRasterBand *poOvrBand = poBand->GetOverview(iOverview); |
3970 | 0 | GDALRasterBand *poOvrFirstBand = |
3971 | 0 | poFirstBand->GetOverview(iOverview); |
3972 | 0 | if (poOvrBand == nullptr || poOvrFirstBand == nullptr) |
3973 | 0 | continue; |
3974 | | |
3975 | 0 | if (poOvrFirstBand->GetXSize() != poOvrBand->GetXSize() || |
3976 | 0 | poOvrFirstBand->GetYSize() != poOvrBand->GetYSize()) |
3977 | 0 | { |
3978 | 0 | CPLDebug("GDAL", |
3979 | 0 | "GDALDataset::GetBestOverviewLevel() ... " |
3980 | 0 | "mismatched overview sizes, use std method."); |
3981 | 0 | return -1; |
3982 | 0 | } |
3983 | 0 | int nBlockXSizeFirst = 0; |
3984 | 0 | int nBlockYSizeFirst = 0; |
3985 | 0 | poOvrFirstBand->GetBlockSize(&nBlockXSizeFirst, |
3986 | 0 | &nBlockYSizeFirst); |
3987 | |
|
3988 | 0 | int nBlockXSizeCurrent = 0; |
3989 | 0 | int nBlockYSizeCurrent = 0; |
3990 | 0 | poOvrBand->GetBlockSize(&nBlockXSizeCurrent, |
3991 | 0 | &nBlockYSizeCurrent); |
3992 | |
|
3993 | 0 | if (nBlockXSizeFirst != nBlockXSizeCurrent || |
3994 | 0 | nBlockYSizeFirst != nBlockYSizeCurrent) |
3995 | 0 | { |
3996 | 0 | CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... " |
3997 | 0 | "mismatched block sizes, use std method."); |
3998 | 0 | return -1; |
3999 | 0 | } |
4000 | 0 | } |
4001 | 0 | } |
4002 | 0 | } |
4003 | 0 | if (poFirstBand == nullptr) |
4004 | 0 | return -1; |
4005 | | |
4006 | 0 | return GDALBandGetBestOverviewLevel2(poFirstBand, nXOff, nYOff, nXSize, |
4007 | 0 | nYSize, nBufXSize, nBufYSize, |
4008 | 0 | psExtraArg); |
4009 | 0 | } |
4010 | | |
4011 | | /************************************************************************/ |
4012 | | /* BlockBasedRasterIO() */ |
4013 | | /* */ |
4014 | | /* This convenience function implements a dataset level */ |
4015 | | /* RasterIO() interface based on calling down to fetch blocks, */ |
4016 | | /* much like the GDALRasterBand::IRasterIO(), but it handles */ |
4017 | | /* all bands at once, so that a format driver that handles a */ |
4018 | | /* request for different bands of the same block efficiently */ |
4019 | | /* (i.e. without re-reading interleaved data) will be used efficiently. */
4020 | | /* */ |
4021 | | /* This method is intended to be called by an overridden */ |
4022 | | /* IRasterIO() method in the driver specific GDALDataset */ |
4023 | | /* derived class. */ |
4024 | | /* */ |
4025 | | /* Default internal implementation of RasterIO() ... utilizes */ |
4026 | | /* the Block access methods to satisfy the request. This would */ |
4027 | | /* normally only be overridden by formats with overviews. */ |
4028 | | /* */ |
4029 | | /* To keep things relatively simple, this method does not */ |
4030 | | /* currently take advantage of some special cases addressed in */ |
4031 | | /* GDALRasterBand::IRasterIO(), so it is likely best to only */ |
4032 | | /* call it when you know it will help. That is in cases where */ |
4033 | | /* data is at 1:1 to the buffer, and you know the driver is */ |
4034 | | /* implementing interleaved IO efficiently on a block by block */ |
4035 | | /* basis. Overviews will be used when possible. */ |
4036 | | /************************************************************************/ |
4037 | | |
4038 | | CPLErr GDALDataset::BlockBasedRasterIO( |
4039 | | GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize, |
4040 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
4041 | | int nBandCount, const int *panBandMap, GSpacing nPixelSpace, |
4042 | | GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg) |
4043 | | |
4044 | 0 | { |
4045 | 0 | CPLAssert(nullptr != pData); |
4046 | | |
4047 | 0 | GByte **papabySrcBlock = nullptr; |
4048 | 0 | GDALRasterBlock *poBlock = nullptr; |
4049 | 0 | GDALRasterBlock **papoBlocks = nullptr; |
4050 | 0 | int nLBlockX = -1; |
4051 | 0 | int nLBlockY = -1; |
4052 | 0 | int iBufYOff; |
4053 | 0 | int iBufXOff; |
4054 | 0 | int nBlockXSize = 1; |
4055 | 0 | int nBlockYSize = 1; |
4056 | 0 | CPLErr eErr = CE_None; |
4057 | 0 | GDALDataType eDataType = GDT_Byte; |
4058 | |
|
4059 | 0 | const bool bUseIntegerRequestCoords = |
4060 | 0 | (!psExtraArg->bFloatingPointWindowValidity || |
4061 | 0 | (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff && |
4062 | 0 | nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize)); |
4063 | | |
4064 | | /* -------------------------------------------------------------------- */ |
4065 | | /* Ensure that all bands share a common block size and data type. */ |
4066 | | /* -------------------------------------------------------------------- */ |
4067 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4068 | 0 | { |
4069 | 0 | GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]); |
4070 | |
|
4071 | 0 | if (iBand == 0) |
4072 | 0 | { |
4073 | 0 | poBand->GetBlockSize(&nBlockXSize, &nBlockYSize); |
4074 | 0 | eDataType = poBand->GetRasterDataType(); |
4075 | 0 | } |
4076 | 0 | else |
4077 | 0 | { |
4078 | 0 | int nThisBlockXSize = 0; |
4079 | 0 | int nThisBlockYSize = 0; |
4080 | 0 | poBand->GetBlockSize(&nThisBlockXSize, &nThisBlockYSize); |
4081 | 0 | if (nThisBlockXSize != nBlockXSize || |
4082 | 0 | nThisBlockYSize != nBlockYSize) |
4083 | 0 | { |
4084 | 0 | CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... " |
4085 | 0 | "mismatched block sizes, use std method."); |
4086 | 0 | return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, |
4087 | 0 | pData, nBufXSize, nBufYSize, eBufType, |
4088 | 0 | nBandCount, panBandMap, nPixelSpace, |
4089 | 0 | nLineSpace, nBandSpace, psExtraArg); |
4090 | 0 | } |
4091 | | |
4092 | 0 | if (eDataType != poBand->GetRasterDataType() && |
4093 | 0 | (nXSize != nBufXSize || nYSize != nBufYSize)) |
4094 | 0 | { |
4095 | 0 | CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... " |
4096 | 0 | "mismatched band data types, use std method."); |
4097 | 0 | return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, |
4098 | 0 | pData, nBufXSize, nBufYSize, eBufType, |
4099 | 0 | nBandCount, panBandMap, nPixelSpace, |
4100 | 0 | nLineSpace, nBandSpace, psExtraArg); |
4101 | 0 | } |
4102 | 0 | } |
4103 | 0 | } |
4104 | | |
4105 | | /* ==================================================================== */ |
4106 | | /* In this special case at full resolution we step through in */ |
4107 | | /* blocks, turning the request over to the per-band */ |
4108 | | /* IRasterIO(), but ensuring that all bands of one block are */ |
4109 | | /* called before proceeding to the next. */ |
4110 | | /* ==================================================================== */ |
4111 | | |
4112 | 0 | if (nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords) |
4113 | 0 | { |
4114 | 0 | GDALRasterIOExtraArg sDummyExtraArg; |
4115 | 0 | INIT_RASTERIO_EXTRA_ARG(sDummyExtraArg); |
4116 | |
|
4117 | 0 | int nChunkYSize = 0; |
4118 | 0 | int nChunkXSize = 0; |
4119 | |
|
4120 | 0 | for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff += nChunkYSize) |
4121 | 0 | { |
4122 | 0 | const int nChunkYOff = iBufYOff + nYOff; |
4123 | 0 | nChunkYSize = nBlockYSize - (nChunkYOff % nBlockYSize); |
4124 | 0 | if (nChunkYOff + nChunkYSize > nYOff + nYSize) |
4125 | 0 | nChunkYSize = (nYOff + nYSize) - nChunkYOff; |
4126 | |
|
4127 | 0 | for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff += nChunkXSize) |
4128 | 0 | { |
4129 | 0 | const int nChunkXOff = iBufXOff + nXOff; |
4130 | 0 | nChunkXSize = nBlockXSize - (nChunkXOff % nBlockXSize); |
4131 | 0 | if (nChunkXOff + nChunkXSize > nXOff + nXSize) |
4132 | 0 | nChunkXSize = (nXOff + nXSize) - nChunkXOff; |
4133 | |
|
4134 | 0 | GByte *pabyChunkData = |
4135 | 0 | static_cast<GByte *>(pData) + iBufXOff * nPixelSpace + |
4136 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace; |
4137 | |
|
4138 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4139 | 0 | { |
4140 | 0 | GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]); |
4141 | |
|
4142 | 0 | eErr = poBand->IRasterIO( |
4143 | 0 | eRWFlag, nChunkXOff, nChunkYOff, nChunkXSize, |
4144 | 0 | nChunkYSize, |
4145 | 0 | pabyChunkData + |
4146 | 0 | static_cast<GPtrDiff_t>(iBand) * nBandSpace, |
4147 | 0 | nChunkXSize, nChunkYSize, eBufType, nPixelSpace, |
4148 | 0 | nLineSpace, &sDummyExtraArg); |
4149 | 0 | if (eErr != CE_None) |
4150 | 0 | return eErr; |
4151 | 0 | } |
4152 | 0 | } |
4153 | | |
4154 | 0 | if (psExtraArg->pfnProgress != nullptr && |
4155 | 0 | !psExtraArg->pfnProgress( |
4156 | 0 | 1.0 * std::min(nBufYSize, iBufYOff + nChunkYSize) / |
4157 | 0 | nBufYSize, |
4158 | 0 | "", psExtraArg->pProgressData)) |
4159 | 0 | { |
4160 | 0 | return CE_Failure; |
4161 | 0 | } |
4162 | 0 | } |
4163 | | |
4164 | 0 | return CE_None; |
4165 | 0 | } |
4166 | | |
4167 | | /* Below code is not compatible with that case. It would need a complete */ |
4168 | | /* separate code like done in GDALRasterBand::IRasterIO. */ |
4169 | 0 | if (eRWFlag == GF_Write && (nBufXSize < nXSize || nBufYSize < nYSize)) |
4170 | 0 | { |
4171 | 0 | return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, |
4172 | 0 | nBufXSize, nBufYSize, eBufType, nBandCount, |
4173 | 0 | panBandMap, nPixelSpace, nLineSpace, |
4174 | 0 | nBandSpace, psExtraArg); |
4175 | 0 | } |
4176 | | |
4177 | | /* We could have a smarter implementation, but that will do for now */ |
4178 | 0 | if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour && |
4179 | 0 | (nBufXSize != nXSize || nBufYSize != nYSize)) |
4180 | 0 | { |
4181 | 0 | return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, |
4182 | 0 | nBufXSize, nBufYSize, eBufType, nBandCount, |
4183 | 0 | panBandMap, nPixelSpace, nLineSpace, |
4184 | 0 | nBandSpace, psExtraArg); |
4185 | 0 | } |
4186 | | |
4187 | | /* ==================================================================== */ |
4188 | | /* Loop reading required source blocks to satisfy output */ |
4189 | | /* request. This is the most general implementation. */ |
4190 | | /* ==================================================================== */ |
4191 | | |
4192 | 0 | const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType); |
4193 | |
|
4194 | 0 | papabySrcBlock = |
4195 | 0 | static_cast<GByte **>(CPLCalloc(sizeof(GByte *), nBandCount)); |
4196 | 0 | papoBlocks = |
4197 | 0 | static_cast<GDALRasterBlock **>(CPLCalloc(sizeof(void *), nBandCount)); |
4198 | | |
4199 | | /* -------------------------------------------------------------------- */ |
4200 | | /* Select an overview level if appropriate. */ |
4201 | | /* -------------------------------------------------------------------- */ |
4202 | |
|
4203 | 0 | GDALRasterIOExtraArg sExtraArg; |
4204 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
4205 | 0 | const int nOverviewLevel = GDALDatasetGetBestOverviewLevel( |
4206 | 0 | this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, nBandCount, |
4207 | 0 | panBandMap, &sExtraArg); |
4208 | 0 | if (nOverviewLevel >= 0) |
4209 | 0 | { |
4210 | 0 | GetRasterBand(panBandMap[0]) |
4211 | 0 | ->GetOverview(nOverviewLevel) |
4212 | 0 | ->GetBlockSize(&nBlockXSize, &nBlockYSize); |
4213 | 0 | } |
4214 | |
|
4215 | 0 | double dfXOff = nXOff; |
4216 | 0 | double dfYOff = nYOff; |
4217 | 0 | double dfXSize = nXSize; |
4218 | 0 | double dfYSize = nYSize; |
4219 | 0 | if (sExtraArg.bFloatingPointWindowValidity) |
4220 | 0 | { |
4221 | 0 | dfXOff = sExtraArg.dfXOff; |
4222 | 0 | dfYOff = sExtraArg.dfYOff; |
4223 | 0 | dfXSize = sExtraArg.dfXSize; |
4224 | 0 | dfYSize = sExtraArg.dfYSize; |
4225 | 0 | } |
4226 | | |
4227 | | /* -------------------------------------------------------------------- */ |
4228 | | /* Compute stepping increment. */ |
4229 | | /* -------------------------------------------------------------------- */ |
4230 | 0 | const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize); |
4231 | 0 | const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize); |
4232 | |
|
4233 | 0 | constexpr double EPS = 1e-10; |
4234 | | /* -------------------------------------------------------------------- */ |
4235 | | /* Loop over buffer computing source locations. */ |
4236 | | /* -------------------------------------------------------------------- */ |
4237 | 0 | for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++) |
4238 | 0 | { |
4239 | 0 | GPtrDiff_t iSrcOffset; |
4240 | | |
4241 | | // Add small epsilon to avoid some numeric precision issues. |
4242 | 0 | const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS; |
4243 | 0 | const int iSrcY = static_cast<int>(std::min( |
4244 | 0 | std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1))); |
4245 | |
|
4246 | 0 | GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) * |
4247 | 0 | static_cast<GPtrDiff_t>(nLineSpace); |
4248 | |
|
4249 | 0 | for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff++) |
4250 | 0 | { |
4251 | 0 | const double dfSrcX = (iBufXOff + 0.5) * dfSrcXInc + dfXOff + EPS; |
4252 | 0 | const int iSrcX = static_cast<int>(std::min( |
4253 | 0 | std::max(0.0, dfSrcX), static_cast<double>(nRasterXSize - 1))); |
4254 | | |
4255 | | // FIXME: this code likely doesn't work if the dirty block gets |
4256 | | // flushed to disk before being completely written. In the meantime, |
4257 | | // bJustInitialize should probably be set to FALSE even if it is not |
4258 | | // ideal performance wise, and for lossy compression |
4259 | | |
4260 | | /* -------------------------------------------------------------------- |
4261 | | */ |
4262 | | /* Ensure we have the appropriate block loaded. */ |
4263 | | /* -------------------------------------------------------------------- |
4264 | | */ |
4265 | 0 | if (iSrcX < nLBlockX * nBlockXSize || |
4266 | 0 | iSrcX - nBlockXSize >= nLBlockX * nBlockXSize || |
4267 | 0 | iSrcY < nLBlockY * nBlockYSize || |
4268 | 0 | iSrcY - nBlockYSize >= nLBlockY * nBlockYSize) |
4269 | 0 | { |
4270 | 0 | nLBlockX = iSrcX / nBlockXSize; |
4271 | 0 | nLBlockY = iSrcY / nBlockYSize; |
4272 | |
|
4273 | 0 | const bool bJustInitialize = |
4274 | 0 | eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize && |
4275 | 0 | nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize && |
4276 | 0 | nXOff <= nLBlockX * nBlockXSize && |
4277 | 0 | nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize; |
4278 | | /*bool bMemZeroBuffer = FALSE; |
4279 | | if( eRWFlag == GF_Write && !bJustInitialize && |
4280 | | nXOff <= nLBlockX * nBlockXSize && |
4281 | | nYOff <= nLBlockY * nBlockYSize && |
4282 | | (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize || |
4283 | | (nXOff + nXSize == GetRasterXSize() && |
4284 | | (nLBlockX+1) * nBlockXSize > GetRasterXSize())) && |
4285 | | (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize || |
4286 | | (nYOff + nYSize == GetRasterYSize() && |
4287 | | (nLBlockY+1) * nBlockYSize > GetRasterYSize())) ) |
4288 | | { |
4289 | | bJustInitialize = TRUE; |
4290 | | bMemZeroBuffer = TRUE; |
4291 | | }*/ |
4292 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4293 | 0 | { |
4294 | 0 | GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]); |
4295 | 0 | if (nOverviewLevel >= 0) |
4296 | 0 | poBand = poBand->GetOverview(nOverviewLevel); |
4297 | 0 | poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, |
4298 | 0 | bJustInitialize); |
4299 | 0 | if (poBlock == nullptr) |
4300 | 0 | { |
4301 | 0 | eErr = CE_Failure; |
4302 | 0 | goto CleanupAndReturn; |
4303 | 0 | } |
4304 | | |
4305 | 0 | if (eRWFlag == GF_Write) |
4306 | 0 | poBlock->MarkDirty(); |
4307 | |
|
4308 | 0 | if (papoBlocks[iBand] != nullptr) |
4309 | 0 | papoBlocks[iBand]->DropLock(); |
4310 | |
|
4311 | 0 | papoBlocks[iBand] = poBlock; |
4312 | |
|
4313 | 0 | papabySrcBlock[iBand] = |
4314 | 0 | static_cast<GByte *>(poBlock->GetDataRef()); |
4315 | | /*if( bMemZeroBuffer ) |
4316 | | { |
4317 | | memset(papabySrcBlock[iBand], 0, |
4318 | | static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize |
4319 | | * nBlockYSize); |
4320 | | }*/ |
4321 | 0 | } |
4322 | 0 | } |
4323 | | |
4324 | | /* -------------------------------------------------------------------- |
4325 | | */ |
4326 | | /* Copy over this pixel of data. */ |
4327 | | /* -------------------------------------------------------------------- |
4328 | | */ |
4329 | 0 | iSrcOffset = (static_cast<GPtrDiff_t>(iSrcX) - |
4330 | 0 | static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize + |
4331 | 0 | (static_cast<GPtrDiff_t>(iSrcY) - |
4332 | 0 | static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) * |
4333 | 0 | nBlockXSize) * |
4334 | 0 | nBandDataSize; |
4335 | |
|
4336 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4337 | 0 | { |
4338 | 0 | GByte *pabySrcBlock = papabySrcBlock[iBand]; |
4339 | 0 | GPtrDiff_t iBandBufOffset = |
4340 | 0 | iBufOffset + static_cast<GPtrDiff_t>(iBand) * |
4341 | 0 | static_cast<GPtrDiff_t>(nBandSpace); |
4342 | |
|
4343 | 0 | if (eDataType == eBufType) |
4344 | 0 | { |
4345 | 0 | if (eRWFlag == GF_Read) |
4346 | 0 | memcpy(static_cast<GByte *>(pData) + iBandBufOffset, |
4347 | 0 | pabySrcBlock + iSrcOffset, nBandDataSize); |
4348 | 0 | else |
4349 | 0 | memcpy(pabySrcBlock + iSrcOffset, |
4350 | 0 | static_cast<const GByte *>(pData) + |
4351 | 0 | iBandBufOffset, |
4352 | 0 | nBandDataSize); |
4353 | 0 | } |
4354 | 0 | else |
4355 | 0 | { |
4356 | | /* type to type conversion ... ouch, this is expensive way |
4357 | | of handling single words */ |
4358 | |
|
4359 | 0 | if (eRWFlag == GF_Read) |
4360 | 0 | GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0, |
4361 | 0 | static_cast<GByte *>(pData) + |
4362 | 0 | iBandBufOffset, |
4363 | 0 | eBufType, 0, 1); |
4364 | 0 | else |
4365 | 0 | GDALCopyWords64(static_cast<const GByte *>(pData) + |
4366 | 0 | iBandBufOffset, |
4367 | 0 | eBufType, 0, pabySrcBlock + iSrcOffset, |
4368 | 0 | eDataType, 0, 1); |
4369 | 0 | } |
4370 | 0 | } |
4371 | |
|
4372 | 0 | iBufOffset += static_cast<int>(nPixelSpace); |
4373 | 0 | } |
4374 | 0 | } |
4375 | | |
4376 | | /* -------------------------------------------------------------------- */ |
4377 | | /* CleanupAndReturn. */ |
4378 | | /* -------------------------------------------------------------------- */ |
4379 | 0 | CleanupAndReturn: |
4380 | 0 | CPLFree(papabySrcBlock); |
4381 | 0 | if (papoBlocks != nullptr) |
4382 | 0 | { |
4383 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4384 | 0 | { |
4385 | 0 | if (papoBlocks[iBand] != nullptr) |
4386 | 0 | papoBlocks[iBand]->DropLock(); |
4387 | 0 | } |
4388 | 0 | CPLFree(papoBlocks); |
4389 | 0 | } |
4390 | |
|
4391 | 0 | return eErr; |
4392 | 0 | } |
4393 | | |
4394 | | //! @endcond |
4395 | | |
4396 | | /************************************************************************/ |
4397 | | /* GDALCopyWholeRasterGetSwathSize() */ |
4398 | | /************************************************************************/ |
4399 | | |
4400 | | static void GDALCopyWholeRasterGetSwathSize(GDALRasterBand *poSrcPrototypeBand, |
4401 | | GDALRasterBand *poDstPrototypeBand, |
4402 | | int nBandCount, |
4403 | | int bDstIsCompressed, |
4404 | | int bInterleave, int *pnSwathCols, |
4405 | | int *pnSwathLines) |
4406 | 0 | { |
4407 | 0 | GDALDataType eDT = poDstPrototypeBand->GetRasterDataType(); |
4408 | 0 | int nSrcBlockXSize = 0; |
4409 | 0 | int nSrcBlockYSize = 0; |
4410 | 0 | int nBlockXSize = 0; |
4411 | 0 | int nBlockYSize = 0; |
4412 | |
|
4413 | 0 | int nXSize = poSrcPrototypeBand->GetXSize(); |
4414 | 0 | int nYSize = poSrcPrototypeBand->GetYSize(); |
4415 | |
|
4416 | 0 | poSrcPrototypeBand->GetBlockSize(&nSrcBlockXSize, &nSrcBlockYSize); |
4417 | 0 | poDstPrototypeBand->GetBlockSize(&nBlockXSize, &nBlockYSize); |
4418 | |
|
4419 | 0 | const int nMaxBlockXSize = std::max(nBlockXSize, nSrcBlockXSize); |
4420 | 0 | const int nMaxBlockYSize = std::max(nBlockYSize, nSrcBlockYSize); |
4421 | |
|
4422 | 0 | int nPixelSize = GDALGetDataTypeSizeBytes(eDT); |
4423 | 0 | if (bInterleave) |
4424 | 0 | nPixelSize *= nBandCount; |
4425 | | |
4426 | | // aim for one row of blocks. Do not settle for less. |
4427 | 0 | int nSwathCols = nXSize; |
4428 | 0 | int nSwathLines = nMaxBlockYSize; |
4429 | |
|
4430 | 0 | const char *pszSrcCompression = |
4431 | 0 | poSrcPrototypeBand->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE"); |
4432 | 0 | if (pszSrcCompression == nullptr) |
4433 | 0 | { |
4434 | 0 | auto poSrcDS = poSrcPrototypeBand->GetDataset(); |
4435 | 0 | if (poSrcDS) |
4436 | 0 | pszSrcCompression = |
4437 | 0 | poSrcDS->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE"); |
4438 | 0 | } |
4439 | | |
4440 | | /* -------------------------------------------------------------------- */ |
4441 | | /* What will our swath size be? */ |
4442 | | /* -------------------------------------------------------------------- */ |
4443 | | // When writing interleaved data in a compressed format, we want to be sure |
4444 | | // that each block will only be written once, so the swath size must not be |
4445 | | // greater than the block cache. |
4446 | 0 | const char *pszSwathSize = CPLGetConfigOption("GDAL_SWATH_SIZE", nullptr); |
4447 | 0 | int nTargetSwathSize; |
4448 | 0 | if (pszSwathSize != nullptr) |
4449 | 0 | nTargetSwathSize = static_cast<int>( |
4450 | 0 | std::min(GIntBig(INT_MAX), CPLAtoGIntBig(pszSwathSize))); |
4451 | 0 | else |
4452 | 0 | { |
4453 | | // As a default, take one 1/4 of the cache size. |
4454 | 0 | nTargetSwathSize = static_cast<int>( |
4455 | 0 | std::min(GIntBig(INT_MAX), GDALGetCacheMax64() / 4)); |
4456 | | |
4457 | | // but if the minimum idal swath buf size is less, then go for it to |
4458 | | // avoid unnecessarily abusing RAM usage. |
4459 | | // but try to use 10 MB at least. |
4460 | 0 | GIntBig nIdealSwathBufSize = |
4461 | 0 | static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize; |
4462 | 0 | int nMinTargetSwathSize = 10 * 1000 * 1000; |
4463 | |
|
4464 | 0 | if ((poSrcPrototypeBand->GetSuggestedBlockAccessPattern() & |
4465 | 0 | GSBAP_LARGEST_CHUNK_POSSIBLE) != 0) |
4466 | 0 | { |
4467 | 0 | nMinTargetSwathSize = nTargetSwathSize; |
4468 | 0 | } |
4469 | |
|
4470 | 0 | if (nIdealSwathBufSize < nTargetSwathSize && |
4471 | 0 | nIdealSwathBufSize < nMinTargetSwathSize) |
4472 | 0 | { |
4473 | 0 | nIdealSwathBufSize = nMinTargetSwathSize; |
4474 | 0 | } |
4475 | |
|
4476 | 0 | if (pszSrcCompression != nullptr && |
4477 | 0 | EQUAL(pszSrcCompression, "JPEG2000") && |
4478 | 0 | (!bDstIsCompressed || ((nSrcBlockXSize % nBlockXSize) == 0 && |
4479 | 0 | (nSrcBlockYSize % nBlockYSize) == 0))) |
4480 | 0 | { |
4481 | 0 | nIdealSwathBufSize = |
4482 | 0 | std::max(nIdealSwathBufSize, static_cast<GIntBig>(nSwathCols) * |
4483 | 0 | nSrcBlockYSize * nPixelSize); |
4484 | 0 | } |
4485 | 0 | if (nTargetSwathSize > nIdealSwathBufSize) |
4486 | 0 | nTargetSwathSize = static_cast<int>( |
4487 | 0 | std::min(GIntBig(INT_MAX), nIdealSwathBufSize)); |
4488 | 0 | } |
4489 | |
|
4490 | 0 | if (nTargetSwathSize < 1000000) |
4491 | 0 | nTargetSwathSize = 1000000; |
4492 | | |
4493 | | /* But let's check that */ |
4494 | 0 | if (bDstIsCompressed && bInterleave && |
4495 | 0 | nTargetSwathSize > GDALGetCacheMax64()) |
4496 | 0 | { |
4497 | 0 | CPLError(CE_Warning, CPLE_AppDefined, |
4498 | 0 | "When translating into a compressed interleave format, " |
4499 | 0 | "the block cache size (" CPL_FRMT_GIB ") " |
4500 | 0 | "should be at least the size of the swath (%d) " |
4501 | 0 | "(GDAL_SWATH_SIZE config. option)", |
4502 | 0 | GDALGetCacheMax64(), nTargetSwathSize); |
4503 | 0 | } |
4504 | |
|
4505 | 0 | #define IS_DIVIDER_OF(x, y) ((y) % (x) == 0) |
4506 | 0 | #define ROUND_TO(x, y) (((x) / (y)) * (y)) |
4507 | | |
4508 | | // if both input and output datasets are tiled, that the tile dimensions |
4509 | | // are "compatible", try to stick to a swath dimension that is a multiple |
4510 | | // of input and output block dimensions. |
4511 | 0 | if (nBlockXSize != nXSize && nSrcBlockXSize != nXSize && |
4512 | 0 | IS_DIVIDER_OF(nBlockXSize, nMaxBlockXSize) && |
4513 | 0 | IS_DIVIDER_OF(nSrcBlockXSize, nMaxBlockXSize) && |
4514 | 0 | IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) && |
4515 | 0 | IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize)) |
4516 | 0 | { |
4517 | 0 | if (static_cast<GIntBig>(nMaxBlockXSize) * nMaxBlockYSize * |
4518 | 0 | nPixelSize <= |
4519 | 0 | static_cast<GIntBig>(nTargetSwathSize)) |
4520 | 0 | { |
4521 | 0 | nSwathCols = nTargetSwathSize / (nMaxBlockYSize * nPixelSize); |
4522 | 0 | nSwathCols = ROUND_TO(nSwathCols, nMaxBlockXSize); |
4523 | 0 | if (nSwathCols == 0) |
4524 | 0 | nSwathCols = nMaxBlockXSize; |
4525 | 0 | if (nSwathCols > nXSize) |
4526 | 0 | nSwathCols = nXSize; |
4527 | 0 | nSwathLines = nMaxBlockYSize; |
4528 | |
|
4529 | 0 | if (static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize > |
4530 | 0 | static_cast<GIntBig>(nTargetSwathSize)) |
4531 | 0 | { |
4532 | 0 | nSwathCols = nXSize; |
4533 | 0 | nSwathLines = nBlockYSize; |
4534 | 0 | } |
4535 | 0 | } |
4536 | 0 | } |
4537 | |
|
4538 | 0 | const GIntBig nMemoryPerCol = static_cast<GIntBig>(nSwathCols) * nPixelSize; |
4539 | 0 | const GIntBig nSwathBufSize = nMemoryPerCol * nSwathLines; |
4540 | 0 | if (nSwathBufSize > static_cast<GIntBig>(nTargetSwathSize)) |
4541 | 0 | { |
4542 | 0 | nSwathLines = static_cast<int>(nTargetSwathSize / nMemoryPerCol); |
4543 | 0 | if (nSwathLines == 0) |
4544 | 0 | nSwathLines = 1; |
4545 | |
|
4546 | 0 | CPLDebug( |
4547 | 0 | "GDAL", |
4548 | 0 | "GDALCopyWholeRasterGetSwathSize(): adjusting to %d line swath " |
4549 | 0 | "since requirement (" CPL_FRMT_GIB " bytes) exceed target swath " |
4550 | 0 | "size (%d bytes) (GDAL_SWATH_SIZE config. option)", |
4551 | 0 | nSwathLines, nBlockYSize * nMemoryPerCol, nTargetSwathSize); |
4552 | 0 | } |
4553 | | // If we are processing single scans, try to handle several at once. |
4554 | | // If we are handling swaths already, only grow the swath if a row |
4555 | | // of blocks is substantially less than our target buffer size. |
4556 | 0 | else if (nSwathLines == 1 || |
4557 | 0 | nMemoryPerCol * nSwathLines < |
4558 | 0 | static_cast<GIntBig>(nTargetSwathSize) / 10) |
4559 | 0 | { |
4560 | 0 | nSwathLines = std::min( |
4561 | 0 | nYSize, |
4562 | 0 | std::max(1, static_cast<int>(nTargetSwathSize / nMemoryPerCol))); |
4563 | | |
4564 | | /* If possible try to align to source and target block height */ |
4565 | 0 | if ((nSwathLines % nMaxBlockYSize) != 0 && |
4566 | 0 | nSwathLines > nMaxBlockYSize && |
4567 | 0 | IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) && |
4568 | 0 | IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize)) |
4569 | 0 | nSwathLines = ROUND_TO(nSwathLines, nMaxBlockYSize); |
4570 | 0 | } |
4571 | |
|
4572 | 0 | if (pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") && |
4573 | 0 | (!bDstIsCompressed || (IS_DIVIDER_OF(nBlockXSize, nSrcBlockXSize) && |
4574 | 0 | IS_DIVIDER_OF(nBlockYSize, nSrcBlockYSize)))) |
4575 | 0 | { |
4576 | | // Typical use case: converting from Pleaiades that is 2048x2048 tiled. |
4577 | 0 | if (nSwathLines < nSrcBlockYSize) |
4578 | 0 | { |
4579 | 0 | nSwathLines = nSrcBlockYSize; |
4580 | | |
4581 | | // Number of pixels that can be read/write simultaneously. |
4582 | 0 | nSwathCols = nTargetSwathSize / (nSrcBlockXSize * nPixelSize); |
4583 | 0 | nSwathCols = ROUND_TO(nSwathCols, nSrcBlockXSize); |
4584 | 0 | if (nSwathCols == 0) |
4585 | 0 | nSwathCols = nSrcBlockXSize; |
4586 | 0 | if (nSwathCols > nXSize) |
4587 | 0 | nSwathCols = nXSize; |
4588 | |
|
4589 | 0 | CPLDebug( |
4590 | 0 | "GDAL", |
4591 | 0 | "GDALCopyWholeRasterGetSwathSize(): because of compression and " |
4592 | 0 | "too high block, " |
4593 | 0 | "use partial width at one time"); |
4594 | 0 | } |
4595 | 0 | else if ((nSwathLines % nSrcBlockYSize) != 0) |
4596 | 0 | { |
4597 | | /* Round on a multiple of nSrcBlockYSize */ |
4598 | 0 | nSwathLines = ROUND_TO(nSwathLines, nSrcBlockYSize); |
4599 | 0 | CPLDebug( |
4600 | 0 | "GDAL", |
4601 | 0 | "GDALCopyWholeRasterGetSwathSize(): because of compression, " |
4602 | 0 | "round nSwathLines to block height : %d", |
4603 | 0 | nSwathLines); |
4604 | 0 | } |
4605 | 0 | } |
4606 | 0 | else if (bDstIsCompressed) |
4607 | 0 | { |
4608 | 0 | if (nSwathLines < nBlockYSize) |
4609 | 0 | { |
4610 | 0 | nSwathLines = nBlockYSize; |
4611 | | |
4612 | | // Number of pixels that can be read/write simultaneously. |
4613 | 0 | nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize); |
4614 | 0 | nSwathCols = ROUND_TO(nSwathCols, nBlockXSize); |
4615 | 0 | if (nSwathCols == 0) |
4616 | 0 | nSwathCols = nBlockXSize; |
4617 | 0 | if (nSwathCols > nXSize) |
4618 | 0 | nSwathCols = nXSize; |
4619 | |
|
4620 | 0 | CPLDebug( |
4621 | 0 | "GDAL", |
4622 | 0 | "GDALCopyWholeRasterGetSwathSize(): because of compression and " |
4623 | 0 | "too high block, " |
4624 | 0 | "use partial width at one time"); |
4625 | 0 | } |
4626 | 0 | else if ((nSwathLines % nBlockYSize) != 0) |
4627 | 0 | { |
4628 | | // Round on a multiple of nBlockYSize. |
4629 | 0 | nSwathLines = ROUND_TO(nSwathLines, nBlockYSize); |
4630 | 0 | CPLDebug( |
4631 | 0 | "GDAL", |
4632 | 0 | "GDALCopyWholeRasterGetSwathSize(): because of compression, " |
4633 | 0 | "round nSwathLines to block height : %d", |
4634 | 0 | nSwathLines); |
4635 | 0 | } |
4636 | 0 | } |
4637 | |
|
4638 | 0 | *pnSwathCols = nSwathCols; |
4639 | 0 | *pnSwathLines = nSwathLines; |
4640 | 0 | } |
4641 | | |
4642 | | /************************************************************************/ |
4643 | | /* GDALDatasetCopyWholeRaster() */ |
4644 | | /************************************************************************/ |
4645 | | |
4646 | | /** |
4647 | | * \brief Copy all dataset raster data. |
4648 | | * |
4649 | | * This function copies the complete raster contents of one dataset to |
4650 | | * another similarly configured dataset. The source and destination |
4651 | | * dataset must have the same number of bands, and the same width |
4652 | | * and height. The bands do not have to have the same data type. |
4653 | | * |
4654 | | * This function is primarily intended to support implementation of |
4655 | | * driver specific CreateCopy() functions. It implements efficient copying, |
4656 | | * in particular "chunking" the copy in substantial blocks and, if appropriate, |
4657 | | * performing the transfer in a pixel interleaved fashion. |
4658 | | * |
4659 | | * Currently the only papszOptions values supported are: |
4660 | | * <ul> |
4661 | | * <li>"INTERLEAVE=PIXEL/BAND" to force pixel (resp. band) interleaved read and |
4662 | | * write access pattern (this does not modify the layout of the destination |
4663 | | * data)</li> <li>"COMPRESSED=YES" to force alignment on target dataset block |
4664 | | * sizes to achieve best compression.</li> <li>"SKIP_HOLES=YES" to skip chunks |
4665 | | * for which GDALGetDataCoverageStatus() returns GDAL_DATA_COVERAGE_STATUS_EMPTY |
4666 | | * (GDAL >= 2.2)</li> |
4667 | | * </ul> |
4668 | | * More options may be supported in the future. |
4669 | | * |
4670 | | * @param hSrcDS the source dataset |
4671 | | * @param hDstDS the destination dataset |
4672 | | * @param papszOptions transfer hints in "StringList" Name=Value format. |
4673 | | * @param pfnProgress progress reporting function. |
4674 | | * @param pProgressData callback data for progress function. |
4675 | | * |
4676 | | * @return CE_None on success, or CE_Failure on failure. |
4677 | | */ |
4678 | | |
4679 | | CPLErr CPL_STDCALL GDALDatasetCopyWholeRaster(GDALDatasetH hSrcDS, |
4680 | | GDALDatasetH hDstDS, |
4681 | | CSLConstList papszOptions, |
4682 | | GDALProgressFunc pfnProgress, |
4683 | | void *pProgressData) |
4684 | | |
4685 | 0 | { |
4686 | 0 | VALIDATE_POINTER1(hSrcDS, "GDALDatasetCopyWholeRaster", CE_Failure); |
4687 | 0 | VALIDATE_POINTER1(hDstDS, "GDALDatasetCopyWholeRaster", CE_Failure); |
4688 | | |
4689 | 0 | GDALDataset *poSrcDS = GDALDataset::FromHandle(hSrcDS); |
4690 | 0 | GDALDataset *poDstDS = GDALDataset::FromHandle(hDstDS); |
4691 | |
|
4692 | 0 | if (pfnProgress == nullptr) |
4693 | 0 | pfnProgress = GDALDummyProgress; |
4694 | | |
4695 | | /* -------------------------------------------------------------------- */ |
4696 | | /* Confirm the datasets match in size and band counts. */ |
4697 | | /* -------------------------------------------------------------------- */ |
4698 | 0 | const int nXSize = poDstDS->GetRasterXSize(); |
4699 | 0 | const int nYSize = poDstDS->GetRasterYSize(); |
4700 | 0 | const int nBandCount = poDstDS->GetRasterCount(); |
4701 | |
|
4702 | 0 | if (poSrcDS->GetRasterXSize() != nXSize || |
4703 | 0 | poSrcDS->GetRasterYSize() != nYSize || |
4704 | 0 | poSrcDS->GetRasterCount() != nBandCount) |
4705 | 0 | { |
4706 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
4707 | 0 | "Input and output dataset sizes or band counts do not\n" |
4708 | 0 | "match in GDALDatasetCopyWholeRaster()"); |
4709 | 0 | return CE_Failure; |
4710 | 0 | } |
4711 | | |
4712 | | /* -------------------------------------------------------------------- */ |
4713 | | /* Report preliminary (0) progress. */ |
4714 | | /* -------------------------------------------------------------------- */ |
4715 | 0 | if (!pfnProgress(0.0, nullptr, pProgressData)) |
4716 | 0 | { |
4717 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
4718 | 0 | "User terminated CreateCopy()"); |
4719 | 0 | return CE_Failure; |
4720 | 0 | } |
4721 | | |
4722 | | /* -------------------------------------------------------------------- */ |
4723 | | /* Get our prototype band, and assume the others are similarly */ |
4724 | | /* configured. */ |
4725 | | /* -------------------------------------------------------------------- */ |
4726 | 0 | if (nBandCount == 0) |
4727 | 0 | return CE_None; |
4728 | | |
4729 | 0 | GDALRasterBand *poSrcPrototypeBand = poSrcDS->GetRasterBand(1); |
4730 | 0 | GDALRasterBand *poDstPrototypeBand = poDstDS->GetRasterBand(1); |
4731 | 0 | GDALDataType eDT = poDstPrototypeBand->GetRasterDataType(); |
4732 | | |
4733 | | /* -------------------------------------------------------------------- */ |
4734 | | /* Do we want to try and do the operation in a pixel */ |
4735 | | /* interleaved fashion? */ |
4736 | | /* -------------------------------------------------------------------- */ |
4737 | 0 | bool bInterleave = false; |
4738 | 0 | const char *pszInterleave = |
4739 | 0 | poSrcDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE"); |
4740 | 0 | if (pszInterleave != nullptr && |
4741 | 0 | (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE"))) |
4742 | 0 | bInterleave = true; |
4743 | |
|
4744 | 0 | pszInterleave = poDstDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE"); |
4745 | 0 | if (pszInterleave != nullptr && |
4746 | 0 | (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE"))) |
4747 | 0 | bInterleave = true; |
4748 | |
|
4749 | 0 | pszInterleave = CSLFetchNameValue(papszOptions, "INTERLEAVE"); |
4750 | 0 | if (pszInterleave != nullptr && EQUAL(pszInterleave, "PIXEL")) |
4751 | 0 | bInterleave = true; |
4752 | 0 | else if (pszInterleave != nullptr && EQUAL(pszInterleave, "BAND")) |
4753 | 0 | bInterleave = false; |
4754 | | // attributes is specific to the TileDB driver |
4755 | 0 | else if (pszInterleave != nullptr && EQUAL(pszInterleave, "ATTRIBUTES")) |
4756 | 0 | bInterleave = true; |
4757 | 0 | else if (pszInterleave != nullptr) |
4758 | 0 | { |
4759 | 0 | CPLError(CE_Warning, CPLE_NotSupported, |
4760 | 0 | "Unsupported value for option INTERLEAVE"); |
4761 | 0 | } |
4762 | | |
4763 | | // If the destination is compressed, we must try to write blocks just once, |
4764 | | // to save disk space (GTiff case for example), and to avoid data loss |
4765 | | // (JPEG compression for example). |
4766 | 0 | bool bDstIsCompressed = false; |
4767 | 0 | const char *pszDstCompressed = |
4768 | 0 | CSLFetchNameValue(papszOptions, "COMPRESSED"); |
4769 | 0 | if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed)) |
4770 | 0 | bDstIsCompressed = true; |
4771 | | |
4772 | | /* -------------------------------------------------------------------- */ |
4773 | | /* What will our swath size be? */ |
4774 | | /* -------------------------------------------------------------------- */ |
4775 | |
|
4776 | 0 | int nSwathCols = 0; |
4777 | 0 | int nSwathLines = 0; |
4778 | 0 | GDALCopyWholeRasterGetSwathSize(poSrcPrototypeBand, poDstPrototypeBand, |
4779 | 0 | nBandCount, bDstIsCompressed, bInterleave, |
4780 | 0 | &nSwathCols, &nSwathLines); |
4781 | |
|
4782 | 0 | int nPixelSize = GDALGetDataTypeSizeBytes(eDT); |
4783 | 0 | if (bInterleave) |
4784 | 0 | nPixelSize *= nBandCount; |
4785 | |
|
4786 | 0 | void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize); |
4787 | 0 | if (pSwathBuf == nullptr) |
4788 | 0 | { |
4789 | 0 | return CE_Failure; |
4790 | 0 | } |
4791 | | |
4792 | 0 | CPLDebug("GDAL", |
4793 | 0 | "GDALDatasetCopyWholeRaster(): %d*%d swaths, bInterleave=%d", |
4794 | 0 | nSwathCols, nSwathLines, static_cast<int>(bInterleave)); |
4795 | | |
4796 | | // Advise the source raster that we are going to read it completely |
4797 | | // Note: this might already have been done by GDALCreateCopy() in the |
4798 | | // likely case this function is indirectly called by it |
4799 | 0 | poSrcDS->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nBandCount, |
4800 | 0 | nullptr, nullptr); |
4801 | | |
4802 | | /* ==================================================================== */ |
4803 | | /* Band oriented (uninterleaved) case. */ |
4804 | | /* ==================================================================== */ |
4805 | 0 | CPLErr eErr = CE_None; |
4806 | 0 | const bool bCheckHoles = |
4807 | 0 | CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO")); |
4808 | |
|
4809 | 0 | if (!bInterleave) |
4810 | 0 | { |
4811 | 0 | GDALRasterIOExtraArg sExtraArg; |
4812 | 0 | INIT_RASTERIO_EXTRA_ARG(sExtraArg); |
4813 | 0 | CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy |
4814 | |
|
4815 | 0 | const GIntBig nTotalBlocks = static_cast<GIntBig>(nBandCount) * |
4816 | 0 | DIV_ROUND_UP(nYSize, nSwathLines) * |
4817 | 0 | DIV_ROUND_UP(nXSize, nSwathCols); |
4818 | 0 | GIntBig nBlocksDone = 0; |
4819 | |
|
4820 | 0 | for (int iBand = 0; iBand < nBandCount && eErr == CE_None; iBand++) |
4821 | 0 | { |
4822 | 0 | int nBand = iBand + 1; |
4823 | |
|
4824 | 0 | for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines) |
4825 | 0 | { |
4826 | 0 | int nThisLines = nSwathLines; |
4827 | |
|
4828 | 0 | if (iY + nThisLines > nYSize) |
4829 | 0 | nThisLines = nYSize - iY; |
4830 | |
|
4831 | 0 | for (int iX = 0; iX < nXSize && eErr == CE_None; |
4832 | 0 | iX += nSwathCols) |
4833 | 0 | { |
4834 | 0 | int nThisCols = nSwathCols; |
4835 | |
|
4836 | 0 | if (iX + nThisCols > nXSize) |
4837 | 0 | nThisCols = nXSize - iX; |
4838 | |
|
4839 | 0 | int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA; |
4840 | 0 | if (bCheckHoles) |
4841 | 0 | { |
4842 | 0 | nStatus = poSrcDS->GetRasterBand(nBand) |
4843 | 0 | ->GetDataCoverageStatus( |
4844 | 0 | iX, iY, nThisCols, nThisLines, |
4845 | 0 | GDAL_DATA_COVERAGE_STATUS_DATA); |
4846 | 0 | } |
4847 | 0 | if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA) |
4848 | 0 | { |
4849 | 0 | sExtraArg.pfnProgress = GDALScaledProgress; |
4850 | 0 | sExtraArg.pProgressData = GDALCreateScaledProgress( |
4851 | 0 | nBlocksDone / static_cast<double>(nTotalBlocks), |
4852 | 0 | (nBlocksDone + 0.5) / |
4853 | 0 | static_cast<double>(nTotalBlocks), |
4854 | 0 | pfnProgress, pProgressData); |
4855 | 0 | if (sExtraArg.pProgressData == nullptr) |
4856 | 0 | sExtraArg.pfnProgress = nullptr; |
4857 | |
|
4858 | 0 | eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols, |
4859 | 0 | nThisLines, pSwathBuf, |
4860 | 0 | nThisCols, nThisLines, eDT, 1, |
4861 | 0 | &nBand, 0, 0, 0, &sExtraArg); |
4862 | |
|
4863 | 0 | GDALDestroyScaledProgress(sExtraArg.pProgressData); |
4864 | |
|
4865 | 0 | if (eErr == CE_None) |
4866 | 0 | eErr = poDstDS->RasterIO( |
4867 | 0 | GF_Write, iX, iY, nThisCols, nThisLines, |
4868 | 0 | pSwathBuf, nThisCols, nThisLines, eDT, 1, |
4869 | 0 | &nBand, 0, 0, 0, nullptr); |
4870 | 0 | } |
4871 | |
|
4872 | 0 | nBlocksDone++; |
4873 | 0 | if (eErr == CE_None && |
4874 | 0 | !pfnProgress(nBlocksDone / |
4875 | 0 | static_cast<double>(nTotalBlocks), |
4876 | 0 | nullptr, pProgressData)) |
4877 | 0 | { |
4878 | 0 | eErr = CE_Failure; |
4879 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
4880 | 0 | "User terminated CreateCopy()"); |
4881 | 0 | } |
4882 | 0 | } |
4883 | 0 | } |
4884 | 0 | } |
4885 | 0 | } |
4886 | | |
4887 | | /* ==================================================================== */ |
4888 | | /* Pixel interleaved case. */ |
4889 | | /* ==================================================================== */ |
4890 | 0 | else /* if( bInterleave ) */ |
4891 | 0 | { |
4892 | 0 | GDALRasterIOExtraArg sExtraArg; |
4893 | 0 | INIT_RASTERIO_EXTRA_ARG(sExtraArg); |
4894 | 0 | CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy |
4895 | |
|
4896 | 0 | const GIntBig nTotalBlocks = |
4897 | 0 | static_cast<GIntBig>(DIV_ROUND_UP(nYSize, nSwathLines)) * |
4898 | 0 | DIV_ROUND_UP(nXSize, nSwathCols); |
4899 | 0 | GIntBig nBlocksDone = 0; |
4900 | |
|
4901 | 0 | for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines) |
4902 | 0 | { |
4903 | 0 | int nThisLines = nSwathLines; |
4904 | |
|
4905 | 0 | if (iY + nThisLines > nYSize) |
4906 | 0 | nThisLines = nYSize - iY; |
4907 | |
|
4908 | 0 | for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols) |
4909 | 0 | { |
4910 | 0 | int nThisCols = nSwathCols; |
4911 | |
|
4912 | 0 | if (iX + nThisCols > nXSize) |
4913 | 0 | nThisCols = nXSize - iX; |
4914 | |
|
4915 | 0 | int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA; |
4916 | 0 | if (bCheckHoles) |
4917 | 0 | { |
4918 | 0 | nStatus = 0; |
4919 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4920 | 0 | { |
4921 | 0 | nStatus |= poSrcDS->GetRasterBand(iBand + 1) |
4922 | 0 | ->GetDataCoverageStatus( |
4923 | 0 | iX, iY, nThisCols, nThisLines, |
4924 | 0 | GDAL_DATA_COVERAGE_STATUS_DATA); |
4925 | 0 | if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA) |
4926 | 0 | break; |
4927 | 0 | } |
4928 | 0 | } |
4929 | 0 | if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA) |
4930 | 0 | { |
4931 | 0 | sExtraArg.pfnProgress = GDALScaledProgress; |
4932 | 0 | sExtraArg.pProgressData = GDALCreateScaledProgress( |
4933 | 0 | nBlocksDone / static_cast<double>(nTotalBlocks), |
4934 | 0 | (nBlocksDone + 0.5) / static_cast<double>(nTotalBlocks), |
4935 | 0 | pfnProgress, pProgressData); |
4936 | 0 | if (sExtraArg.pProgressData == nullptr) |
4937 | 0 | sExtraArg.pfnProgress = nullptr; |
4938 | |
|
4939 | 0 | eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols, |
4940 | 0 | nThisLines, pSwathBuf, nThisCols, |
4941 | 0 | nThisLines, eDT, nBandCount, |
4942 | 0 | nullptr, 0, 0, 0, &sExtraArg); |
4943 | |
|
4944 | 0 | GDALDestroyScaledProgress(sExtraArg.pProgressData); |
4945 | |
|
4946 | 0 | if (eErr == CE_None) |
4947 | 0 | eErr = poDstDS->RasterIO( |
4948 | 0 | GF_Write, iX, iY, nThisCols, nThisLines, pSwathBuf, |
4949 | 0 | nThisCols, nThisLines, eDT, nBandCount, nullptr, 0, |
4950 | 0 | 0, 0, nullptr); |
4951 | 0 | } |
4952 | |
|
4953 | 0 | nBlocksDone++; |
4954 | 0 | if (eErr == CE_None && |
4955 | 0 | !pfnProgress(nBlocksDone / |
4956 | 0 | static_cast<double>(nTotalBlocks), |
4957 | 0 | nullptr, pProgressData)) |
4958 | 0 | { |
4959 | 0 | eErr = CE_Failure; |
4960 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
4961 | 0 | "User terminated CreateCopy()"); |
4962 | 0 | } |
4963 | 0 | } |
4964 | 0 | } |
4965 | 0 | } |
4966 | | |
4967 | | /* -------------------------------------------------------------------- */ |
4968 | | /* Cleanup */ |
4969 | | /* -------------------------------------------------------------------- */ |
4970 | 0 | CPLFree(pSwathBuf); |
4971 | |
|
4972 | 0 | return eErr; |
4973 | 0 | } |
4974 | | |
4975 | | /************************************************************************/ |
4976 | | /* GDALRasterBandCopyWholeRaster() */ |
4977 | | /************************************************************************/ |
4978 | | |
4979 | | /** |
4980 | | * \brief Copy a whole raster band |
4981 | | * |
4982 | | * This function copies the complete raster contents of one band to |
4983 | | * another similarly configured band. The source and destination |
4984 | | * bands must have the same width and height. The bands do not have |
4985 | | * to have the same data type. |
4986 | | * |
4987 | | * It implements efficient copying, in particular "chunking" the copy in |
4988 | | * substantial blocks. |
4989 | | * |
4990 | | * Currently the only papszOptions value supported are : |
4991 | | * <ul> |
4992 | | * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to |
4993 | | * achieve best compression.</li> |
4994 | | * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus() |
4995 | | * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL >= 2.2)</li> |
4996 | | * </ul> |
4997 | | * |
4998 | | * @param hSrcBand the source band |
4999 | | * @param hDstBand the destination band |
5000 | | * @param papszOptions transfer hints in "StringList" Name=Value format. |
5001 | | * @param pfnProgress progress reporting function. |
5002 | | * @param pProgressData callback data for progress function. |
5003 | | * |
5004 | | * @return CE_None on success, or CE_Failure on failure. |
5005 | | */ |
5006 | | |
5007 | | CPLErr CPL_STDCALL GDALRasterBandCopyWholeRaster( |
5008 | | GDALRasterBandH hSrcBand, GDALRasterBandH hDstBand, |
5009 | | const char *const *const papszOptions, GDALProgressFunc pfnProgress, |
5010 | | void *pProgressData) |
5011 | | |
5012 | 0 | { |
5013 | 0 | VALIDATE_POINTER1(hSrcBand, "GDALRasterBandCopyWholeRaster", CE_Failure); |
5014 | 0 | VALIDATE_POINTER1(hDstBand, "GDALRasterBandCopyWholeRaster", CE_Failure); |
5015 | | |
5016 | 0 | GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand); |
5017 | 0 | GDALRasterBand *poDstBand = GDALRasterBand::FromHandle(hDstBand); |
5018 | 0 | CPLErr eErr = CE_None; |
5019 | |
|
5020 | 0 | if (pfnProgress == nullptr) |
5021 | 0 | pfnProgress = GDALDummyProgress; |
5022 | | |
5023 | | /* -------------------------------------------------------------------- */ |
5024 | | /* Confirm the datasets match in size and band counts. */ |
5025 | | /* -------------------------------------------------------------------- */ |
5026 | 0 | int nXSize = poSrcBand->GetXSize(); |
5027 | 0 | int nYSize = poSrcBand->GetYSize(); |
5028 | |
|
5029 | 0 | if (poDstBand->GetXSize() != nXSize || poDstBand->GetYSize() != nYSize) |
5030 | 0 | { |
5031 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
5032 | 0 | "Input and output band sizes do not\n" |
5033 | 0 | "match in GDALRasterBandCopyWholeRaster()"); |
5034 | 0 | return CE_Failure; |
5035 | 0 | } |
5036 | | |
5037 | | /* -------------------------------------------------------------------- */ |
5038 | | /* Report preliminary (0) progress. */ |
5039 | | /* -------------------------------------------------------------------- */ |
5040 | 0 | if (!pfnProgress(0.0, nullptr, pProgressData)) |
5041 | 0 | { |
5042 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
5043 | 0 | "User terminated CreateCopy()"); |
5044 | 0 | return CE_Failure; |
5045 | 0 | } |
5046 | | |
5047 | 0 | GDALDataType eDT = poDstBand->GetRasterDataType(); |
5048 | | |
5049 | | // If the destination is compressed, we must try to write blocks just once, |
5050 | | // to save disk space (GTiff case for example), and to avoid data loss |
5051 | | // (JPEG compression for example). |
5052 | 0 | bool bDstIsCompressed = false; |
5053 | 0 | const char *pszDstCompressed = |
5054 | 0 | CSLFetchNameValue(const_cast<char **>(papszOptions), "COMPRESSED"); |
5055 | 0 | if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed)) |
5056 | 0 | bDstIsCompressed = true; |
5057 | | |
5058 | | /* -------------------------------------------------------------------- */ |
5059 | | /* What will our swath size be? */ |
5060 | | /* -------------------------------------------------------------------- */ |
5061 | |
|
5062 | 0 | int nSwathCols = 0; |
5063 | 0 | int nSwathLines = 0; |
5064 | 0 | GDALCopyWholeRasterGetSwathSize(poSrcBand, poDstBand, 1, bDstIsCompressed, |
5065 | 0 | FALSE, &nSwathCols, &nSwathLines); |
5066 | |
|
5067 | 0 | const int nPixelSize = GDALGetDataTypeSizeBytes(eDT); |
5068 | |
|
5069 | 0 | void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize); |
5070 | 0 | if (pSwathBuf == nullptr) |
5071 | 0 | { |
5072 | 0 | return CE_Failure; |
5073 | 0 | } |
5074 | | |
5075 | 0 | CPLDebug("GDAL", "GDALRasterBandCopyWholeRaster(): %d*%d swaths", |
5076 | 0 | nSwathCols, nSwathLines); |
5077 | |
|
5078 | 0 | const bool bCheckHoles = |
5079 | 0 | CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO")); |
5080 | | |
5081 | | // Advise the source raster that we are going to read it completely |
5082 | 0 | poSrcBand->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nullptr); |
5083 | | |
5084 | | /* ==================================================================== */ |
5085 | | /* Band oriented (uninterleaved) case. */ |
5086 | | /* ==================================================================== */ |
5087 | |
|
5088 | 0 | for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines) |
5089 | 0 | { |
5090 | 0 | int nThisLines = nSwathLines; |
5091 | |
|
5092 | 0 | if (iY + nThisLines > nYSize) |
5093 | 0 | nThisLines = nYSize - iY; |
5094 | |
|
5095 | 0 | for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols) |
5096 | 0 | { |
5097 | 0 | int nThisCols = nSwathCols; |
5098 | |
|
5099 | 0 | if (iX + nThisCols > nXSize) |
5100 | 0 | nThisCols = nXSize - iX; |
5101 | |
|
5102 | 0 | int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA; |
5103 | 0 | if (bCheckHoles) |
5104 | 0 | { |
5105 | 0 | nStatus = poSrcBand->GetDataCoverageStatus( |
5106 | 0 | iX, iY, nThisCols, nThisLines, |
5107 | 0 | GDAL_DATA_COVERAGE_STATUS_DATA); |
5108 | 0 | } |
5109 | 0 | if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA) |
5110 | 0 | { |
5111 | 0 | eErr = poSrcBand->RasterIO(GF_Read, iX, iY, nThisCols, |
5112 | 0 | nThisLines, pSwathBuf, nThisCols, |
5113 | 0 | nThisLines, eDT, 0, 0, nullptr); |
5114 | |
|
5115 | 0 | if (eErr == CE_None) |
5116 | 0 | eErr = poDstBand->RasterIO(GF_Write, iX, iY, nThisCols, |
5117 | 0 | nThisLines, pSwathBuf, nThisCols, |
5118 | 0 | nThisLines, eDT, 0, 0, nullptr); |
5119 | 0 | } |
5120 | |
|
5121 | 0 | if (eErr == CE_None && |
5122 | 0 | !pfnProgress((iY + nThisLines) / static_cast<float>(nYSize), |
5123 | 0 | nullptr, pProgressData)) |
5124 | 0 | { |
5125 | 0 | eErr = CE_Failure; |
5126 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
5127 | 0 | "User terminated CreateCopy()"); |
5128 | 0 | } |
5129 | 0 | } |
5130 | 0 | } |
5131 | | |
5132 | | /* -------------------------------------------------------------------- */ |
5133 | | /* Cleanup */ |
5134 | | /* -------------------------------------------------------------------- */ |
5135 | 0 | CPLFree(pSwathBuf); |
5136 | |
|
5137 | 0 | return eErr; |
5138 | 0 | } |
5139 | | |
5140 | | /************************************************************************/ |
5141 | | /* GDALCopyRasterIOExtraArg () */ |
5142 | | /************************************************************************/ |
5143 | | |
5144 | | void GDALCopyRasterIOExtraArg(GDALRasterIOExtraArg *psDestArg, |
5145 | | GDALRasterIOExtraArg *psSrcArg) |
5146 | 0 | { |
5147 | 0 | INIT_RASTERIO_EXTRA_ARG(*psDestArg); |
5148 | 0 | if (psSrcArg) |
5149 | 0 | { |
5150 | 0 | psDestArg->eResampleAlg = psSrcArg->eResampleAlg; |
5151 | 0 | psDestArg->pfnProgress = psSrcArg->pfnProgress; |
5152 | 0 | psDestArg->pProgressData = psSrcArg->pProgressData; |
5153 | 0 | psDestArg->bFloatingPointWindowValidity = |
5154 | 0 | psSrcArg->bFloatingPointWindowValidity; |
5155 | 0 | if (psSrcArg->bFloatingPointWindowValidity) |
5156 | 0 | { |
5157 | 0 | psDestArg->dfXOff = psSrcArg->dfXOff; |
5158 | 0 | psDestArg->dfYOff = psSrcArg->dfYOff; |
5159 | 0 | psDestArg->dfXSize = psSrcArg->dfXSize; |
5160 | 0 | psDestArg->dfYSize = psSrcArg->dfYSize; |
5161 | 0 | } |
5162 | 0 | if (psSrcArg->nVersion >= 2) |
5163 | 0 | { |
5164 | 0 | psDestArg->bUseOnlyThisScale = psSrcArg->bUseOnlyThisScale; |
5165 | 0 | } |
5166 | 0 | } |
5167 | 0 | } |
5168 | | |
5169 | | /************************************************************************/ |
5170 | | /* HasOnlyNoData() */ |
5171 | | /************************************************************************/ |
5172 | | |
/** Generic nodata comparison: plain equality between the sample and the
 * nodata value. Floating-point types have dedicated specializations. */
template <class T> static inline bool IsEqualToNoData(T value, T noDataValue)
{
    const bool bMatches = (value == noDataValue);
    return bMatches;
}
5177 | | |
5178 | | template <> bool IsEqualToNoData<GFloat16>(GFloat16 value, GFloat16 noDataValue) |
5179 | 0 | { |
5180 | 0 | using std::isnan; |
5181 | 0 | return isnan(noDataValue) ? isnan(value) : value == noDataValue; |
5182 | 0 | } |
5183 | | |
5184 | | template <> bool IsEqualToNoData<float>(float value, float noDataValue) |
5185 | 0 | { |
5186 | 0 | return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue; |
5187 | 0 | } |
5188 | | |
5189 | | template <> bool IsEqualToNoData<double>(double value, double noDataValue) |
5190 | 0 | { |
5191 | 0 | return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue; |
5192 | 0 | } |
5193 | | |
/** Return whether all samples of a window are equal to the nodata value.
 *
 * The buffer is addressed as pixel-interleaved: the sample of component
 * iBand of pixel (iX, iY) is at index
 * (iY * nLineStride + iX) * nComponents + iBand.
 *
 * @param pBuffer first sample of the window.
 * @param noDataValue nodata value, compared with IsEqualToNoData().
 * @param nWidth window width in pixels.
 * @param nHeight window height in lines.
 * @param nLineStride distance between two consecutive lines, in pixels.
 * @param nComponents number of samples per pixel.
 * @return true if every sample matches the nodata value, or if the window
 * is empty.
 */
template <class T>
static bool HasOnlyNoDataT(const T *pBuffer, T noDataValue, size_t nWidth,
                           size_t nHeight, size_t nLineStride,
                           size_t nComponents)
{
    // An empty window has no sample differing from nodata. Returning early
    // also protects the corner tests below, where nWidth - 1 and
    // nHeight - 1 would otherwise wrap around on unsigned arithmetic and
    // index far outside of the buffer.
    if (nWidth == 0 || nHeight == 0 || nComponents == 0)
        return true;

    const size_t nLastCol = nWidth - 1;
    const size_t nLastLineOffset = (nHeight - 1) * nLineStride;
    const size_t nMiddleOffset =
        (nHeight - 1) / 2 * nLineStride + (nWidth - 1) / 2;

    // Fast test: check the 4 corners and the middle pixel.
    for (size_t iBand = 0; iBand < nComponents; iBand++)
    {
        const auto IsNoDataAtPixel = [&](size_t nPixelOffset)
        {
            return IsEqualToNoData(pBuffer[nPixelOffset * nComponents + iBand],
                                   noDataValue);
        };

        if (!(IsNoDataAtPixel(0) && IsNoDataAtPixel(nLastCol) &&
              IsNoDataAtPixel(nMiddleOffset) &&
              IsNoDataAtPixel(nLastLineOffset) &&
              IsNoDataAtPixel(nLastLineOffset + nLastCol)))
        {
            return false;
        }
    }

    // Test all pixels.
    const size_t nSamplesPerLine = nWidth * nComponents;
    for (size_t iY = 0; iY < nHeight; iY++)
    {
        const T *pBufferLine = pBuffer + iY * nLineStride * nComponents;
        for (size_t iX = 0; iX < nSamplesPerLine; iX++)
        {
            if (!IsEqualToNoData(pBufferLine[iX], noDataValue))
            {
                return false;
            }
        }
    }
    return true;
}
5237 | | |
5238 | | /************************************************************************/ |
5239 | | /* GDALBufferHasOnlyNoData() */ |
5240 | | /************************************************************************/ |
5241 | | |
5242 | | bool GDALBufferHasOnlyNoData(const void *pBuffer, double dfNoDataValue, |
5243 | | size_t nWidth, size_t nHeight, size_t nLineStride, |
5244 | | size_t nComponents, int nBitsPerSample, |
5245 | | GDALBufferSampleFormat nSampleFormat) |
5246 | 0 | { |
5247 | | // In the case where the nodata is 0, we can compare several bytes at |
5248 | | // once. Select the largest natural integer type for the architecture. |
5249 | 0 | #if SIZEOF_VOIDP >= 8 || defined(__x86_64__) |
5250 | | // We test __x86_64__ for x32 arch where SIZEOF_VOIDP == 4 |
5251 | 0 | typedef std::uint64_t WordType; |
5252 | | #else |
5253 | | typedef std::uint32_t WordType; |
5254 | | #endif |
5255 | 0 | if (dfNoDataValue == 0.0 && nWidth == nLineStride && |
5256 | | // Do not use this optimized code path for floating point numbers, |
5257 | | // as it can't detect negative zero. |
5258 | 0 | nSampleFormat != GSF_FLOATING_POINT) |
5259 | 0 | { |
5260 | 0 | const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer); |
5261 | 0 | const size_t nSize = |
5262 | 0 | (nWidth * nHeight * nComponents * nBitsPerSample + 7) / 8; |
5263 | 0 | size_t i = 0; |
5264 | 0 | const size_t nInitialIters = |
5265 | 0 | std::min(sizeof(WordType) - |
5266 | 0 | static_cast<size_t>( |
5267 | 0 | reinterpret_cast<std::uintptr_t>(pabyBuffer) % |
5268 | 0 | sizeof(WordType)), |
5269 | 0 | nSize); |
5270 | 0 | for (; i < nInitialIters; i++) |
5271 | 0 | { |
5272 | 0 | if (pabyBuffer[i]) |
5273 | 0 | return false; |
5274 | 0 | } |
5275 | 0 | for (; i + sizeof(WordType) - 1 < nSize; i += sizeof(WordType)) |
5276 | 0 | { |
5277 | 0 | if (*(reinterpret_cast<const WordType *>(pabyBuffer + i))) |
5278 | 0 | return false; |
5279 | 0 | } |
5280 | 0 | for (; i < nSize; i++) |
5281 | 0 | { |
5282 | 0 | if (pabyBuffer[i]) |
5283 | 0 | return false; |
5284 | 0 | } |
5285 | 0 | return true; |
5286 | 0 | } |
5287 | | |
5288 | 0 | if (nBitsPerSample == 8 && nSampleFormat == GSF_UNSIGNED_INT) |
5289 | 0 | { |
5290 | 0 | return GDALIsValueInRange<uint8_t>(dfNoDataValue) && |
5291 | 0 | HasOnlyNoDataT(static_cast<const uint8_t *>(pBuffer), |
5292 | 0 | static_cast<uint8_t>(dfNoDataValue), nWidth, |
5293 | 0 | nHeight, nLineStride, nComponents); |
5294 | 0 | } |
5295 | 0 | if (nBitsPerSample == 8 && nSampleFormat == GSF_SIGNED_INT) |
5296 | 0 | { |
5297 | | // Use unsigned implementation by converting the nodatavalue to |
5298 | | // unsigned |
5299 | 0 | return GDALIsValueInRange<int8_t>(dfNoDataValue) && |
5300 | 0 | HasOnlyNoDataT( |
5301 | 0 | static_cast<const uint8_t *>(pBuffer), |
5302 | 0 | static_cast<uint8_t>(static_cast<int8_t>(dfNoDataValue)), |
5303 | 0 | nWidth, nHeight, nLineStride, nComponents); |
5304 | 0 | } |
5305 | 0 | if (nBitsPerSample == 16 && nSampleFormat == GSF_UNSIGNED_INT) |
5306 | 0 | { |
5307 | 0 | return GDALIsValueInRange<uint16_t>(dfNoDataValue) && |
5308 | 0 | HasOnlyNoDataT(static_cast<const uint16_t *>(pBuffer), |
5309 | 0 | static_cast<uint16_t>(dfNoDataValue), nWidth, |
5310 | 0 | nHeight, nLineStride, nComponents); |
5311 | 0 | } |
5312 | 0 | if (nBitsPerSample == 16 && nSampleFormat == GSF_SIGNED_INT) |
5313 | 0 | { |
5314 | | // Use unsigned implementation by converting the nodatavalue to |
5315 | | // unsigned |
5316 | 0 | return GDALIsValueInRange<int16_t>(dfNoDataValue) && |
5317 | 0 | HasOnlyNoDataT( |
5318 | 0 | static_cast<const uint16_t *>(pBuffer), |
5319 | 0 | static_cast<uint16_t>(static_cast<int16_t>(dfNoDataValue)), |
5320 | 0 | nWidth, nHeight, nLineStride, nComponents); |
5321 | 0 | } |
5322 | 0 | if (nBitsPerSample == 32 && nSampleFormat == GSF_UNSIGNED_INT) |
5323 | 0 | { |
5324 | 0 | return GDALIsValueInRange<uint32_t>(dfNoDataValue) && |
5325 | 0 | HasOnlyNoDataT(static_cast<const uint32_t *>(pBuffer), |
5326 | 0 | static_cast<uint32_t>(dfNoDataValue), nWidth, |
5327 | 0 | nHeight, nLineStride, nComponents); |
5328 | 0 | } |
5329 | 0 | if (nBitsPerSample == 32 && nSampleFormat == GSF_SIGNED_INT) |
5330 | 0 | { |
5331 | | // Use unsigned implementation by converting the nodatavalue to |
5332 | | // unsigned |
5333 | 0 | return GDALIsValueInRange<int32_t>(dfNoDataValue) && |
5334 | 0 | HasOnlyNoDataT( |
5335 | 0 | static_cast<const uint32_t *>(pBuffer), |
5336 | 0 | static_cast<uint32_t>(static_cast<int32_t>(dfNoDataValue)), |
5337 | 0 | nWidth, nHeight, nLineStride, nComponents); |
5338 | 0 | } |
5339 | 0 | if (nBitsPerSample == 64 && nSampleFormat == GSF_UNSIGNED_INT) |
5340 | 0 | { |
5341 | 0 | return GDALIsValueInRange<uint64_t>(dfNoDataValue) && |
5342 | 0 | HasOnlyNoDataT(static_cast<const uint64_t *>(pBuffer), |
5343 | 0 | static_cast<uint64_t>(dfNoDataValue), nWidth, |
5344 | 0 | nHeight, nLineStride, nComponents); |
5345 | 0 | } |
5346 | 0 | if (nBitsPerSample == 64 && nSampleFormat == GSF_SIGNED_INT) |
5347 | 0 | { |
5348 | | // Use unsigned implementation by converting the nodatavalue to |
5349 | | // unsigned |
5350 | 0 | return GDALIsValueInRange<int64_t>(dfNoDataValue) && |
5351 | 0 | HasOnlyNoDataT( |
5352 | 0 | static_cast<const uint64_t *>(pBuffer), |
5353 | 0 | static_cast<uint64_t>(static_cast<int64_t>(dfNoDataValue)), |
5354 | 0 | nWidth, nHeight, nLineStride, nComponents); |
5355 | 0 | } |
5356 | 0 | if (nBitsPerSample == 16 && nSampleFormat == GSF_FLOATING_POINT) |
5357 | 0 | { |
5358 | 0 | return (std::isnan(dfNoDataValue) || |
5359 | 0 | GDALIsValueInRange<GFloat16>(dfNoDataValue)) && |
5360 | 0 | HasOnlyNoDataT(static_cast<const GFloat16 *>(pBuffer), |
5361 | 0 | static_cast<GFloat16>(dfNoDataValue), nWidth, |
5362 | 0 | nHeight, nLineStride, nComponents); |
5363 | 0 | } |
5364 | 0 | if (nBitsPerSample == 32 && nSampleFormat == GSF_FLOATING_POINT) |
5365 | 0 | { |
5366 | 0 | return (std::isnan(dfNoDataValue) || |
5367 | 0 | GDALIsValueInRange<float>(dfNoDataValue)) && |
5368 | 0 | HasOnlyNoDataT(static_cast<const float *>(pBuffer), |
5369 | 0 | static_cast<float>(dfNoDataValue), nWidth, |
5370 | 0 | nHeight, nLineStride, nComponents); |
5371 | 0 | } |
5372 | 0 | if (nBitsPerSample == 64 && nSampleFormat == GSF_FLOATING_POINT) |
5373 | 0 | { |
5374 | 0 | return HasOnlyNoDataT(static_cast<const double *>(pBuffer), |
5375 | 0 | dfNoDataValue, nWidth, nHeight, nLineStride, |
5376 | 0 | nComponents); |
5377 | 0 | } |
5378 | 0 | return false; |
5379 | 0 | } |
5380 | | |
5381 | | #ifdef HAVE_SSE2 |
5382 | | |
5383 | | /************************************************************************/ |
5384 | | /* GDALDeinterleave3Byte() */ |
5385 | | /************************************************************************/ |
5386 | | |
5387 | | #if defined(__GNUC__) && !defined(__clang__) |
5388 | | __attribute__((optimize("no-tree-vectorize"))) |
5389 | | #endif |
5390 | | static void |
5391 | | GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc, |
5392 | | GByte *CPL_RESTRICT pabyDest0, |
5393 | | GByte *CPL_RESTRICT pabyDest1, |
5394 | | GByte *CPL_RESTRICT pabyDest2, size_t nIters) |
5395 | | #ifdef USE_NEON_OPTIMIZATIONS |
5396 | | { |
5397 | | return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2, |
5398 | | nIters); |
5399 | | } |
5400 | | #else |
5401 | 0 | { |
5402 | 0 | #ifdef HAVE_SSSE3_AT_COMPILE_TIME |
5403 | 0 | if (CPLHaveRuntimeSSSE3()) |
5404 | 0 | { |
5405 | 0 | return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, |
5406 | 0 | pabyDest2, nIters); |
5407 | 0 | } |
5408 | 0 | #endif |
5409 | | |
5410 | 0 | size_t i = 0; |
5411 | 0 | if (((reinterpret_cast<uintptr_t>(pabySrc) | |
5412 | 0 | reinterpret_cast<uintptr_t>(pabyDest0) | |
5413 | 0 | reinterpret_cast<uintptr_t>(pabyDest1) | |
5414 | 0 | reinterpret_cast<uintptr_t>(pabyDest2)) % |
5415 | 0 | sizeof(unsigned int)) == 0) |
5416 | 0 | { |
5417 | | // Slightly better than GCC autovectorizer |
5418 | 0 | for (size_t j = 0; i + 3 < nIters; i += 4, ++j) |
5419 | 0 | { |
5420 | 0 | unsigned int word0 = |
5421 | 0 | *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i); |
5422 | 0 | unsigned int word1 = |
5423 | 0 | *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 4); |
5424 | 0 | unsigned int word2 = |
5425 | 0 | *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 8); |
5426 | 0 | reinterpret_cast<unsigned int *>(pabyDest0)[j] = |
5427 | 0 | (word0 & 0xff) | ((word0 >> 24) << 8) | (word1 & 0x00ff0000) | |
5428 | 0 | ((word2 >> 8) << 24); |
5429 | 0 | reinterpret_cast<unsigned int *>(pabyDest1)[j] = |
5430 | 0 | ((word0 >> 8) & 0xff) | ((word1 & 0xff) << 8) | |
5431 | 0 | (((word1 >> 24)) << 16) | ((word2 >> 16) << 24); |
5432 | 0 | pabyDest2[j * 4] = static_cast<GByte>(word0 >> 16); |
5433 | 0 | pabyDest2[j * 4 + 1] = static_cast<GByte>(word1 >> 8); |
5434 | 0 | pabyDest2[j * 4 + 2] = static_cast<GByte>(word2); |
5435 | 0 | pabyDest2[j * 4 + 3] = static_cast<GByte>(word2 >> 24); |
5436 | 0 | } |
5437 | 0 | } |
5438 | 0 | #if defined(__clang__) |
5439 | 0 | #pragma clang loop vectorize(disable) |
5440 | 0 | #endif |
5441 | 0 | for (; i < nIters; ++i) |
5442 | 0 | { |
5443 | 0 | pabyDest0[i] = pabySrc[3 * i + 0]; |
5444 | 0 | pabyDest1[i] = pabySrc[3 * i + 1]; |
5445 | 0 | pabyDest2[i] = pabySrc[3 * i + 2]; |
5446 | 0 | } |
5447 | 0 | } |
5448 | | #endif |
5449 | | |
5450 | | /************************************************************************/ |
5451 | | /* GDALDeinterleave4Byte() */ |
5452 | | /************************************************************************/ |
5453 | | |
5454 | | #if !defined(__GNUC__) || defined(__clang__) |
5455 | | |
5456 | | /************************************************************************/ |
5457 | | /* deinterleave() */ |
5458 | | /************************************************************************/ |
5459 | | |
5460 | | template <bool SHIFT, bool MASK> |
5461 | | inline __m128i deinterleave(__m128i &xmm0_ori, __m128i &xmm1_ori, |
5462 | | __m128i &xmm2_ori, __m128i &xmm3_ori) |
5463 | 0 | { |
5464 | | // Set higher 24bit of each int32 packed word to 0 |
5465 | 0 | if (SHIFT) |
5466 | 0 | { |
5467 | 0 | xmm0_ori = _mm_srli_epi32(xmm0_ori, 8); |
5468 | 0 | xmm1_ori = _mm_srli_epi32(xmm1_ori, 8); |
5469 | 0 | xmm2_ori = _mm_srli_epi32(xmm2_ori, 8); |
5470 | 0 | xmm3_ori = _mm_srli_epi32(xmm3_ori, 8); |
5471 | 0 | } |
5472 | 0 | __m128i xmm0; |
5473 | 0 | __m128i xmm1; |
5474 | 0 | __m128i xmm2; |
5475 | 0 | __m128i xmm3; |
5476 | 0 | if (MASK) |
5477 | 0 | { |
5478 | 0 | const __m128i xmm_mask = _mm_set1_epi32(0xff); |
5479 | 0 | xmm0 = _mm_and_si128(xmm0_ori, xmm_mask); |
5480 | 0 | xmm1 = _mm_and_si128(xmm1_ori, xmm_mask); |
5481 | 0 | xmm2 = _mm_and_si128(xmm2_ori, xmm_mask); |
5482 | 0 | xmm3 = _mm_and_si128(xmm3_ori, xmm_mask); |
5483 | 0 | } |
5484 | 0 | else |
5485 | 0 | { |
5486 | 0 | xmm0 = xmm0_ori; |
5487 | 0 | xmm1 = xmm1_ori; |
5488 | 0 | xmm2 = xmm2_ori; |
5489 | 0 | xmm3 = xmm3_ori; |
5490 | 0 | } |
5491 | | // Pack int32 to int16 |
5492 | 0 | xmm0 = _mm_packs_epi32(xmm0, xmm1); |
5493 | 0 | xmm2 = _mm_packs_epi32(xmm2, xmm3); |
5494 | | // Pack int16 to uint8 |
5495 | 0 | xmm0 = _mm_packus_epi16(xmm0, xmm2); |
5496 | 0 | return xmm0; |
5497 | 0 | } Unexecuted instantiation: long long __vector(2) deinterleave<false, true>(long long __vector(2)&, long long __vector(2)&, long long __vector(2)&, long long __vector(2)&) Unexecuted instantiation: long long __vector(2) deinterleave<true, true>(long long __vector(2)&, long long __vector(2)&, long long __vector(2)&, long long __vector(2)&) Unexecuted instantiation: long long __vector(2) deinterleave<true, false>(long long __vector(2)&, long long __vector(2)&, long long __vector(2)&, long long __vector(2)&) |
5498 | | |
5499 | | static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc, |
5500 | | GByte *CPL_RESTRICT pabyDest0, |
5501 | | GByte *CPL_RESTRICT pabyDest1, |
5502 | | GByte *CPL_RESTRICT pabyDest2, |
5503 | | GByte *CPL_RESTRICT pabyDest3, size_t nIters) |
5504 | | #ifdef USE_NEON_OPTIMIZATIONS |
5505 | | { |
5506 | | return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2, |
5507 | | pabyDest3, nIters); |
5508 | | } |
5509 | | #else |
5510 | 0 | { |
5511 | 0 | #ifdef HAVE_SSSE3_AT_COMPILE_TIME |
5512 | 0 | if (CPLHaveRuntimeSSSE3()) |
5513 | 0 | { |
5514 | 0 | return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, |
5515 | 0 | pabyDest2, pabyDest3, nIters); |
5516 | 0 | } |
5517 | 0 | #endif |
5518 | | |
5519 | | // Not the optimal SSE2-only code, as gcc auto-vectorizer manages to |
5520 | | // do something slightly better. |
5521 | 0 | size_t i = 0; |
5522 | 0 | for (; i + 15 < nIters; i += 16) |
5523 | 0 | { |
5524 | 0 | __m128i xmm0_ori = _mm_loadu_si128( |
5525 | 0 | reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 0)); |
5526 | 0 | __m128i xmm1_ori = _mm_loadu_si128( |
5527 | 0 | reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 16)); |
5528 | 0 | __m128i xmm2_ori = _mm_loadu_si128( |
5529 | 0 | reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 32)); |
5530 | 0 | __m128i xmm3_ori = _mm_loadu_si128( |
5531 | 0 | reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 48)); |
5532 | |
|
5533 | 0 | _mm_storeu_si128( |
5534 | 0 | reinterpret_cast<__m128i *>(pabyDest0 + i), |
5535 | 0 | deinterleave<false, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori)); |
5536 | 0 | _mm_storeu_si128( |
5537 | 0 | reinterpret_cast<__m128i *>(pabyDest1 + i), |
5538 | 0 | deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori)); |
5539 | 0 | _mm_storeu_si128( |
5540 | 0 | reinterpret_cast<__m128i *>(pabyDest2 + i), |
5541 | 0 | deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori)); |
5542 | 0 | _mm_storeu_si128( |
5543 | 0 | reinterpret_cast<__m128i *>(pabyDest3 + i), |
5544 | 0 | deinterleave<true, false>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori)); |
5545 | 0 | } |
5546 | |
|
5547 | 0 | #if defined(__clang__) |
5548 | 0 | #pragma clang loop vectorize(disable) |
5549 | 0 | #endif |
5550 | 0 | for (; i < nIters; ++i) |
5551 | 0 | { |
5552 | 0 | pabyDest0[i] = pabySrc[4 * i + 0]; |
5553 | 0 | pabyDest1[i] = pabySrc[4 * i + 1]; |
5554 | 0 | pabyDest2[i] = pabySrc[4 * i + 2]; |
5555 | 0 | pabyDest3[i] = pabySrc[4 * i + 3]; |
5556 | 0 | } |
5557 | 0 | } |
5558 | | #endif |
5559 | | #else |
5560 | | // GCC autovectorizer does an excellent job |
5561 | | __attribute__((optimize("tree-vectorize"))) static void GDALDeinterleave4Byte( |
5562 | | const GByte *CPL_RESTRICT pabySrc, GByte *CPL_RESTRICT pabyDest0, |
5563 | | GByte *CPL_RESTRICT pabyDest1, GByte *CPL_RESTRICT pabyDest2, |
5564 | | GByte *CPL_RESTRICT pabyDest3, size_t nIters) |
5565 | | { |
5566 | | for (size_t i = 0; i < nIters; ++i) |
5567 | | { |
5568 | | pabyDest0[i] = pabySrc[4 * i + 0]; |
5569 | | pabyDest1[i] = pabySrc[4 * i + 1]; |
5570 | | pabyDest2[i] = pabySrc[4 * i + 2]; |
5571 | | pabyDest3[i] = pabySrc[4 * i + 3]; |
5572 | | } |
5573 | | } |
5574 | | #endif |
5575 | | |
5576 | | #else |
5577 | | |
5578 | | /************************************************************************/ |
5579 | | /* GDALDeinterleave3Byte() */ |
5580 | | /************************************************************************/ |
5581 | | |
5582 | | // TODO: Enabling below could help on non-Intel architectures where GCC knows |
5583 | | // how to auto-vectorize |
5584 | | // #if defined(__GNUC__) |
5585 | | //__attribute__((optimize("tree-vectorize"))) |
5586 | | // #endif |
5587 | | static void GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc, |
5588 | | GByte *CPL_RESTRICT pabyDest0, |
5589 | | GByte *CPL_RESTRICT pabyDest1, |
5590 | | GByte *CPL_RESTRICT pabyDest2, size_t nIters) |
5591 | | { |
5592 | | for (size_t i = 0; i < nIters; ++i) |
5593 | | { |
5594 | | pabyDest0[i] = pabySrc[3 * i + 0]; |
5595 | | pabyDest1[i] = pabySrc[3 * i + 1]; |
5596 | | pabyDest2[i] = pabySrc[3 * i + 2]; |
5597 | | } |
5598 | | } |
5599 | | |
5600 | | /************************************************************************/ |
5601 | | /* GDALDeinterleave4Byte() */ |
5602 | | /************************************************************************/ |
5603 | | |
5604 | | // TODO: Enabling below could help on non-Intel architectures where gcc knows |
5605 | | // how to auto-vectorize |
5606 | | // #if defined(__GNUC__) |
5607 | | //__attribute__((optimize("tree-vectorize"))) |
5608 | | // #endif |
5609 | | static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc, |
5610 | | GByte *CPL_RESTRICT pabyDest0, |
5611 | | GByte *CPL_RESTRICT pabyDest1, |
5612 | | GByte *CPL_RESTRICT pabyDest2, |
5613 | | GByte *CPL_RESTRICT pabyDest3, size_t nIters) |
5614 | | { |
5615 | | for (size_t i = 0; i < nIters; ++i) |
5616 | | { |
5617 | | pabyDest0[i] = pabySrc[4 * i + 0]; |
5618 | | pabyDest1[i] = pabySrc[4 * i + 1]; |
5619 | | pabyDest2[i] = pabySrc[4 * i + 2]; |
5620 | | pabyDest3[i] = pabySrc[4 * i + 3]; |
5621 | | } |
5622 | | } |
5623 | | |
5624 | | #endif |
5625 | | |
5626 | | /************************************************************************/ |
5627 | | /* GDALDeinterleave() */ |
5628 | | /************************************************************************/ |
5629 | | |
5630 | | /*! Copy values from a pixel-interleave buffer to multiple per-component |
5631 | | buffers. |
5632 | | |
5633 | | In pseudo-code |
5634 | | \verbatim |
5635 | | for(size_t i = 0; i < nIters; ++i) |
5636 | | for(int iComp = 0; iComp < nComponents; iComp++ ) |
5637 | | ppDestBuffer[iComp][i] = pSourceBuffer[nComponents * i + iComp] |
5638 | | \endverbatim |
5639 | | |
5640 | | The implementation is optimized for a few cases, like de-interleaving |
5641 | | of 3 or 4-components Byte buffers. |
5642 | | |
5643 | | \since GDAL 3.6 |
5644 | | */ |
5645 | | void GDALDeinterleave(const void *pSourceBuffer, GDALDataType eSourceDT, |
5646 | | int nComponents, void **ppDestBuffer, |
5647 | | GDALDataType eDestDT, size_t nIters) |
5648 | 0 | { |
5649 | 0 | if (eSourceDT == eDestDT) |
5650 | 0 | { |
5651 | 0 | if (eSourceDT == GDT_Byte || eSourceDT == GDT_Int8) |
5652 | 0 | { |
5653 | 0 | if (nComponents == 3) |
5654 | 0 | { |
5655 | 0 | const GByte *CPL_RESTRICT pabySrc = |
5656 | 0 | static_cast<const GByte *>(pSourceBuffer); |
5657 | 0 | GByte *CPL_RESTRICT pabyDest0 = |
5658 | 0 | static_cast<GByte *>(ppDestBuffer[0]); |
5659 | 0 | GByte *CPL_RESTRICT pabyDest1 = |
5660 | 0 | static_cast<GByte *>(ppDestBuffer[1]); |
5661 | 0 | GByte *CPL_RESTRICT pabyDest2 = |
5662 | 0 | static_cast<GByte *>(ppDestBuffer[2]); |
5663 | 0 | GDALDeinterleave3Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2, |
5664 | 0 | nIters); |
5665 | 0 | return; |
5666 | 0 | } |
5667 | 0 | else if (nComponents == 4) |
5668 | 0 | { |
5669 | 0 | const GByte *CPL_RESTRICT pabySrc = |
5670 | 0 | static_cast<const GByte *>(pSourceBuffer); |
5671 | 0 | GByte *CPL_RESTRICT pabyDest0 = |
5672 | 0 | static_cast<GByte *>(ppDestBuffer[0]); |
5673 | 0 | GByte *CPL_RESTRICT pabyDest1 = |
5674 | 0 | static_cast<GByte *>(ppDestBuffer[1]); |
5675 | 0 | GByte *CPL_RESTRICT pabyDest2 = |
5676 | 0 | static_cast<GByte *>(ppDestBuffer[2]); |
5677 | 0 | GByte *CPL_RESTRICT pabyDest3 = |
5678 | 0 | static_cast<GByte *>(ppDestBuffer[3]); |
5679 | 0 | GDALDeinterleave4Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2, |
5680 | 0 | pabyDest3, nIters); |
5681 | 0 | return; |
5682 | 0 | } |
5683 | 0 | } |
5684 | | #if ((defined(__GNUC__) && !defined(__clang__)) || \ |
5685 | | defined(__INTEL_CLANG_COMPILER)) && \ |
5686 | | defined(HAVE_SSE2) && defined(HAVE_SSSE3_AT_COMPILE_TIME) |
5687 | | else if ((eSourceDT == GDT_Int16 || eSourceDT == GDT_UInt16) && |
5688 | | CPLHaveRuntimeSSSE3()) |
5689 | | { |
5690 | | if (nComponents == 3) |
5691 | | { |
5692 | | const GUInt16 *CPL_RESTRICT panSrc = |
5693 | | static_cast<const GUInt16 *>(pSourceBuffer); |
5694 | | GUInt16 *CPL_RESTRICT panDest0 = |
5695 | | static_cast<GUInt16 *>(ppDestBuffer[0]); |
5696 | | GUInt16 *CPL_RESTRICT panDest1 = |
5697 | | static_cast<GUInt16 *>(ppDestBuffer[1]); |
5698 | | GUInt16 *CPL_RESTRICT panDest2 = |
5699 | | static_cast<GUInt16 *>(ppDestBuffer[2]); |
5700 | | GDALDeinterleave3UInt16_SSSE3(panSrc, panDest0, panDest1, |
5701 | | panDest2, nIters); |
5702 | | return; |
5703 | | } |
5704 | | #if !defined(__INTEL_CLANG_COMPILER) |
5705 | | // ICC autovectorizer doesn't do a good job, at least with icx |
5706 | | // 2022.1.0.20220316 |
5707 | | else if (nComponents == 4) |
5708 | | { |
5709 | | const GUInt16 *CPL_RESTRICT panSrc = |
5710 | | static_cast<const GUInt16 *>(pSourceBuffer); |
5711 | | GUInt16 *CPL_RESTRICT panDest0 = |
5712 | | static_cast<GUInt16 *>(ppDestBuffer[0]); |
5713 | | GUInt16 *CPL_RESTRICT panDest1 = |
5714 | | static_cast<GUInt16 *>(ppDestBuffer[1]); |
5715 | | GUInt16 *CPL_RESTRICT panDest2 = |
5716 | | static_cast<GUInt16 *>(ppDestBuffer[2]); |
5717 | | GUInt16 *CPL_RESTRICT panDest3 = |
5718 | | static_cast<GUInt16 *>(ppDestBuffer[3]); |
5719 | | GDALDeinterleave4UInt16_SSSE3(panSrc, panDest0, panDest1, |
5720 | | panDest2, panDest3, nIters); |
5721 | | return; |
5722 | | } |
5723 | | #endif |
5724 | | } |
5725 | | #endif |
5726 | 0 | } |
5727 | | |
5728 | 0 | const int nSourceDTSize = GDALGetDataTypeSizeBytes(eSourceDT); |
5729 | 0 | const int nDestDTSize = GDALGetDataTypeSizeBytes(eDestDT); |
5730 | 0 | for (int iComp = 0; iComp < nComponents; iComp++) |
5731 | 0 | { |
5732 | 0 | GDALCopyWords64(static_cast<const GByte *>(pSourceBuffer) + |
5733 | 0 | iComp * nSourceDTSize, |
5734 | 0 | eSourceDT, nComponents * nSourceDTSize, |
5735 | 0 | ppDestBuffer[iComp], eDestDT, nDestDTSize, nIters); |
5736 | 0 | } |
5737 | 0 | } |
5738 | | |
5739 | | /************************************************************************/ |
5740 | | /* GDALTranspose2DSingleToSingle() */ |
5741 | | /************************************************************************/ |
5742 | | /** |
5743 | | * Transpose a 2D array of non-complex values, in a efficient (cache-oblivious) way. |
5744 | | * |
5745 | | * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth. |
5746 | | * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight. |
5747 | | * @param nSrcWidth Width of pSrc array. |
5748 | | * @param nSrcHeight Height of pSrc array. |
5749 | | */ |
5750 | | |
5751 | | template <class DST, class SRC> |
5752 | | void GDALTranspose2DSingleToSingle(const SRC *CPL_RESTRICT pSrc, |
5753 | | DST *CPL_RESTRICT pDst, size_t nSrcWidth, |
5754 | | size_t nSrcHeight) |
5755 | 0 | { |
5756 | 0 | constexpr size_t blocksize = 32; |
5757 | 0 | for (size_t i = 0; i < nSrcHeight; i += blocksize) |
5758 | 0 | { |
5759 | 0 | const size_t max_k = std::min(i + blocksize, nSrcHeight); |
5760 | 0 | for (size_t j = 0; j < nSrcWidth; j += blocksize) |
5761 | 0 | { |
5762 | | // transpose the block beginning at [i,j] |
5763 | 0 | const size_t max_l = std::min(j + blocksize, nSrcWidth); |
5764 | 0 | for (size_t k = i; k < max_k; ++k) |
5765 | 0 | { |
5766 | 0 | for (size_t l = j; l < max_l; ++l) |
5767 | 0 | { |
5768 | 0 | GDALCopyWord(pSrc[l + k * nSrcWidth], |
5769 | 0 | pDst[k + l * nSrcHeight]); |
5770 | 0 | } |
5771 | 0 | } |
5772 | 0 | } |
5773 | 0 | } |
5774 | 0 | } Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned char>(unsigned char const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, signed char>(signed char const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned short>(unsigned short const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, short>(short const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned int>(unsigned int const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, int>(int const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned long>(unsigned long const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, long>(long const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, cpl::Float16>(cpl::Float16 const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, float>(float const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, double>(double const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned char>(unsigned char const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, signed char>(signed char const*, signed char*, unsigned long, unsigned long) Unexecuted 
instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned short>(unsigned short const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, short>(short const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned int>(unsigned int const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, int>(int const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned long>(unsigned long const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, long>(long const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, cpl::Float16>(cpl::Float16 const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, float>(float const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, double>(double const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned char>(unsigned char const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, signed char>(signed char const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned short>(unsigned short const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, short>(short const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned 
short, unsigned int>(unsigned int const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, int>(int const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned long>(unsigned long const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, long>(long const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, cpl::Float16>(cpl::Float16 const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, float>(float const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, double>(double const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned char>(unsigned char const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, signed char>(signed char const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned short>(unsigned short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, short>(short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned int>(unsigned int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, int>(int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned long>(unsigned long const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void 
GDALTranspose2DSingleToSingle<short, long>(long const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, float>(float const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, double>(double const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned char>(unsigned char const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, signed char>(signed char const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned short>(unsigned short const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, short>(short const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned int>(unsigned int const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, int>(int const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned long>(unsigned long const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, long>(long const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, cpl::Float16>(cpl::Float16 const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, float>(float const*, unsigned int*, unsigned long, unsigned long) Unexecuted 
instantiation: void GDALTranspose2DSingleToSingle<unsigned int, double>(double const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned char>(unsigned char const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, signed char>(signed char const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned short>(unsigned short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, short>(short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned int>(unsigned int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, int>(int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned long>(unsigned long const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, long>(long const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, float>(float const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, double>(double const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned char>(unsigned char const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, signed char>(signed char const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned short>(unsigned short const*, 
unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, short>(short const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned int>(unsigned int const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, int>(int const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned long>(unsigned long const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, long>(long const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, cpl::Float16>(cpl::Float16 const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, float>(float const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, double>(double const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned char>(unsigned char const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, signed char>(signed char const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned short>(unsigned short const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, short>(short const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned int>(unsigned int const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, int>(int const*, 
long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned long>(unsigned long const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, long>(long const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, cpl::Float16>(cpl::Float16 const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, float>(float const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, double>(double const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned char>(unsigned char const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, signed char>(signed char const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned short>(unsigned short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned int>(unsigned int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned long>(unsigned long const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, long>(long const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, cpl::Float16>(cpl::Float16 const*, 
cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned char>(unsigned char const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, signed char>(signed char const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned short>(unsigned short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, short>(short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned int>(unsigned int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, int>(int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned long>(unsigned long const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, long>(long const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, float>(float const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, double>(double const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned char>(unsigned char const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void 
GDALTranspose2DSingleToSingle<double, signed char>(signed char const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned short>(unsigned short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, short>(short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned int>(unsigned int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, int>(int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned long>(unsigned long const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, long>(long const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, float>(float const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, double>(double const*, double*, unsigned long, unsigned long) |
5775 | | |
5776 | | /************************************************************************/ |
5777 | | /* GDALTranspose2DComplexToComplex() */ |
5778 | | /************************************************************************/ |
5779 | | /** |
5780 | | * Transpose a 2D array of complex values into an array of complex values, |
5781 | | * in a efficient (cache-oblivious) way. |
5782 | | * |
5783 | | * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth. |
5784 | | * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight. |
5785 | | * @param nSrcWidth Width of pSrc array. |
5786 | | * @param nSrcHeight Height of pSrc array. |
5787 | | */ |
5788 | | template <class DST, class SRC> |
5789 | | void GDALTranspose2DComplexToComplex(const SRC *CPL_RESTRICT pSrc, |
5790 | | DST *CPL_RESTRICT pDst, size_t nSrcWidth, |
5791 | | size_t nSrcHeight) |
5792 | 0 | { |
5793 | 0 | constexpr size_t blocksize = 32; |
5794 | 0 | for (size_t i = 0; i < nSrcHeight; i += blocksize) |
5795 | 0 | { |
5796 | 0 | const size_t max_k = std::min(i + blocksize, nSrcHeight); |
5797 | 0 | for (size_t j = 0; j < nSrcWidth; j += blocksize) |
5798 | 0 | { |
5799 | | // transpose the block beginning at [i,j] |
5800 | 0 | const size_t max_l = std::min(j + blocksize, nSrcWidth); |
5801 | 0 | for (size_t k = i; k < max_k; ++k) |
5802 | 0 | { |
5803 | 0 | for (size_t l = j; l < max_l; ++l) |
5804 | 0 | { |
5805 | 0 | GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0], |
5806 | 0 | pDst[2 * (k + l * nSrcHeight) + 0]); |
5807 | 0 | GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 1], |
5808 | 0 | pDst[2 * (k + l * nSrcHeight) + 1]); |
5809 | 0 | } |
5810 | 0 | } |
5811 | 0 | } |
5812 | 0 | } |
5813 | 0 | } Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, short>(short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, int>(int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, float>(float const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, double>(double const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, short>(short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, int>(int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, float>(float const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, double>(double const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, double>(double const*, 
cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, short>(short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, int>(int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, float>(float const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, double>(double const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, short>(short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, int>(int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, float>(float const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, double>(double const*, double*, unsigned long, unsigned long) |
5814 | | |
5815 | | /************************************************************************/ |
5816 | | /* GDALTranspose2DComplexToSingle() */ |
5817 | | /************************************************************************/ |
5818 | | /** |
5819 | | * Transpose a 2D array of complex values into an array of non-complex values, |
5820 | | * in an efficient (cache-oblivious) way. |
5821 | | * |
5822 | | * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth. |
5823 | | * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight. |
5824 | | * @param nSrcWidth Width of pSrc array. |
5825 | | * @param nSrcHeight Height of pSrc array. |
5826 | | */ |
5827 | | template <class DST, class SRC> |
5828 | | void GDALTranspose2DComplexToSingle(const SRC *CPL_RESTRICT pSrc, |
5829 | | DST *CPL_RESTRICT pDst, size_t nSrcWidth, |
5830 | | size_t nSrcHeight) |
5831 | 0 | { |
5832 | 0 | constexpr size_t blocksize = 32; |
5833 | 0 | for (size_t i = 0; i < nSrcHeight; i += blocksize) |
5834 | 0 | { |
5835 | 0 | const size_t max_k = std::min(i + blocksize, nSrcHeight); |
5836 | 0 | for (size_t j = 0; j < nSrcWidth; j += blocksize) |
5837 | 0 | { |
5838 | | // transpose the block beginning at [i,j] |
5839 | 0 | const size_t max_l = std::min(j + blocksize, nSrcWidth); |
5840 | 0 | for (size_t k = i; k < max_k; ++k) |
5841 | 0 | { |
5842 | 0 | for (size_t l = j; l < max_l; ++l) |
5843 | 0 | { |
5844 | 0 | GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0], |
5845 | 0 | pDst[k + l * nSrcHeight]); |
5846 | 0 | } |
5847 | 0 | } |
5848 | 0 | } |
5849 | 0 | } |
5850 | 0 | } Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, short>(short const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, int>(int const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, cpl::Float16>(cpl::Float16 const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, float>(float const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, double>(double const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, short>(short const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, int>(int const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, cpl::Float16>(cpl::Float16 const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, float>(float const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, double>(double const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, short>(short const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, int>(int const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, cpl::Float16>(cpl::Float16 const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, float>(float 
const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, double>(double const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, short>(short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, int>(int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, float>(float const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, double>(double const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, short>(short const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, int>(int const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, cpl::Float16>(cpl::Float16 const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, float>(float const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, double>(double const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, short>(short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, int>(int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void 
GDALTranspose2DComplexToSingle<int, float>(float const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, double>(double const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, short>(short const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, int>(int const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, cpl::Float16>(cpl::Float16 const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, float>(float const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, double>(double const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, short>(short const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, int>(int const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, cpl::Float16>(cpl::Float16 const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, float>(float const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, double>(double const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, 
unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, short>(short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, int>(int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, float>(float const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, double>(double const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, short>(short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, int>(int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, float>(float const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, double>(double const*, double*, unsigned long, unsigned long) |
5851 | | |
5852 | | /************************************************************************/ |
5853 | | /* GDALTranspose2DSingleToComplex() */ |
5854 | | /************************************************************************/ |
5855 | | /** |
5856 | | * Transpose a 2D array of non-complex values into an array of complex values, |
5857 | | * in an efficient (cache-oblivious) way. |
5858 | | * |
5859 | | * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth. |
5860 | | * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight. |
5861 | | * @param nSrcWidth Width of pSrc array. |
5862 | | * @param nSrcHeight Height of pSrc array. |
5863 | | */ |
5864 | | template <class DST, class SRC> |
5865 | | void GDALTranspose2DSingleToComplex(const SRC *CPL_RESTRICT pSrc, |
5866 | | DST *CPL_RESTRICT pDst, size_t nSrcWidth, |
5867 | | size_t nSrcHeight) |
5868 | 0 | { |
5869 | 0 | constexpr size_t blocksize = 32; |
5870 | 0 | for (size_t i = 0; i < nSrcHeight; i += blocksize) |
5871 | 0 | { |
5872 | 0 | const size_t max_k = std::min(i + blocksize, nSrcHeight); |
5873 | 0 | for (size_t j = 0; j < nSrcWidth; j += blocksize) |
5874 | 0 | { |
5875 | | // transpose the block beginning at [i,j] |
5876 | 0 | const size_t max_l = std::min(j + blocksize, nSrcWidth); |
5877 | 0 | for (size_t k = i; k < max_k; ++k) |
5878 | 0 | { |
5879 | 0 | for (size_t l = j; l < max_l; ++l) |
5880 | 0 | { |
5881 | 0 | GDALCopyWord(pSrc[l + k * nSrcWidth], |
5882 | 0 | pDst[2 * (k + l * nSrcHeight) + 0]); |
5883 | 0 | pDst[2 * (k + l * nSrcHeight) + 1] = 0; |
5884 | 0 | } |
5885 | 0 | } |
5886 | 0 | } |
5887 | 0 | } |
5888 | 0 | } Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned char>(unsigned char const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, signed char>(signed char const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned short>(unsigned short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, short>(short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned int>(unsigned int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, int>(int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned long>(unsigned long const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, long>(long const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, float>(float const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, double>(double const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned char>(unsigned char const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, signed char>(signed char const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned short>(unsigned short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, 
short>(short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned int>(unsigned int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, int>(int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned long>(unsigned long const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, long>(long const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, float>(float const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, double>(double const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned char>(unsigned char const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, signed char>(signed char const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned short>(unsigned short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned int>(unsigned int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned long>(unsigned long const*, 
cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, long>(long const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned char>(unsigned char const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, signed char>(signed char const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned short>(unsigned short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, short>(short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned int>(unsigned int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, int>(int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned long>(unsigned long const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, long>(long const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, float>(float const*, float*, unsigned long, unsigned long) 
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, double>(double const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned char>(unsigned char const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, signed char>(signed char const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned short>(unsigned short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, short>(short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned int>(unsigned int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, int>(int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned long>(unsigned long const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, long>(long const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, float>(float const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, double>(double const*, double*, unsigned long, unsigned long) |
5889 | | |
5890 | | /************************************************************************/ |
5891 | | /* GDALTranspose2D() */ |
5892 | | /************************************************************************/ |
5893 | | |
5894 | | template <class DST, bool DST_IS_COMPLEX> |
5895 | | static void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, DST *pDst, |
5896 | | size_t nSrcWidth, size_t nSrcHeight) |
5897 | 0 | { |
5898 | 0 | #define CALL_GDALTranspose2D_internal(SRC_TYPE) \ |
5899 | 0 | do \ |
5900 | 0 | { \ |
5901 | 0 | if constexpr (DST_IS_COMPLEX) \ |
5902 | 0 | { \ |
5903 | 0 | GDALTranspose2DSingleToComplex( \ |
5904 | 0 | static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \ |
5905 | 0 | nSrcHeight); \ |
5906 | 0 | } \ |
5907 | 0 | else \ |
5908 | 0 | { \ |
5909 | 0 | GDALTranspose2DSingleToSingle(static_cast<const SRC_TYPE *>(pSrc), \ |
5910 | 0 | pDst, nSrcWidth, nSrcHeight); \ |
5911 | 0 | } \ |
5912 | 0 | } while (0) |
5913 | |
|
5914 | 0 | #define CALL_GDALTranspose2DComplex_internal(SRC_TYPE) \ |
5915 | 0 | do \ |
5916 | 0 | { \ |
5917 | 0 | if constexpr (DST_IS_COMPLEX) \ |
5918 | 0 | { \ |
5919 | 0 | GDALTranspose2DComplexToComplex( \ |
5920 | 0 | static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \ |
5921 | 0 | nSrcHeight); \ |
5922 | 0 | } \ |
5923 | 0 | else \ |
5924 | 0 | { \ |
5925 | 0 | GDALTranspose2DComplexToSingle( \ |
5926 | 0 | static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \ |
5927 | 0 | nSrcHeight); \ |
5928 | 0 | } \ |
5929 | 0 | } while (0) |
5930 | | |
5931 | | // clang-format off |
5932 | 0 | switch (eSrcType) |
5933 | 0 | { |
5934 | 0 | case GDT_Byte: CALL_GDALTranspose2D_internal(uint8_t); break; |
5935 | 0 | case GDT_Int8: CALL_GDALTranspose2D_internal(int8_t); break; |
5936 | 0 | case GDT_UInt16: CALL_GDALTranspose2D_internal(uint16_t); break; |
5937 | 0 | case GDT_Int16: CALL_GDALTranspose2D_internal(int16_t); break; |
5938 | 0 | case GDT_UInt32: CALL_GDALTranspose2D_internal(uint32_t); break; |
5939 | 0 | case GDT_Int32: CALL_GDALTranspose2D_internal(int32_t); break; |
5940 | 0 | case GDT_UInt64: CALL_GDALTranspose2D_internal(uint64_t); break; |
5941 | 0 | case GDT_Int64: CALL_GDALTranspose2D_internal(int64_t); break; |
5942 | 0 | case GDT_Float16: CALL_GDALTranspose2D_internal(GFloat16); break; |
5943 | 0 | case GDT_Float32: CALL_GDALTranspose2D_internal(float); break; |
5944 | 0 | case GDT_Float64: CALL_GDALTranspose2D_internal(double); break; |
5945 | 0 | case GDT_CInt16: CALL_GDALTranspose2DComplex_internal(int16_t); break; |
5946 | 0 | case GDT_CInt32: CALL_GDALTranspose2DComplex_internal(int32_t); break; |
5947 | 0 | case GDT_CFloat16: CALL_GDALTranspose2DComplex_internal(GFloat16); break; |
5948 | 0 | case GDT_CFloat32: CALL_GDALTranspose2DComplex_internal(float); break; |
5949 | 0 | case GDT_CFloat64: CALL_GDALTranspose2DComplex_internal(double); break; |
5950 | 0 | case GDT_Unknown: |
5951 | 0 | case GDT_TypeCount: |
5952 | 0 | break; |
5953 | 0 | } |
5954 | | // clang-format on |
5955 | |
|
5956 | 0 | #undef CALL_GDALTranspose2D_internal |
5957 | 0 | #undef CALL_GDALTranspose2DComplex_internal |
5958 | 0 | } Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned char, false>(void const*, GDALDataType, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<signed char, false>(void const*, GDALDataType, signed char*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned short, false>(void const*, GDALDataType, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<short, false>(void const*, GDALDataType, short*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned int, false>(void const*, GDALDataType, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<int, false>(void const*, GDALDataType, int*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned long, false>(void const*, GDALDataType, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<long, false>(void const*, GDALDataType, long*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<cpl::Float16, false>(void const*, GDALDataType, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<float, false>(void const*, GDALDataType, float*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<double, false>(void const*, GDALDataType, double*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<short, true>(void const*, GDALDataType, short*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<int, true>(void const*, GDALDataType, int*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<cpl::Float16, true>(void const*, 
GDALDataType, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<float, true>(void const*, GDALDataType, float*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<double, true>(void const*, GDALDataType, double*, unsigned long, unsigned long) |
5959 | | |
5960 | | /************************************************************************/ |
5961 | | /* GDALInterleave2Byte() */ |
5962 | | /************************************************************************/ |
5963 | | |
5964 | | #if defined(HAVE_SSE2) && \ |
5965 | | (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER)) |
5966 | | |
5967 | | // ICC autovectorizer doesn't do a good job at generating good SSE code, |
5968 | | // at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop. |
5969 | | #if defined(__GNUC__) |
5970 | | __attribute__((noinline)) |
5971 | | #endif |
5972 | | static void |
5973 | | GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc, |
5974 | | uint8_t *CPL_RESTRICT pDst, size_t nIters) |
5975 | | { |
5976 | | size_t i = 0; |
5977 | | constexpr size_t VALS_PER_ITER = 16; |
5978 | | for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER) |
5979 | | { |
5980 | | __m128i xmm0 = |
5981 | | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + i)); |
5982 | | __m128i xmm1 = _mm_loadu_si128( |
5983 | | reinterpret_cast<__m128i const *>(pSrc + i + nIters)); |
5984 | | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDst + 2 * i), |
5985 | | _mm_unpacklo_epi8(xmm0, xmm1)); |
5986 | | _mm_storeu_si128( |
5987 | | reinterpret_cast<__m128i *>(pDst + 2 * i + VALS_PER_ITER), |
5988 | | _mm_unpackhi_epi8(xmm0, xmm1)); |
5989 | | } |
5990 | | #if defined(__clang__) |
5991 | | #pragma clang loop vectorize(disable) |
5992 | | #endif |
5993 | | for (; i < nIters; ++i) |
5994 | | { |
5995 | | pDst[2 * i + 0] = pSrc[i + 0 * nIters]; |
5996 | | pDst[2 * i + 1] = pSrc[i + 1 * nIters]; |
5997 | | } |
5998 | | } |
5999 | | |
6000 | | #else |
6001 | | |
6002 | | #if defined(__GNUC__) && !defined(__clang__) |
6003 | | __attribute__((optimize("tree-vectorize"))) |
6004 | | #endif |
6005 | | #if defined(__GNUC__) |
6006 | | __attribute__((noinline)) |
6007 | | #endif |
6008 | | #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER) |
6009 | | // clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning |
6010 | | #pragma clang diagnostic push |
6011 | | #pragma clang diagnostic ignored "-Wpass-failed" |
6012 | | #endif |
6013 | | static void |
6014 | | GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc, |
6015 | | uint8_t *CPL_RESTRICT pDst, size_t nIters) |
6016 | 0 | { |
6017 | 0 | #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER) |
6018 | 0 | #pragma clang loop vectorize(enable) |
6019 | 0 | #endif |
6020 | 0 | for (size_t i = 0; i < nIters; ++i) |
6021 | 0 | { |
6022 | 0 | pDst[2 * i + 0] = pSrc[i + 0 * nIters]; |
6023 | 0 | pDst[2 * i + 1] = pSrc[i + 1 * nIters]; |
6024 | 0 | } |
6025 | 0 | } |
6026 | | #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER) |
6027 | | #pragma clang diagnostic pop |
6028 | | #endif |
6029 | | |
6030 | | #endif |
6031 | | |
6032 | | /************************************************************************/ |
6033 | | /* GDALInterleave4Byte() */ |
6034 | | /************************************************************************/ |
6035 | | |
6036 | | #if defined(HAVE_SSE2) && \ |
6037 | | (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER)) |
6038 | | |
6039 | | // ICC autovectorizer doesn't do a good job at generating good SSE code, |
6040 | | // at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop. |
6041 | | #if defined(__GNUC__) |
6042 | | __attribute__((noinline)) |
6043 | | #endif |
6044 | | static void |
6045 | | GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc, |
6046 | | uint8_t *CPL_RESTRICT pDst, size_t nIters) |
6047 | | { |
6048 | | size_t i = 0; |
6049 | | constexpr size_t VALS_PER_ITER = 16; |
6050 | | for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER) |
6051 | | { |
6052 | | __m128i xmm0 = _mm_loadu_si128( |
6053 | | reinterpret_cast<__m128i const *>(pSrc + i + 0 * nIters)); |
6054 | | __m128i xmm1 = _mm_loadu_si128( |
6055 | | reinterpret_cast<__m128i const *>(pSrc + i + 1 * nIters)); |
6056 | | __m128i xmm2 = _mm_loadu_si128( |
6057 | | reinterpret_cast<__m128i const *>(pSrc + i + 2 * nIters)); |
6058 | | __m128i xmm3 = _mm_loadu_si128( |
6059 | | reinterpret_cast<__m128i const *>(pSrc + i + 3 * nIters)); |
6060 | | auto tmp0 = _mm_unpacklo_epi8( |
6061 | | xmm0, |
6062 | | xmm1); // (xmm0_0, xmm1_0, xmm0_1, xmm1_1, xmm0_2, xmm1_2, ...) |
6063 | | auto tmp1 = _mm_unpackhi_epi8( |
6064 | | xmm0, |
6065 | | xmm1); // (xmm0_8, xmm1_8, xmm0_9, xmm1_9, xmm0_10, xmm1_10, ...) |
6066 | | auto tmp2 = _mm_unpacklo_epi8( |
6067 | | xmm2, |
6068 | | xmm3); // (xmm2_0, xmm3_0, xmm2_1, xmm3_1, xmm2_2, xmm3_2, ...) |
6069 | | auto tmp3 = _mm_unpackhi_epi8( |
6070 | | xmm2, |
6071 | | xmm3); // (xmm2_8, xmm3_8, xmm2_9, xmm3_9, xmm2_10, xmm3_10, ...) |
6072 | | auto tmp2_0 = _mm_unpacklo_epi16( |
6073 | | tmp0, |
6074 | | tmp2); // (xmm0_0, xmm1_0, xmm2_0, xmm3_0, xmm0_1, xmm1_1, xmm2_1, xmm3_1, ...) |
6075 | | auto tmp2_1 = _mm_unpackhi_epi16(tmp0, tmp2); |
6076 | | auto tmp2_2 = _mm_unpacklo_epi16(tmp1, tmp3); |
6077 | | auto tmp2_3 = _mm_unpackhi_epi16(tmp1, tmp3); |
6078 | | _mm_storeu_si128( |
6079 | | reinterpret_cast<__m128i *>(pDst + 4 * i + 0 * VALS_PER_ITER), |
6080 | | tmp2_0); |
6081 | | _mm_storeu_si128( |
6082 | | reinterpret_cast<__m128i *>(pDst + 4 * i + 1 * VALS_PER_ITER), |
6083 | | tmp2_1); |
6084 | | _mm_storeu_si128( |
6085 | | reinterpret_cast<__m128i *>(pDst + 4 * i + 2 * VALS_PER_ITER), |
6086 | | tmp2_2); |
6087 | | _mm_storeu_si128( |
6088 | | reinterpret_cast<__m128i *>(pDst + 4 * i + 3 * VALS_PER_ITER), |
6089 | | tmp2_3); |
6090 | | } |
6091 | | #if defined(__clang__) |
6092 | | #pragma clang loop vectorize(disable) |
6093 | | #endif |
6094 | | for (; i < nIters; ++i) |
6095 | | { |
6096 | | pDst[4 * i + 0] = pSrc[i + 0 * nIters]; |
6097 | | pDst[4 * i + 1] = pSrc[i + 1 * nIters]; |
6098 | | pDst[4 * i + 2] = pSrc[i + 2 * nIters]; |
6099 | | pDst[4 * i + 3] = pSrc[i + 3 * nIters]; |
6100 | | } |
6101 | | } |
6102 | | |
6103 | | #else |
6104 | | |
6105 | | #if defined(__GNUC__) && !defined(__clang__) |
6106 | | __attribute__((optimize("tree-vectorize"))) |
6107 | | #endif |
6108 | | #if defined(__GNUC__) |
6109 | | __attribute__((noinline)) |
6110 | | #endif |
6111 | | #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER) |
6112 | | // clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning |
6113 | | #pragma clang diagnostic push |
6114 | | #pragma clang diagnostic ignored "-Wpass-failed" |
6115 | | #endif |
6116 | | static void |
6117 | | GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc, |
6118 | | uint8_t *CPL_RESTRICT pDst, size_t nIters) |
6119 | 0 | { |
6120 | 0 | #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER) |
6121 | 0 | #pragma clang loop vectorize(enable) |
6122 | 0 | #endif |
6123 | 0 | for (size_t i = 0; i < nIters; ++i) |
6124 | 0 | { |
6125 | 0 | pDst[4 * i + 0] = pSrc[i + 0 * nIters]; |
6126 | 0 | pDst[4 * i + 1] = pSrc[i + 1 * nIters]; |
6127 | 0 | pDst[4 * i + 2] = pSrc[i + 2 * nIters]; |
6128 | 0 | pDst[4 * i + 3] = pSrc[i + 3 * nIters]; |
6129 | 0 | } |
6130 | 0 | } |
6131 | | #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER) |
6132 | | #pragma clang diagnostic pop |
6133 | | #endif |
6134 | | |
6135 | | #endif |
6136 | | |
6137 | | /************************************************************************/ |
6138 | | /* GDALTranspose2D() */ |
6139 | | /************************************************************************/ |
6140 | | |
6141 | | /** |
6142 | | * Transpose a 2D array in an efficient (cache-oblivious) way. |
6143 | | * |
6144 | | * @param pSrc Source array of width = nSrcWidth and height = nSrcHeight. |
6145 | | * @param eSrcType Data type of pSrc. |
6146 | | * @param pDst Destination transposed array of width = nSrcHeight and height = nSrcWidth. |
6147 | | * @param eDstType Data type of pDst. |
6148 | | * @param nSrcWidth Width of pSrc array. |
6149 | | * @param nSrcHeight Height of pSrc array. |
6150 | | * @since GDAL 3.11 |
6151 | | */ |
6152 | | |
6153 | | void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, void *pDst, |
6154 | | GDALDataType eDstType, size_t nSrcWidth, size_t nSrcHeight) |
6155 | 0 | { |
6156 | 0 | if (eSrcType == eDstType && (eSrcType == GDT_Byte || eSrcType == GDT_Int8)) |
6157 | 0 | { |
6158 | 0 | if (nSrcHeight == 2) |
6159 | 0 | { |
6160 | 0 | GDALInterleave2Byte(static_cast<const uint8_t *>(pSrc), |
6161 | 0 | static_cast<uint8_t *>(pDst), nSrcWidth); |
6162 | 0 | return; |
6163 | 0 | } |
6164 | 0 | if (nSrcHeight == 4) |
6165 | 0 | { |
6166 | 0 | GDALInterleave4Byte(static_cast<const uint8_t *>(pSrc), |
6167 | 0 | static_cast<uint8_t *>(pDst), nSrcWidth); |
6168 | 0 | return; |
6169 | 0 | } |
6170 | 0 | #if (defined(HAVE_SSSE3_AT_COMPILE_TIME) && \ |
6171 | 0 | (defined(__x86_64) || defined(_M_X64))) |
6172 | 0 | if (CPLHaveRuntimeSSSE3()) |
6173 | 0 | { |
6174 | 0 | GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc), |
6175 | 0 | static_cast<uint8_t *>(pDst), nSrcWidth, |
6176 | 0 | nSrcHeight); |
6177 | 0 | return; |
6178 | 0 | } |
6179 | | #elif defined(USE_NEON_OPTIMIZATIONS) |
6180 | | { |
6181 | | GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc), |
6182 | | static_cast<uint8_t *>(pDst), nSrcWidth, |
6183 | | nSrcHeight); |
6184 | | return; |
6185 | | } |
6186 | | #endif |
6187 | 0 | } |
6188 | | |
6189 | 0 | #define CALL_GDALTranspose2D_internal(DST_TYPE, DST_IS_COMPLEX) \ |
6190 | 0 | GDALTranspose2D<DST_TYPE, DST_IS_COMPLEX>( \ |
6191 | 0 | pSrc, eSrcType, static_cast<DST_TYPE *>(pDst), nSrcWidth, nSrcHeight) |
6192 | | |
6193 | | // clang-format off |
6194 | 0 | switch (eDstType) |
6195 | 0 | { |
6196 | 0 | case GDT_Byte: CALL_GDALTranspose2D_internal(uint8_t, false); break; |
6197 | 0 | case GDT_Int8: CALL_GDALTranspose2D_internal(int8_t, false); break; |
6198 | 0 | case GDT_UInt16: CALL_GDALTranspose2D_internal(uint16_t, false); break; |
6199 | 0 | case GDT_Int16: CALL_GDALTranspose2D_internal(int16_t, false); break; |
6200 | 0 | case GDT_UInt32: CALL_GDALTranspose2D_internal(uint32_t, false); break; |
6201 | 0 | case GDT_Int32: CALL_GDALTranspose2D_internal(int32_t, false); break; |
6202 | 0 | case GDT_UInt64: CALL_GDALTranspose2D_internal(uint64_t, false); break; |
6203 | 0 | case GDT_Int64: CALL_GDALTranspose2D_internal(int64_t, false); break; |
6204 | 0 | case GDT_Float16: CALL_GDALTranspose2D_internal(GFloat16, false); break; |
6205 | 0 | case GDT_Float32: CALL_GDALTranspose2D_internal(float, false); break; |
6206 | 0 | case GDT_Float64: CALL_GDALTranspose2D_internal(double, false); break; |
6207 | 0 | case GDT_CInt16: CALL_GDALTranspose2D_internal(int16_t, true); break; |
6208 | 0 | case GDT_CInt32: CALL_GDALTranspose2D_internal(int32_t, true); break; |
6209 | 0 | case GDT_CFloat16: CALL_GDALTranspose2D_internal(GFloat16, true); break; |
6210 | 0 | case GDT_CFloat32: CALL_GDALTranspose2D_internal(float, true); break; |
6211 | 0 | case GDT_CFloat64: CALL_GDALTranspose2D_internal(double, true); break; |
6212 | 0 | case GDT_Unknown: |
6213 | 0 | case GDT_TypeCount: |
6214 | 0 | break; |
6215 | 0 | } |
6216 | | // clang-format on |
6217 | |
|
6218 | 0 | #undef CALL_GDALTranspose2D_internal |
6219 | 0 | } |
6220 | | |
6221 | | /************************************************************************/ |
6222 | | /* ExtractBitAndConvertTo255() */ |
6223 | | /************************************************************************/ |
6224 | | |
6225 | | #if defined(__GNUC__) || defined(_MSC_VER) |
6226 | | // Signedness of char implementation dependent, so be explicit. |
6227 | | // Assumes 2-complement integer types and sign extension of right shifting |
6228 | | // GCC guarantees such: |
6229 | | // https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html#Integers-implementation |
6230 | | static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit) |
6231 | 0 | { |
6232 | 0 | return static_cast<GByte>(static_cast<signed char>(byVal << (7 - nBit)) >> |
6233 | 0 | 7); |
6234 | 0 | } |
6235 | | #else |
6236 | | // Portable way |
6237 | | static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit) |
6238 | | { |
6239 | | return (byVal & (1 << nBit)) ? 255 : 0; |
6240 | | } |
6241 | | #endif |
6242 | | |
6243 | | /************************************************************************/ |
6244 | | /* ExpandEightPackedBitsToByteAt255() */ |
6245 | | /************************************************************************/ |
6246 | | |
6247 | | static inline void ExpandEightPackedBitsToByteAt255(GByte byVal, |
6248 | | GByte abyOutput[8]) |
6249 | 0 | { |
6250 | 0 | abyOutput[0] = ExtractBitAndConvertTo255(byVal, 7); |
6251 | 0 | abyOutput[1] = ExtractBitAndConvertTo255(byVal, 6); |
6252 | 0 | abyOutput[2] = ExtractBitAndConvertTo255(byVal, 5); |
6253 | 0 | abyOutput[3] = ExtractBitAndConvertTo255(byVal, 4); |
6254 | 0 | abyOutput[4] = ExtractBitAndConvertTo255(byVal, 3); |
6255 | 0 | abyOutput[5] = ExtractBitAndConvertTo255(byVal, 2); |
6256 | 0 | abyOutput[6] = ExtractBitAndConvertTo255(byVal, 1); |
6257 | 0 | abyOutput[7] = ExtractBitAndConvertTo255(byVal, 0); |
6258 | 0 | } |
6259 | | |
6260 | | /************************************************************************/ |
6261 | | /* GDALExpandPackedBitsToByteAt0Or255() */ |
6262 | | /************************************************************************/ |
6263 | | |
6264 | | /** Expand packed-bits (ordered from most-significant bit to least one) |
6265 | | into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit |
6266 | | at 1 to a byte at 255. |
6267 | | |
6268 | | The function does (in a possibly more optimized way) the following: |
6269 | | \code{.cpp} |
6270 | | for (size_t i = 0; i < nInputBits; ++i ) |
6271 | | { |
6272 | | pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 255 : 0; |
6273 | | } |
6274 | | \endcode |
6275 | | |
6276 | | @param pabyInput Input array of (nInputBits + 7) / 8 bytes. |
6277 | | @param pabyOutput Output array of nInputBits bytes. |
6278 | | @param nInputBits Number of valid bits in pabyInput. |
6279 | | |
6280 | | @since 3.11 |
6281 | | */ |
6282 | | |
6283 | | void GDALExpandPackedBitsToByteAt0Or255(const GByte *CPL_RESTRICT pabyInput, |
6284 | | GByte *CPL_RESTRICT pabyOutput, |
6285 | | size_t nInputBits) |
6286 | 0 | { |
6287 | 0 | const size_t nInputWholeBytes = nInputBits / 8; |
6288 | 0 | size_t iByte = 0; |
6289 | |
|
6290 | 0 | #ifdef HAVE_SSE2 |
6291 | | // Mask to isolate each bit |
6292 | 0 | const __m128i bit_mask = _mm_set_epi8(1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4, |
6293 | 0 | 8, 16, 32, 64, -128); |
6294 | 0 | const __m128i zero = _mm_setzero_si128(); |
6295 | 0 | const __m128i all_ones = _mm_set1_epi8(-1); |
6296 | | #ifdef __SSSE3__ |
6297 | | const __m128i dispatch_two_bytes = |
6298 | | _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0); |
6299 | | #endif |
6300 | 0 | constexpr size_t SSE_REG_SIZE = sizeof(bit_mask); |
6301 | 0 | for (; iByte + SSE_REG_SIZE <= nInputWholeBytes; iByte += SSE_REG_SIZE) |
6302 | 0 | { |
6303 | 0 | __m128i reg_ori = _mm_loadu_si128( |
6304 | 0 | reinterpret_cast<const __m128i *>(pabyInput + iByte)); |
6305 | |
|
6306 | 0 | constexpr int NUM_PROCESSED_BYTES_PER_REG = 2; |
6307 | 0 | for (size_t k = 0; k < SSE_REG_SIZE / NUM_PROCESSED_BYTES_PER_REG; ++k) |
6308 | 0 | { |
6309 | | // Given reg_ori = (A, B, ... 14 other bytes ...), |
6310 | | // expand to (A, A, A, A, A, A, A, A, B, B, B, B, B, B, B, B) |
6311 | | #ifdef __SSSE3__ |
6312 | | __m128i reg = _mm_shuffle_epi8(reg_ori, dispatch_two_bytes); |
6313 | | #else |
6314 | 0 | __m128i reg = _mm_unpacklo_epi8(reg_ori, reg_ori); |
6315 | 0 | reg = _mm_unpacklo_epi16(reg, reg); |
6316 | 0 | reg = _mm_unpacklo_epi32(reg, reg); |
6317 | 0 | #endif |
6318 | | |
6319 | | // Test if bits of interest are set |
6320 | 0 | reg = _mm_and_si128(reg, bit_mask); |
6321 | | |
6322 | | // Now test if those bits are set, by comparing to zero. So the |
6323 | | // result will be that bytes where bits are set will be at 0, and |
6324 | | // ones where they are cleared will be at 0xFF. So the inverse of |
6325 | | // the end result we want! |
6326 | 0 | reg = _mm_cmpeq_epi8(reg, zero); |
6327 | | |
6328 | | // Invert the result |
6329 | 0 | reg = _mm_andnot_si128(reg, all_ones); |
6330 | |
|
6331 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pabyOutput), reg); |
6332 | |
|
6333 | 0 | pabyOutput += SSE_REG_SIZE; |
6334 | | |
6335 | | // Right-shift of 2 bytes |
6336 | 0 | reg_ori = _mm_bsrli_si128(reg_ori, NUM_PROCESSED_BYTES_PER_REG); |
6337 | 0 | } |
6338 | 0 | } |
6339 | |
|
6340 | 0 | #endif // HAVE_SSE2 |
6341 | |
|
6342 | 0 | for (; iByte < nInputWholeBytes; ++iByte) |
6343 | 0 | { |
6344 | 0 | ExpandEightPackedBitsToByteAt255(pabyInput[iByte], pabyOutput); |
6345 | 0 | pabyOutput += 8; |
6346 | 0 | } |
6347 | 0 | for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit) |
6348 | 0 | { |
6349 | 0 | *pabyOutput = ExtractBitAndConvertTo255(pabyInput[iByte], 7 - iBit); |
6350 | 0 | ++pabyOutput; |
6351 | 0 | } |
6352 | 0 | } |
6353 | | |
6354 | | /************************************************************************/ |
6355 | | /* ExpandEightPackedBitsToByteAt1() */ |
6356 | | /************************************************************************/ |
6357 | | |
6358 | | static inline void ExpandEightPackedBitsToByteAt1(GByte byVal, |
6359 | | GByte abyOutput[8]) |
6360 | 0 | { |
6361 | 0 | abyOutput[0] = (byVal >> 7) & 0x1; |
6362 | 0 | abyOutput[1] = (byVal >> 6) & 0x1; |
6363 | 0 | abyOutput[2] = (byVal >> 5) & 0x1; |
6364 | 0 | abyOutput[3] = (byVal >> 4) & 0x1; |
6365 | 0 | abyOutput[4] = (byVal >> 3) & 0x1; |
6366 | 0 | abyOutput[5] = (byVal >> 2) & 0x1; |
6367 | 0 | abyOutput[6] = (byVal >> 1) & 0x1; |
6368 | 0 | abyOutput[7] = (byVal >> 0) & 0x1; |
6369 | 0 | } |
6370 | | |
6371 | | /************************************************************************/ |
6372 | | /* GDALExpandPackedBitsToByteAt0Or1() */ |
6373 | | /************************************************************************/ |
6374 | | |
6375 | | /** Expand packed-bits (ordered from most-significant bit to least one) |
6376 | | into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit |
6377 | | at 1 to a byte at 1. |
6378 | | |
6379 | | The function does (in a possibly more optimized way) the following: |
6380 | | \code{.cpp} |
6381 | | for (size_t i = 0; i < nInputBits; ++i ) |
6382 | | { |
6383 | | pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0; |
6384 | | } |
6385 | | \endcode |
6386 | | |
6387 | | @param pabyInput Input array of (nInputBits + 7) / 8 bytes. |
6388 | | @param pabyOutput Output array of nInputBits bytes. |
6389 | | @param nInputBits Number of valid bits in pabyInput. |
6390 | | |
6391 | | @since 3.11 |
6392 | | */ |
6393 | | |
6394 | | void GDALExpandPackedBitsToByteAt0Or1(const GByte *CPL_RESTRICT pabyInput, |
6395 | | GByte *CPL_RESTRICT pabyOutput, |
6396 | | size_t nInputBits) |
6397 | 0 | { |
6398 | 0 | const size_t nInputWholeBytes = nInputBits / 8; |
6399 | 0 | size_t iByte = 0; |
6400 | 0 | for (; iByte < nInputWholeBytes; ++iByte) |
6401 | 0 | { |
6402 | 0 | ExpandEightPackedBitsToByteAt1(pabyInput[iByte], pabyOutput); |
6403 | 0 | pabyOutput += 8; |
6404 | 0 | } |
6405 | 0 | for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit) |
6406 | 0 | { |
6407 | 0 | *pabyOutput = (pabyInput[iByte] >> (7 - iBit)) & 0x1; |
6408 | 0 | ++pabyOutput; |
6409 | 0 | } |
6410 | 0 | } |