/src/gdal/apps/gdalalg_raster_blend.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: GDAL |
4 | | * Purpose: "blend" step of "raster pipeline" |
5 | | * Author: Even Rouault <even dot rouault at spatialys.com> |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2025, Even Rouault <even dot rouault at spatialys.com> |
9 | | * Copyright (c) 2009, Frank Warmerdam |
10 | | |
11 | | * SPDX-License-Identifier: MIT |
12 | | ****************************************************************************/ |
13 | | |
14 | | #include "gdalalg_raster_blend.h" |
15 | | |
16 | | #include "cpl_conv.h" |
17 | | #include "gdal_priv.h" |
18 | | #include "gdal_utils.h" |
19 | | |
20 | | #include <algorithm> |
21 | | #include <array> |
22 | | #include <cassert> |
23 | | #include <limits> |
24 | | #include <utility> |
25 | | |
26 | | #if defined(__x86_64) || defined(_M_X64) |
27 | | #define HAVE_SSE2 |
28 | | #include <immintrin.h> |
29 | | #endif |
30 | | #ifdef HAVE_SSE2 |
31 | | #include "gdalsse_priv.h" |
32 | | #endif |
33 | | |
34 | | //! @cond Doxygen_Suppress |
35 | | |
36 | | #ifndef _ |
37 | 0 | #define _(x) (x) |
38 | | #endif |
39 | | |
40 | | /************************************************************************/ |
41 | | /* CompositionModes */ |
42 | | /************************************************************************/ |
43 | | std::map<CompositionMode, std::string> CompositionModes() |
44 | 0 | { |
45 | 0 | return { |
46 | 0 | {CompositionMode::SRC_OVER, "src-over"}, |
47 | 0 | {CompositionMode::HSV_VALUE, "hsv-value"}, |
48 | 0 | {CompositionMode::MULTIPLY, "multiply"}, |
49 | 0 | {CompositionMode::SCREEN, "screen"}, |
50 | 0 | {CompositionMode::OVERLAY, "overlay"}, |
51 | 0 | {CompositionMode::HARD_LIGHT, "hard-light"}, |
52 | 0 | {CompositionMode::DARKEN, "darken"}, |
53 | 0 | {CompositionMode::LIGHTEN, "lighten"}, |
54 | 0 | {CompositionMode::COLOR_BURN, "color-burn"}, |
55 | 0 | {CompositionMode::COLOR_DODGE, "color-dodge"}, |
56 | 0 | }; |
57 | 0 | } |
58 | | |
59 | | /************************************************************************/ |
60 | | /* CompositionModeToString() */ |
61 | | /************************************************************************/ |
62 | | |
63 | | std::string CompositionModeToString(CompositionMode mode) |
64 | 0 | { |
65 | 0 | const auto &modes = CompositionModes(); |
66 | 0 | const auto &iter = modes.find(mode); |
67 | 0 | if (iter != modes.end()) |
68 | 0 | { |
69 | 0 | return iter->second; |
70 | 0 | } |
71 | 0 | CPLError(CE_Failure, CPLE_IllegalArg, |
72 | 0 | "Invalid composition mode value: %d, returning 'src-over'", |
73 | 0 | static_cast<int>(mode)); |
74 | 0 | return "src-over"; |
75 | 0 | } |
76 | | |
77 | | /************************************************************************/ |
78 | | /* CompositionModesIdentifiers() */ |
79 | | /************************************************************************/ |
80 | | |
81 | | std::vector<std::string> CompositionModesIdentifiers() |
82 | 0 | { |
83 | 0 | const auto &modes = CompositionModes(); |
84 | 0 | std::vector<std::string> identifiers; |
85 | 0 | for (const auto &pair : modes) |
86 | 0 | { |
87 | 0 | identifiers.push_back(pair.second); |
88 | 0 | } |
89 | 0 | return identifiers; |
90 | 0 | } |
91 | | |
92 | | /************************************************************************/ |
93 | | /* CompositionModeFromString() */ |
94 | | /************************************************************************/ |
95 | | |
96 | | CompositionMode CompositionModeFromString(const std::string &str) |
97 | 0 | { |
98 | 0 | const auto &modes = CompositionModes(); |
99 | 0 | auto iter = |
100 | 0 | std::find_if(modes.begin(), modes.end(), |
101 | 0 | [&str](const auto &pair) { return pair.second == str; }); |
102 | 0 | if (iter != modes.end()) |
103 | 0 | { |
104 | 0 | return iter->first; |
105 | 0 | } |
106 | 0 | CPLError(CE_Failure, CPLE_IllegalArg, |
107 | 0 | "Invalid composition identifier: %s, returning SRC_OVER", |
108 | 0 | str.c_str()); |
109 | 0 | return CompositionMode::SRC_OVER; |
110 | 0 | } |
111 | | |
112 | | /************************************************************************/ |
113 | | /* MinBandCountForCompositionMode() */ |
114 | | /************************************************************************/ |
115 | | |
116 | | //! Returns the minimum number of bands required for the given composition mode |
117 | | int MinBandCountForCompositionMode(CompositionMode mode) |
118 | 0 | { |
119 | 0 | switch (mode) |
120 | 0 | { |
121 | 0 | case CompositionMode::HSV_VALUE: |
122 | 0 | return 3; |
123 | 0 | case CompositionMode::SRC_OVER: |
124 | 0 | case CompositionMode::MULTIPLY: |
125 | 0 | case CompositionMode::SCREEN: |
126 | 0 | case CompositionMode::OVERLAY: |
127 | 0 | case CompositionMode::HARD_LIGHT: |
128 | 0 | case CompositionMode::DARKEN: |
129 | 0 | case CompositionMode::LIGHTEN: |
130 | 0 | case CompositionMode::COLOR_BURN: |
131 | 0 | case CompositionMode::COLOR_DODGE: |
132 | 0 | return 1; |
133 | 0 | } |
134 | | // unreachable... |
135 | 0 | return 1; |
136 | 0 | } |
137 | | |
138 | | /************************************************************************/ |
139 | | /* MaxBandCountForCompositionMode() */ |
140 | | /************************************************************************/ |
141 | | |
142 | | /** |
143 | | * Returns the maximum number of bands allowed for the given composition mode |
144 | | */ |
145 | | int MaxBandCountForCompositionMode(CompositionMode mode) |
146 | 0 | { |
147 | 0 | switch (mode) |
148 | 0 | { |
149 | 0 | case CompositionMode::SRC_OVER: |
150 | 0 | case CompositionMode::HSV_VALUE: |
151 | 0 | case CompositionMode::MULTIPLY: |
152 | 0 | case CompositionMode::SCREEN: |
153 | 0 | case CompositionMode::OVERLAY: |
154 | 0 | case CompositionMode::HARD_LIGHT: |
155 | 0 | case CompositionMode::DARKEN: |
156 | 0 | case CompositionMode::LIGHTEN: |
157 | 0 | case CompositionMode::COLOR_BURN: |
158 | 0 | case CompositionMode::COLOR_DODGE: |
159 | 0 | return 4; |
160 | 0 | } |
161 | | // unreachable... |
162 | 0 | return 4; |
163 | 0 | } |
164 | | |
165 | | /************************************************************************/ |
166 | | /* BandCountIsCompatibleWithCompositionMode() */ |
167 | | /************************************************************************/ |
168 | | |
169 | | //! Checks whether the number of bands is compatible with the given composition mode |
170 | | bool BandCountIsCompatibleWithCompositionMode(int bandCount, |
171 | | CompositionMode mode) |
172 | 0 | { |
173 | 0 | const int minBands = MinBandCountForCompositionMode(mode); |
174 | 0 | const int maxBands = MaxBandCountForCompositionMode(mode); |
175 | 0 | return minBands <= bandCount && bandCount <= maxBands; |
176 | 0 | } |
177 | | |
178 | | /************************************************************************/ |
179 | | /* MulScale255() */ |
180 | | /************************************************************************/ |
181 | | |
182 | | /** Multiply 2 bytes considering them as ratios with 255 = 100%, and return their product unscaled to [0, 255], by ceiling */ |
183 | | inline GByte MulScale255(GByte a, GByte b) |
184 | 0 | { |
185 | 0 | return static_cast<GByte>((a * b + 255) / 256); |
186 | 0 | } |
187 | | |
188 | | /************************************************************************/ |
189 | | /* ProcessAlphaChannels */ |
190 | | /************************************************************************/ |
191 | | |
192 | | static inline void ProcessAlphaChannels(size_t i, |
193 | | const GByte *CPL_RESTRICT pabyA, |
194 | | const GByte *CPL_RESTRICT pabyOverlayA, |
195 | | int nOpacity, bool bSwappedOpacity, |
196 | | GByte &outA, GByte &outOverlaA, |
197 | | GByte &outFinalAlpha) |
198 | 0 | { |
199 | | // Apply opacity depending on whether overlay and base were swapped |
200 | 0 | const GByte byOpacity = static_cast<GByte>(nOpacity); |
201 | 0 | if (!bSwappedOpacity) |
202 | 0 | { |
203 | 0 | outOverlaA = |
204 | 0 | pabyOverlayA ? MulScale255(pabyOverlayA[i], byOpacity) : byOpacity; |
205 | |
|
206 | 0 | outA = pabyA ? pabyA[i] : 255; |
207 | 0 | } |
208 | 0 | else |
209 | 0 | { |
210 | 0 | outOverlaA = pabyOverlayA ? pabyOverlayA[i] : 255; |
211 | 0 | if (outOverlaA != 255) |
212 | 0 | { |
213 | 0 | outOverlaA = pabyOverlayA ? pabyOverlayA[i] : 255; |
214 | 0 | } |
215 | 0 | outA = pabyA ? MulScale255(pabyA[i], byOpacity) : byOpacity; |
216 | 0 | } |
217 | | |
218 | | // Da' = Sa + Da - Sa.Da |
219 | 0 | outFinalAlpha = |
220 | 0 | static_cast<GByte>(outOverlaA + outA - MulScale255(outOverlaA, outA)); |
221 | 0 | } |
222 | | |
223 | | /************************************************************************/ |
224 | | /* DivScale255() */ |
225 | | /************************************************************************/ |
226 | | |
227 | | /** Divide 2 bytes considering them as ratios with 255 = 100%, and return their quotient unscaled to [0, 255], by flooring |
228 | | * \warning Caution: this function does not check that the result actually fits on a byte, and just casts the computed value to byte. |
229 | | */ |
230 | | inline GByte DivScale255(GByte a, GByte b) |
231 | 0 | { |
232 | 0 | if (a == 0) |
233 | 0 | { |
234 | 0 | return 0; |
235 | 0 | } |
236 | 0 | else if (b == 0) |
237 | 0 | { |
238 | 0 | return 255; |
239 | 0 | } |
240 | 0 | else |
241 | 0 | { |
242 | 0 | const int nRes = (a * 255) / b; |
243 | 0 | #ifdef DEBUG |
244 | 0 | CPLAssert(nRes <= 255); |
245 | 0 | #endif |
246 | 0 | return static_cast<GByte>(nRes); |
247 | 0 | } |
248 | 0 | } |
249 | | |
250 | | /************************************************************************/ |
251 | | /* PremultiplyChannels */ |
252 | | /************************************************************************/ |
253 | | |
254 | | //! Premultiply RGB channels by alpha (A) |
255 | | static inline void PremultiplyChannels(size_t i, const GByte *pabyR, |
256 | | const GByte *pabyG, const GByte *pabyB, |
257 | | GByte &outR, GByte &outG, GByte &outB, |
258 | | const GByte &A) |
259 | | |
260 | 0 | { |
261 | 0 | if (A == 255) |
262 | 0 | { |
263 | 0 | outR = pabyR ? pabyR[i] : 255; |
264 | 0 | outG = pabyG ? pabyG[i] : outR; // in case only R is present |
265 | 0 | outB = pabyB ? pabyB[i] : outR; // in case only R is present |
266 | 0 | } |
267 | 0 | else |
268 | 0 | { |
269 | 0 | outR = pabyR ? MulScale255(pabyR[i], A) : A; |
270 | 0 | outG = pabyG ? MulScale255(pabyG[i], A) |
271 | 0 | : outR; // in case only R is present |
272 | 0 | outB = pabyB ? MulScale255(pabyB[i], A) |
273 | 0 | : outR; // in case only R is present |
274 | 0 | } |
275 | 0 | } |
276 | | |
277 | | /************************************************************************/ |
278 | | /* GDALRasterBlendAlgorithm::GDALRasterBlendAlgorithm() */ |
279 | | /************************************************************************/ |
280 | | |
281 | | GDALRasterBlendAlgorithm::GDALRasterBlendAlgorithm(bool standaloneStep) |
282 | 0 | : GDALRasterPipelineStepAlgorithm( |
283 | 0 | NAME, DESCRIPTION, HELP_URL, |
284 | 0 | ConstructorOptions() |
285 | 0 | .SetStandaloneStep(standaloneStep) |
286 | 0 | .SetAddDefaultArguments(false) |
287 | 0 | .SetInputDatasetHelpMsg(_("Input raster dataset")) |
288 | 0 | .SetInputDatasetAlias("color-input") |
289 | 0 | .SetInputDatasetMetaVar("COLOR-INPUT") |
290 | 0 | .SetOutputDatasetHelpMsg(_("Output raster dataset"))) |
291 | 0 | { |
292 | 0 | const auto AddOverlayDatasetArg = [this]() |
293 | 0 | { |
294 | 0 | auto &arg = AddArg("overlay", 0, _("Overlay dataset"), |
295 | 0 | &m_overlayDataset, GDAL_OF_RASTER) |
296 | 0 | .SetPositional() |
297 | 0 | .SetRequired(); |
298 | |
|
299 | 0 | SetAutoCompleteFunctionForFilename(arg, GDAL_OF_RASTER); |
300 | 0 | }; |
301 | |
|
302 | 0 | if (standaloneStep) |
303 | 0 | { |
304 | 0 | AddRasterInputArgs(false, false); |
305 | 0 | AddOverlayDatasetArg(); |
306 | 0 | AddProgressArg(); |
307 | 0 | AddRasterOutputArgs(false); |
308 | 0 | } |
309 | 0 | else |
310 | 0 | { |
311 | 0 | AddRasterHiddenInputDatasetArg(); |
312 | 0 | AddOverlayDatasetArg(); |
313 | 0 | } |
314 | |
|
315 | 0 | const std::vector<std::string> compositionModeChoices{ |
316 | 0 | CompositionModesIdentifiers()}; |
317 | 0 | AddArg("operator", 0, _("Composition operator"), &m_operatorIdentifier) |
318 | 0 | .SetChoices(compositionModeChoices) |
319 | 0 | .SetDefault(CompositionModeToString(CompositionMode::SRC_OVER)) |
320 | 0 | .AddAction( |
321 | 0 | [this]() |
322 | 0 | { m_operator = CompositionModeFromString(m_operatorIdentifier); }); |
323 | |
|
324 | 0 | AddArg("opacity", 0, |
325 | 0 | _("Opacity percentage to apply to the overlay dataset (0=fully " |
326 | 0 | "transparent, 100=full use of overlay opacity)"), |
327 | 0 | &m_opacity) |
328 | 0 | .SetDefault(m_opacity) |
329 | 0 | .SetMinValueIncluded(0) |
330 | 0 | .SetMaxValueIncluded(OPACITY_INPUT_RANGE); |
331 | |
|
332 | 0 | AddValidationAction([this]() { return ValidateGlobal(); }); |
333 | 0 | } |
334 | | |
335 | | namespace |
336 | | { |
337 | | |
338 | | /************************************************************************/ |
339 | | /* BlendDataset */ |
340 | | /************************************************************************/ |
341 | | |
342 | | class BlendDataset final : public GDALDataset |
343 | | { |
344 | | public: |
345 | | BlendDataset(GDALDataset &oColorDS, GDALDataset &oOverlayDS, |
346 | | const CompositionMode eOperator, int nOpacity255Scale, |
347 | | bool bSwappedOpacity); |
348 | | ~BlendDataset() override; |
349 | | |
350 | | CPLErr GetGeoTransform(GDALGeoTransform >) const override |
351 | 0 | { |
352 | 0 | return m_oColorDS.GetGeoTransform(gt); |
353 | 0 | } |
354 | | |
355 | | const OGRSpatialReference *GetSpatialRef() const override |
356 | 0 | { |
357 | 0 | return m_oColorDS.GetSpatialRef(); |
358 | 0 | } |
359 | | |
360 | | bool AcquireSourcePixels(int nXOff, int nYOff, int nXSize, int nYSize, |
361 | | int nBufXSize, int nBufYSize, |
362 | | GDALRasterIOExtraArg *psExtraArg); |
363 | | |
364 | | protected: |
365 | | CPLErr IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, |
366 | | int nYSize, void *pData, int nBufXSize, int nBufYSize, |
367 | | GDALDataType eBufType, int nBandCount, |
368 | | BANDMAP_TYPE panBandMap, GSpacing nPixelSpace, |
369 | | GSpacing nLineSpace, GSpacing nBandSpace, |
370 | | GDALRasterIOExtraArg *psExtraArg) override; |
371 | | |
372 | | private: |
373 | | friend class BlendBand; |
374 | | GDALDataset &m_oColorDS; |
375 | | GDALDataset &m_oOverlayDS; |
376 | | const CompositionMode m_operator; |
377 | | const int m_opacity255Scale; |
378 | | std::vector<std::unique_ptr<BlendDataset>> m_apoOverviews{}; |
379 | | int m_nCachedXOff = 0; |
380 | | int m_nCachedYOff = 0; |
381 | | int m_nCachedXSize = 0; |
382 | | int m_nCachedYSize = 0; |
383 | | int m_nCachedBufXSize = 0; |
384 | | int m_nCachedBufYSize = 0; |
385 | | GDALRasterIOExtraArg m_sCachedExtraArg{}; |
386 | | std::vector<GByte> m_abyBuffer{}; |
387 | | bool m_ioError = false; |
388 | | bool m_bSwappedOpacity = false; |
389 | | }; |
390 | | |
391 | | /************************************************************************/ |
392 | | /* rgb_to_hs() */ |
393 | | /************************************************************************/ |
394 | | |
395 | | // rgb comes in as [r,g,b] with values in the range [0,255]. The returned |
396 | | // values will be with hue and saturation in the range [0,1]. |
397 | | |
398 | | // Derived from hsv_merge.py |
399 | | |
400 | | static void rgb_to_hs(int r, int g, int b, float *h, float *s) |
401 | 0 | { |
402 | 0 | int minc, maxc; |
403 | 0 | if (r <= g) |
404 | 0 | { |
405 | 0 | if (r <= b) |
406 | 0 | { |
407 | 0 | minc = r; |
408 | 0 | maxc = std::max(g, b); |
409 | 0 | } |
410 | 0 | else /* b < r */ |
411 | 0 | { |
412 | 0 | minc = b; |
413 | 0 | maxc = g; |
414 | 0 | } |
415 | 0 | } |
416 | 0 | else /* g < r */ |
417 | 0 | { |
418 | 0 | if (g <= b) |
419 | 0 | { |
420 | 0 | minc = g; |
421 | 0 | maxc = std::max(r, b); |
422 | 0 | } |
423 | 0 | else /* b < g */ |
424 | 0 | { |
425 | 0 | minc = b; |
426 | 0 | maxc = r; |
427 | 0 | } |
428 | 0 | } |
429 | 0 | const int maxc_minus_minc = maxc - minc; |
430 | 0 | if (s) |
431 | 0 | *s = maxc_minus_minc / static_cast<float>(std::max(1, maxc)); |
432 | 0 | if (h) |
433 | 0 | { |
434 | 0 | const float maxc_minus_minc_times_6 = |
435 | 0 | maxc_minus_minc == 0 ? 1.0f : 6.0f * maxc_minus_minc; |
436 | 0 | if (maxc == b) |
437 | 0 | *h = 4.0f / 6.0f + (r - g) / maxc_minus_minc_times_6; |
438 | 0 | else if (maxc == g) |
439 | 0 | *h = 2.0f / 6.0f + (b - r) / maxc_minus_minc_times_6; |
440 | 0 | else |
441 | 0 | { |
442 | 0 | const float tmp = (g - b) / maxc_minus_minc_times_6; |
443 | 0 | *h = tmp < 0.0f ? tmp + 1.0f : tmp; |
444 | 0 | } |
445 | 0 | } |
446 | 0 | } |
447 | | |
448 | | /************************************************************************/ |
449 | | /* choose_among() */ |
450 | | /************************************************************************/ |
451 | | |
452 | | template <typename T> |
453 | | static inline T choose_among(int idx, T a0, T a1, T a2, T a3, T a4, T a5) |
454 | 0 | { |
455 | 0 | switch (idx) |
456 | 0 | { |
457 | 0 | case 0: |
458 | 0 | return a0; |
459 | 0 | case 1: |
460 | 0 | return a1; |
461 | 0 | case 2: |
462 | 0 | return a2; |
463 | 0 | case 3: |
464 | 0 | return a3; |
465 | 0 | case 4: |
466 | 0 | return a4; |
467 | 0 | default: |
468 | 0 | break; |
469 | 0 | } |
470 | 0 | return a5; |
471 | 0 | } |
472 | | |
473 | | /************************************************************************/ |
474 | | /* hsv_to_rgb() */ |
475 | | /************************************************************************/ |
476 | | |
477 | | // hsv comes in as [h,s,v] with hue and saturation in the range [0,1], |
478 | | // but value in the range [0,255]. |
479 | | |
480 | | // Derived from hsv_merge.py |
481 | | |
482 | | static void hsv_to_rgb(float h, float s, GByte v, GByte *r, GByte *g, GByte *b) |
483 | 0 | { |
484 | 0 | const int i = static_cast<int>(6.0f * h); |
485 | 0 | const float f = 6.0f * h - i; |
486 | 0 | const GByte p = static_cast<GByte>(v * (1.0f - s) + 0.5f); |
487 | 0 | const GByte q = static_cast<GByte>(v * (1.0f - s * f) + 0.5f); |
488 | 0 | const GByte t = static_cast<GByte>(v * (1.0f - s * (1.0f - f)) + 0.5f); |
489 | |
|
490 | 0 | if (r) |
491 | 0 | *r = choose_among(i, v, q, p, p, t, v); |
492 | 0 | if (g) |
493 | 0 | *g = choose_among(i, t, v, v, q, p, p); |
494 | 0 | if (b) |
495 | 0 | *b = choose_among(i, p, p, t, v, v, q); |
496 | 0 | } |
497 | | |
498 | | /************************************************************************/ |
499 | | /* XMM_RGB_to_HS() */ |
500 | | /************************************************************************/ |
501 | | |
502 | | #ifdef HAVE_SSE2 |
503 | | static inline void |
504 | | XMM_RGB_to_HS(const GByte *CPL_RESTRICT pInR, const GByte *CPL_RESTRICT pInG, |
505 | | const GByte *CPL_RESTRICT pInB, const XMMReg4Float &zero, |
506 | | const XMMReg4Float &one, const XMMReg4Float &six, |
507 | | const XMMReg4Float &two_over_six, |
508 | | const XMMReg4Float &four_over_six, XMMReg4Float &h, |
509 | | XMMReg4Float &s) |
510 | 0 | { |
511 | 0 | const auto r = XMMReg4Float::Load4Val(pInR); |
512 | 0 | const auto g = XMMReg4Float::Load4Val(pInG); |
513 | 0 | const auto b = XMMReg4Float::Load4Val(pInB); |
514 | 0 | const auto minc = XMMReg4Float::Min(XMMReg4Float::Min(r, g), b); |
515 | 0 | const auto maxc = XMMReg4Float::Max(XMMReg4Float::Max(r, g), b); |
516 | 0 | const auto max_minus_min = maxc - minc; |
517 | 0 | s = max_minus_min / XMMReg4Float::Max(one, maxc); |
518 | 0 | const auto inv_max_minus_min_times_6_0 = |
519 | 0 | XMMReg4Float::Ternary(XMMReg4Float::Equals(max_minus_min, zero), one, |
520 | 0 | six * max_minus_min) |
521 | 0 | .inverse(); |
522 | 0 | const auto tmp = (g - b) * inv_max_minus_min_times_6_0; |
523 | 0 | h = XMMReg4Float::Ternary( |
524 | 0 | XMMReg4Float::Equals(maxc, b), |
525 | 0 | four_over_six + (r - g) * inv_max_minus_min_times_6_0, |
526 | 0 | XMMReg4Float::Ternary( |
527 | 0 | XMMReg4Float::Equals(maxc, g), |
528 | 0 | two_over_six + (b - r) * inv_max_minus_min_times_6_0, |
529 | 0 | XMMReg4Float::Ternary(XMMReg4Float::Lesser(tmp, zero), tmp + one, |
530 | 0 | tmp))); |
531 | 0 | } |
532 | | #endif |
533 | | |
534 | | /************************************************************************/ |
535 | | /* patch_value_line() */ |
536 | | /************************************************************************/ |
537 | | |
538 | | static |
539 | | #ifdef __GNUC__ |
540 | | __attribute__((__noinline__)) |
541 | | #endif |
542 | | void patch_value_line(int nCount, const GByte *CPL_RESTRICT pInR, |
543 | | const GByte *CPL_RESTRICT pInG, |
544 | | const GByte *CPL_RESTRICT pInB, |
545 | | const GByte *CPL_RESTRICT pInGray, |
546 | | GByte *CPL_RESTRICT pOutR, GByte *CPL_RESTRICT pOutG, |
547 | | GByte *CPL_RESTRICT pOutB) |
548 | 0 | { |
549 | 0 | int i = 0; |
550 | 0 | #ifdef HAVE_SSE2 |
551 | 0 | const auto zero = XMMReg4Float::Zero(); |
552 | 0 | const auto one = XMMReg4Float::Set1(1.0f); |
553 | 0 | const auto six = XMMReg4Float::Set1(6.0f); |
554 | 0 | const auto two_over_six = XMMReg4Float::Set1(2.0f / 6.0f); |
555 | 0 | const auto four_over_six = two_over_six + two_over_six; |
556 | |
|
557 | 0 | constexpr int ELTS = 8; |
558 | 0 | for (; i + (ELTS - 1) < nCount; i += ELTS) |
559 | 0 | { |
560 | 0 | XMMReg4Float h0, s0; |
561 | 0 | XMM_RGB_to_HS(pInR + i, pInG + i, pInB + i, zero, one, six, |
562 | 0 | two_over_six, four_over_six, h0, s0); |
563 | 0 | XMMReg4Float h1, s1; |
564 | 0 | XMM_RGB_to_HS(pInR + i + ELTS / 2, pInG + i + ELTS / 2, |
565 | 0 | pInB + i + ELTS / 2, zero, one, six, two_over_six, |
566 | 0 | four_over_six, h1, s1); |
567 | |
|
568 | 0 | XMMReg4Float v0, v1; |
569 | 0 | XMMReg4Float::Load8Val(pInGray + i, v0, v1); |
570 | |
|
571 | 0 | const auto half = XMMReg4Float::Set1(0.5f); |
572 | 0 | const auto six_h0 = six * h0; |
573 | 0 | const auto idx0 = six_h0.truncate_to_int(); |
574 | 0 | const auto f0 = six_h0 - idx0.cast_to_float(); |
575 | 0 | const auto p0 = (v0 * (one - s0) + half).truncate_to_int(); |
576 | 0 | const auto q0 = (v0 * (one - s0 * f0) + half).truncate_to_int(); |
577 | 0 | const auto t0 = (v0 * (one - s0 * (one - f0)) + half).truncate_to_int(); |
578 | |
|
579 | 0 | const auto six_h1 = six * h1; |
580 | 0 | const auto idx1 = six_h1.truncate_to_int(); |
581 | 0 | const auto f1 = six_h1 - idx1.cast_to_float(); |
582 | 0 | const auto p1 = (v1 * (one - s1) + half).truncate_to_int(); |
583 | 0 | const auto q1 = (v1 * (one - s1 * f1) + half).truncate_to_int(); |
584 | 0 | const auto t1 = (v1 * (one - s1 * (one - f1)) + half).truncate_to_int(); |
585 | |
|
586 | 0 | const auto idx = XMMReg8Byte::Pack(idx0, idx1); |
587 | 0 | const auto v = |
588 | 0 | XMMReg8Byte::Pack(v0.truncate_to_int(), v1.truncate_to_int()); |
589 | 0 | const auto p = XMMReg8Byte::Pack(p0, p1); |
590 | 0 | const auto q = XMMReg8Byte::Pack(q0, q1); |
591 | 0 | const auto t = XMMReg8Byte::Pack(t0, t1); |
592 | |
|
593 | 0 | const auto equalsTo0 = XMMReg8Byte::Equals(idx, XMMReg8Byte::Zero()); |
594 | 0 | const auto one8Byte = XMMReg8Byte::Set1(1); |
595 | 0 | const auto equalsTo1 = XMMReg8Byte::Equals(idx, one8Byte); |
596 | 0 | const auto two8Byte = one8Byte + one8Byte; |
597 | 0 | const auto equalsTo2 = XMMReg8Byte::Equals(idx, two8Byte); |
598 | 0 | const auto four8Byte = two8Byte + two8Byte; |
599 | 0 | const auto equalsTo4 = XMMReg8Byte::Equals(idx, four8Byte); |
600 | 0 | const auto equalsTo3 = XMMReg8Byte::Equals(idx, four8Byte - one8Byte); |
601 | | // clang-format off |
602 | 0 | if (pOutR) |
603 | 0 | { |
604 | 0 | const auto out_r = |
605 | 0 | XMMReg8Byte::Ternary(equalsTo0, v, |
606 | 0 | XMMReg8Byte::Ternary(equalsTo1, q, |
607 | 0 | XMMReg8Byte::Ternary(XMMReg8Byte::Or(equalsTo2, equalsTo3), p, |
608 | 0 | XMMReg8Byte::Ternary(equalsTo4, t, v)))); |
609 | 0 | out_r.Store8Val(pOutR + i); |
610 | 0 | } |
611 | 0 | if (pOutG) |
612 | 0 | { |
613 | 0 | const auto out_g = |
614 | 0 | XMMReg8Byte::Ternary(equalsTo0, t, |
615 | 0 | XMMReg8Byte::Ternary(XMMReg8Byte::Or(equalsTo1, equalsTo2), v, |
616 | 0 | XMMReg8Byte::Ternary(equalsTo3, q, p))); |
617 | 0 | out_g.Store8Val(pOutG + i); |
618 | 0 | } |
619 | 0 | if (pOutB) |
620 | 0 | { |
621 | 0 | const auto out_b = |
622 | 0 | XMMReg8Byte::Ternary(XMMReg8Byte::Or(equalsTo0, equalsTo1), p, |
623 | 0 | XMMReg8Byte::Ternary(equalsTo2, t, |
624 | 0 | XMMReg8Byte::Ternary(XMMReg8Byte::Or(equalsTo3, equalsTo4), |
625 | 0 | v, q))); |
626 | 0 | out_b.Store8Val(pOutB + i); |
627 | 0 | } |
628 | | // clang-format on |
629 | 0 | } |
630 | 0 | #endif |
631 | |
|
632 | 0 | for (; i < nCount; ++i) |
633 | 0 | { |
634 | 0 | float h, s; |
635 | 0 | rgb_to_hs(pInR[i], pInG[i], pInB[i], &h, &s); |
636 | 0 | hsv_to_rgb(h, s, pInGray[i], pOutR ? pOutR + i : nullptr, |
637 | 0 | pOutG ? pOutG + i : nullptr, pOutB ? pOutB + i : nullptr); |
638 | 0 | } |
639 | 0 | } |
640 | | |
641 | | /************************************************************************/ |
642 | | /* BlendBand */ |
643 | | /************************************************************************/ |
644 | | |
645 | | class BlendBand final : public GDALRasterBand |
646 | | { |
647 | | public: |
648 | | BlendBand(BlendDataset &oBlendDataset, int nBandIn) |
649 | 0 | : m_oBlendDataset(oBlendDataset) |
650 | 0 | { |
651 | 0 | nBand = nBandIn; |
652 | 0 | nRasterXSize = oBlendDataset.GetRasterXSize(); |
653 | 0 | nRasterYSize = oBlendDataset.GetRasterYSize(); |
654 | 0 | oBlendDataset.m_oColorDS.GetRasterBand(1)->GetBlockSize(&nBlockXSize, |
655 | 0 | &nBlockYSize); |
656 | 0 | eDataType = GDT_UInt8; |
657 | 0 | } |
658 | | |
659 | | GDALColorInterp GetColorInterpretation() override |
660 | 0 | { |
661 | 0 | if (m_oBlendDataset.GetRasterCount() <= 2 && nBand == 1) |
662 | 0 | return GCI_GrayIndex; |
663 | 0 | else if (m_oBlendDataset.GetRasterCount() == 2 || nBand == 4) |
664 | 0 | return GCI_AlphaBand; |
665 | 0 | else |
666 | 0 | return static_cast<GDALColorInterp>(GCI_RedBand + nBand - 1); |
667 | 0 | } |
668 | | |
669 | | int GetOverviewCount() override |
670 | 0 | { |
671 | 0 | return static_cast<int>(m_oBlendDataset.m_apoOverviews.size()); |
672 | 0 | } |
673 | | |
674 | | GDALRasterBand *GetOverview(int idx) override |
675 | 0 | { |
676 | 0 | return idx >= 0 && idx < GetOverviewCount() |
677 | 0 | ? m_oBlendDataset.m_apoOverviews[idx]->GetRasterBand(nBand) |
678 | 0 | : nullptr; |
679 | 0 | } |
680 | | |
681 | | protected: |
682 | | CPLErr IReadBlock(int nBlockXOff, int nBlockYOff, void *pData) override |
683 | 0 | { |
684 | 0 | int nReqXSize = 0; |
685 | 0 | int nReqYSize = 0; |
686 | 0 | GetActualBlockSize(nBlockXOff, nBlockYOff, &nReqXSize, &nReqYSize); |
687 | 0 | return RasterIO(GF_Read, nBlockXOff * nBlockXSize, |
688 | 0 | nBlockYOff * nBlockYSize, nReqXSize, nReqYSize, pData, |
689 | 0 | nReqXSize, nReqYSize, GDT_UInt8, 1, nBlockXSize, |
690 | 0 | nullptr); |
691 | 0 | } |
692 | | |
693 | | CPLErr IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, |
694 | | int nYSize, void *pData, int nBufXSize, int nBufYSize, |
695 | | GDALDataType eBufType, GSpacing nPixelSpace, |
696 | | GSpacing nLineSpace, |
697 | | GDALRasterIOExtraArg *psExtraArg) override; |
698 | | |
699 | | private: |
700 | | BlendDataset &m_oBlendDataset; |
701 | | }; |
702 | | |
703 | | /************************************************************************/ |
704 | | /* BlendDataset::BlendDataset() */ |
705 | | /************************************************************************/ |
706 | | |
707 | | BlendDataset::BlendDataset(GDALDataset &oColorDS, GDALDataset &oOverlayDS, |
708 | | const CompositionMode eOperator, |
709 | | int nOpacity255Scale, bool bSwappedOpacity) |
710 | 0 | : m_oColorDS(oColorDS), m_oOverlayDS(oOverlayDS), m_operator(eOperator), |
711 | 0 | m_opacity255Scale(nOpacity255Scale), m_bSwappedOpacity(bSwappedOpacity) |
712 | 0 | { |
713 | 0 | m_oColorDS.Reference(); |
714 | 0 | m_oOverlayDS.Reference(); |
715 | |
|
716 | 0 | CPLAssert(oColorDS.GetRasterXSize() == oOverlayDS.GetRasterXSize()); |
717 | 0 | CPLAssert(oColorDS.GetRasterYSize() == oOverlayDS.GetRasterYSize()); |
718 | 0 | nRasterXSize = oColorDS.GetRasterXSize(); |
719 | 0 | nRasterYSize = oColorDS.GetRasterYSize(); |
720 | 0 | const int nOvrCount = oOverlayDS.GetRasterBand(1)->GetOverviewCount(); |
721 | 0 | bool bCanCreateOvr = true; |
722 | 0 | for (int iBand = 1; iBand <= oColorDS.GetRasterCount(); ++iBand) |
723 | 0 | { |
724 | 0 | SetBand(iBand, std::make_unique<BlendBand>(*this, iBand)); |
725 | 0 | bCanCreateOvr = |
726 | 0 | bCanCreateOvr && |
727 | 0 | (iBand > oColorDS.GetRasterCount() || |
728 | 0 | oColorDS.GetRasterBand(iBand)->GetOverviewCount() == nOvrCount) && |
729 | 0 | (iBand > oOverlayDS.GetRasterCount() || |
730 | 0 | oOverlayDS.GetRasterBand(iBand)->GetOverviewCount() == nOvrCount); |
731 | 0 | const int nColorBandxIdx = |
732 | 0 | iBand <= oColorDS.GetRasterCount() ? iBand : 1; |
733 | 0 | const int nOverlayBandIdx = |
734 | 0 | iBand <= oOverlayDS.GetRasterCount() ? iBand : 1; |
735 | 0 | for (int iOvr = 0; iOvr < nOvrCount && bCanCreateOvr; ++iOvr) |
736 | 0 | { |
737 | 0 | const auto poColorOvrBand = |
738 | 0 | oColorDS.GetRasterBand(nColorBandxIdx)->GetOverview(iOvr); |
739 | 0 | const auto poGSOvrBand = |
740 | 0 | oOverlayDS.GetRasterBand(nOverlayBandIdx)->GetOverview(iOvr); |
741 | 0 | bCanCreateOvr = |
742 | 0 | poColorOvrBand->GetDataset() != &oColorDS && |
743 | 0 | poColorOvrBand->GetDataset() == oColorDS.GetRasterBand(1) |
744 | 0 | ->GetOverview(iOvr) |
745 | 0 | ->GetDataset() && |
746 | 0 | poGSOvrBand->GetDataset() != &oOverlayDS && |
747 | 0 | poGSOvrBand->GetDataset() == oOverlayDS.GetRasterBand(1) |
748 | 0 | ->GetOverview(iOvr) |
749 | 0 | ->GetDataset() && |
750 | 0 | poColorOvrBand->GetXSize() == poGSOvrBand->GetXSize() && |
751 | 0 | poColorOvrBand->GetYSize() == poGSOvrBand->GetYSize(); |
752 | 0 | } |
753 | 0 | } |
754 | |
|
755 | 0 | SetDescription(CPLSPrintf("Blend %s width %s", m_oColorDS.GetDescription(), |
756 | 0 | m_oOverlayDS.GetDescription())); |
757 | 0 | if (nBands > 1) |
758 | 0 | { |
759 | 0 | SetMetadataItem("INTERLEAVE", "PIXEL", "IMAGE_STRUCTURE"); |
760 | 0 | } |
761 | |
|
762 | 0 | if (bCanCreateOvr) |
763 | 0 | { |
764 | 0 | for (int iOvr = 0; iOvr < nOvrCount; ++iOvr) |
765 | 0 | { |
766 | 0 | m_apoOverviews.push_back(std::make_unique<BlendDataset>( |
767 | 0 | *(oColorDS.GetRasterBand(1)->GetOverview(iOvr)->GetDataset()), |
768 | 0 | *(oOverlayDS.GetRasterBand(1)->GetOverview(iOvr)->GetDataset()), |
769 | 0 | m_operator, nOpacity255Scale, bSwappedOpacity)); |
770 | 0 | } |
771 | 0 | } |
772 | 0 | } |
773 | | |
774 | | /************************************************************************/ |
775 | | /* ~BlendDataset::BlendDataset() */ |
776 | | /************************************************************************/ |
777 | | |
778 | | BlendDataset::~BlendDataset() |
779 | 0 | { |
780 | 0 | m_oColorDS.ReleaseRef(); |
781 | 0 | m_oOverlayDS.ReleaseRef(); |
782 | 0 | } |
783 | | |
784 | | /************************************************************************/ |
785 | | /* BlendDataset::AcquireSourcePixels() */ |
786 | | /************************************************************************/ |
787 | | |
788 | | bool BlendDataset::AcquireSourcePixels(int nXOff, int nYOff, int nXSize, |
789 | | int nYSize, int nBufXSize, int nBufYSize, |
790 | | GDALRasterIOExtraArg *psExtraArg) |
791 | 0 | { |
792 | 0 | if (nXOff == m_nCachedXOff && nYOff == m_nCachedYOff && |
793 | 0 | nXSize == m_nCachedXSize && nYSize == m_nCachedYSize && |
794 | 0 | nBufXSize == m_nCachedBufXSize && nBufYSize == m_nCachedBufYSize && |
795 | 0 | psExtraArg->eResampleAlg == m_sCachedExtraArg.eResampleAlg && |
796 | 0 | psExtraArg->bFloatingPointWindowValidity == |
797 | 0 | m_sCachedExtraArg.bFloatingPointWindowValidity && |
798 | 0 | (!psExtraArg->bFloatingPointWindowValidity || |
799 | 0 | (psExtraArg->dfXOff == m_sCachedExtraArg.dfXOff && |
800 | 0 | psExtraArg->dfYOff == m_sCachedExtraArg.dfYOff && |
801 | 0 | psExtraArg->dfXSize == m_sCachedExtraArg.dfXSize && |
802 | 0 | psExtraArg->dfYSize == m_sCachedExtraArg.dfYSize))) |
803 | 0 | { |
804 | 0 | return !m_abyBuffer.empty(); |
805 | 0 | } |
806 | | |
807 | 0 | const int nColorCount = m_oColorDS.GetRasterCount(); |
808 | 0 | const int nOverlayCount = m_oOverlayDS.GetRasterCount(); |
809 | 0 | const int nCompsInBuffer = nColorCount + nOverlayCount; |
810 | 0 | assert(nCompsInBuffer > 0); |
811 | | |
812 | 0 | if (static_cast<size_t>(nBufXSize) > |
813 | 0 | std::numeric_limits<size_t>::max() / nBufYSize / nCompsInBuffer) |
814 | 0 | { |
815 | 0 | CPLError(CE_Failure, CPLE_OutOfMemory, |
816 | 0 | "Out of memory allocating temporary buffer"); |
817 | 0 | m_abyBuffer.clear(); |
818 | 0 | m_ioError = true; |
819 | 0 | return false; |
820 | 0 | } |
821 | | |
822 | 0 | const size_t nPixelCount = static_cast<size_t>(nBufXSize) * nBufYSize; |
823 | 0 | try |
824 | 0 | { |
825 | 0 | if (m_abyBuffer.size() < nPixelCount * nCompsInBuffer) |
826 | 0 | m_abyBuffer.resize(nPixelCount * nCompsInBuffer); |
827 | 0 | } |
828 | 0 | catch (const std::exception &) |
829 | 0 | { |
830 | 0 | CPLError(CE_Failure, CPLE_OutOfMemory, |
831 | 0 | "Out of memory allocating temporary buffer"); |
832 | 0 | m_abyBuffer.clear(); |
833 | 0 | m_ioError = true; |
834 | 0 | return false; |
835 | 0 | } |
836 | | |
837 | 0 | const bool bOK = |
838 | 0 | (m_oColorDS.RasterIO(GF_Read, nXOff, nYOff, nXSize, nYSize, |
839 | 0 | m_abyBuffer.data(), nBufXSize, nBufYSize, |
840 | 0 | GDT_UInt8, nColorCount, nullptr, 1, nBufXSize, |
841 | 0 | static_cast<GSpacing>(nPixelCount), |
842 | 0 | psExtraArg) == CE_None && |
843 | 0 | m_oOverlayDS.RasterIO( |
844 | 0 | GF_Read, nXOff, nYOff, nXSize, nYSize, |
845 | 0 | m_abyBuffer.data() + nPixelCount * nColorCount, nBufXSize, |
846 | 0 | nBufYSize, GDT_UInt8, nOverlayCount, nullptr, 1, nBufXSize, |
847 | 0 | static_cast<GSpacing>(nPixelCount), psExtraArg) == CE_None); |
848 | 0 | if (bOK) |
849 | 0 | { |
850 | 0 | m_nCachedXOff = nXOff; |
851 | 0 | m_nCachedYOff = nYOff; |
852 | 0 | m_nCachedXSize = nXSize; |
853 | 0 | m_nCachedYSize = nYSize; |
854 | 0 | m_nCachedBufXSize = nBufXSize; |
855 | 0 | m_nCachedBufYSize = nBufYSize; |
856 | 0 | m_sCachedExtraArg = *psExtraArg; |
857 | 0 | } |
858 | 0 | else |
859 | 0 | { |
860 | 0 | m_abyBuffer.clear(); |
861 | 0 | m_ioError = true; |
862 | 0 | } |
863 | 0 | return bOK; |
864 | 0 | } |
865 | | |
866 | | /************************************************************************/ |
867 | | /* gTabInvDstA */ |
868 | | /************************************************************************/ |
869 | | |
870 | | constexpr int SHIFT_DIV_DSTA = 8; |
871 | | |
872 | | // Table of (255 * 256 + k/2) / k values for k in [0,255] |
873 | | constexpr auto gTabInvDstA = []() |
874 | | { |
875 | | std::array<uint16_t, 256> arr{}; |
876 | | |
877 | | arr[0] = 0; |
878 | | for (int k = 1; k <= 255; ++k) |
879 | | { |
880 | | arr[k] = static_cast<uint16_t>(((255 << SHIFT_DIV_DSTA) + (k / 2)) / k); |
881 | | } |
882 | | |
883 | | return arr; |
884 | | }(); |
885 | | |
886 | | /************************************************************************/ |
887 | | /* BlendMultiply_Generic */ |
888 | | /************************************************************************/ |
889 | | |
890 | | static void BlendMultiply_Generic( |
891 | | const GByte *CPL_RESTRICT pabyR, const GByte *CPL_RESTRICT pabyG, |
892 | | const GByte *CPL_RESTRICT pabyB, const GByte *CPL_RESTRICT pabyA, |
893 | | const GByte *CPL_RESTRICT pabyOverlayR, |
894 | | const GByte *CPL_RESTRICT pabyOverlayG, |
895 | | const GByte *CPL_RESTRICT pabyOverlayB, |
896 | | const GByte *CPL_RESTRICT pabyOverlayA, GByte *CPL_RESTRICT pabyDst, |
897 | | GSpacing nPixelSpace, GSpacing nBandSpace, size_t i, size_t N, |
898 | | GByte nOpacity, int nOutputBands, bool bSwappedOpacity) |
899 | 0 | { |
900 | | |
901 | | // Generic formulas from Mapserver |
902 | | // Dca' = Sca.Dca + Sca.(1 - Da) + Dca.(1 - Sa) |
903 | | // Da' = Sa + Da - Sa.Da |
904 | | |
905 | | // TODO: optimize for the various cases (with/without alpha, grayscale/RGB) |
906 | | // TODO: optimize mathematically to avoid redundant computations |
907 | |
|
908 | 0 | GSpacing nDstOffset = 0; |
909 | |
|
910 | 0 | for (; i < N; ++i) |
911 | 0 | { |
912 | |
|
913 | 0 | GByte nOverlayR, nOverlayG, nOverlayB, nOverlayA; |
914 | 0 | GByte nR, nG, nB, nA; |
915 | 0 | GByte nFinalAlpha; |
916 | 0 | ProcessAlphaChannels(i, pabyA, pabyOverlayA, nOpacity, bSwappedOpacity, |
917 | 0 | nA, nOverlayA, nFinalAlpha); |
918 | 0 | PremultiplyChannels(i, pabyR, pabyG, pabyB, nR, nG, nB, nA); |
919 | 0 | PremultiplyChannels(i, pabyOverlayR, pabyOverlayG, pabyOverlayB, |
920 | 0 | nOverlayR, nOverlayG, nOverlayB, nOverlayA); |
921 | | |
922 | | // Result |
923 | 0 | auto processComponent = [&nFinalAlpha](GByte C, GByte A, GByte OverlayC, |
924 | 0 | GByte OverlayA) -> GByte |
925 | 0 | { |
926 | 0 | return DivScale255((MulScale255(C, OverlayC) + |
927 | 0 | MulScale255(C, 255 - OverlayA) + |
928 | 0 | MulScale255(OverlayC, 255 - A)), |
929 | 0 | nFinalAlpha); |
930 | 0 | }; |
931 | |
|
932 | 0 | pabyDst[nDstOffset] = processComponent(nR, nA, nOverlayR, nOverlayA); |
933 | | |
934 | | // Grayscale with alpha |
935 | 0 | if (nOutputBands == 2) |
936 | 0 | { |
937 | 0 | pabyDst[nDstOffset + nBandSpace] = nFinalAlpha; |
938 | 0 | } |
939 | 0 | else |
940 | 0 | { |
941 | | // RBG and RGBA |
942 | 0 | if (nOutputBands >= 3) |
943 | 0 | { |
944 | 0 | pabyDst[nDstOffset + nBandSpace] = |
945 | 0 | processComponent(nG, nA, nOverlayG, nOverlayA); |
946 | 0 | pabyDst[nDstOffset + 2 * nBandSpace] = |
947 | 0 | processComponent(nB, nA, nOverlayB, nOverlayA); |
948 | 0 | } |
949 | | |
950 | | // RGBA |
951 | 0 | if (nOutputBands == 4) |
952 | 0 | { |
953 | 0 | pabyDst[nDstOffset + 3 * nBandSpace] = nFinalAlpha; |
954 | 0 | } |
955 | 0 | } |
956 | 0 | nDstOffset += nPixelSpace; |
957 | 0 | } |
958 | 0 | } |
959 | | |
960 | | /************************************************************************/ |
961 | | /* BlendScreen_Generic */ |
962 | | /************************************************************************/ |
963 | | |
964 | | static void BlendScreen_Generic( |
965 | | const GByte *CPL_RESTRICT pabyR, const GByte *CPL_RESTRICT pabyG, |
966 | | const GByte *CPL_RESTRICT pabyB, const GByte *CPL_RESTRICT pabyA, |
967 | | const GByte *CPL_RESTRICT pabyOverlayR, |
968 | | const GByte *CPL_RESTRICT pabyOverlayG, |
969 | | const GByte *CPL_RESTRICT pabyOverlayB, |
970 | | const GByte *CPL_RESTRICT pabyOverlayA, GByte *CPL_RESTRICT pabyDst, |
971 | | GSpacing nPixelSpace, GSpacing nBandSpace, size_t i, size_t N, |
972 | | GByte nOpacity, int nOutputBands, bool bSwappedOpacity) |
973 | 0 | { |
974 | | |
975 | | // Generic formulas from Mapserver |
976 | | // Dca' = Sca + Dca - Sca.Dca |
977 | | // Da' = Sa + Da - Sa.Da |
978 | | |
979 | | // TODO: optimize for the various cases (with/without alpha, grayscale/RGB) |
980 | | // TODO: optimize mathematically to avoid redundant computations |
981 | |
|
982 | 0 | GSpacing nDstOffset = 0; |
983 | |
|
984 | 0 | for (; i < N; ++i) |
985 | 0 | { |
986 | |
|
987 | 0 | GByte nOverlayR, nOverlayG, nOverlayB, nOverlayA; |
988 | 0 | GByte nR, nG, nB, nA; |
989 | 0 | GByte nFinalAlpha; |
990 | 0 | ProcessAlphaChannels(i, pabyA, pabyOverlayA, nOpacity, bSwappedOpacity, |
991 | 0 | nA, nOverlayA, nFinalAlpha); |
992 | 0 | PremultiplyChannels(i, pabyR, pabyG, pabyB, nR, nG, nB, nA); |
993 | 0 | PremultiplyChannels(i, pabyOverlayR, pabyOverlayG, pabyOverlayB, |
994 | 0 | nOverlayR, nOverlayG, nOverlayB, nOverlayA); |
995 | | |
996 | | // Result |
997 | |
|
998 | 0 | auto processComponent = [&nFinalAlpha](GByte C, GByte OverlayC) -> GByte |
999 | 0 | { |
1000 | 0 | return DivScale255(C + OverlayC - MulScale255(C, OverlayC), |
1001 | 0 | nFinalAlpha); |
1002 | 0 | }; |
1003 | |
|
1004 | 0 | pabyDst[nDstOffset] = processComponent(nR, nOverlayR); |
1005 | | |
1006 | | // Grayscale with alpha |
1007 | 0 | if (nOutputBands == 2) |
1008 | 0 | { |
1009 | 0 | pabyDst[nDstOffset + nBandSpace] = nFinalAlpha; |
1010 | 0 | } |
1011 | 0 | else |
1012 | 0 | { |
1013 | | // RBG and RGBA |
1014 | 0 | if (nOutputBands >= 3) |
1015 | 0 | { |
1016 | 0 | pabyDst[nDstOffset + nBandSpace] = |
1017 | 0 | processComponent(nG, nOverlayG); |
1018 | 0 | pabyDst[nDstOffset + 2 * nBandSpace] = |
1019 | 0 | processComponent(nB, nOverlayB); |
1020 | 0 | } |
1021 | | |
1022 | | // RGBA |
1023 | 0 | if (nOutputBands == 4) |
1024 | 0 | { |
1025 | 0 | pabyDst[nDstOffset + 3 * nBandSpace] = nFinalAlpha; |
1026 | 0 | } |
1027 | 0 | } |
1028 | 0 | nDstOffset += nPixelSpace; |
1029 | 0 | } |
1030 | 0 | } |
1031 | | |
1032 | | /************************************************************************/ |
1033 | | /* BlendOverlay_Generic */ |
1034 | | /************************************************************************/ |
1035 | | |
1036 | | static void BlendOverlay_Generic( |
1037 | | const GByte *CPL_RESTRICT pabyR, const GByte *CPL_RESTRICT pabyG, |
1038 | | const GByte *CPL_RESTRICT pabyB, const GByte *CPL_RESTRICT pabyA, |
1039 | | const GByte *CPL_RESTRICT pabyOverlayR, |
1040 | | const GByte *CPL_RESTRICT pabyOverlayG, |
1041 | | const GByte *CPL_RESTRICT pabyOverlayB, |
1042 | | const GByte *CPL_RESTRICT pabyOverlayA, GByte *CPL_RESTRICT pabyDst, |
1043 | | GSpacing nPixelSpace, GSpacing nBandSpace, size_t i, size_t N, |
1044 | | GByte nOpacity, int nOutputBands, bool bSwappedOpacity) |
1045 | 0 | { |
1046 | | |
1047 | | // Generic formulas from Mapserver |
1048 | | // Where "D" is destination, "S" is source (overlay) |
1049 | | // if 2.Dca < Da |
1050 | | // Dca' = 2.Sca.Dca + Sca.(1 - Da) + Dca.(1 - Sa) |
1051 | | // otherwise |
1052 | | // Dca' = Sa.Da - 2.(Da - Dca).(Sa - Sca) + Sca.(1 - Da) + Dca.(1 - Sa) |
1053 | | // |
1054 | | // Da' = Sa + Da - Sa.Da |
1055 | | |
1056 | | // TODO: optimize for the various cases (with/without alpha, grayscale/RGB) |
1057 | | // TODO: optimize mathematically to avoid redundant computations |
1058 | |
|
1059 | 0 | GSpacing nDstOffset = 0; |
1060 | |
|
1061 | 0 | for (; i < N; ++i) |
1062 | 0 | { |
1063 | |
|
1064 | 0 | GByte nOverlayR, nOverlayG, nOverlayB, nOverlayA; |
1065 | 0 | GByte nR, nG, nB, nA; |
1066 | 0 | GByte nFinalAlpha; |
1067 | 0 | ProcessAlphaChannels(i, pabyA, pabyOverlayA, nOpacity, bSwappedOpacity, |
1068 | 0 | nA, nOverlayA, nFinalAlpha); |
1069 | 0 | PremultiplyChannels(i, pabyR, pabyG, pabyB, nR, nG, nB, nA); |
1070 | 0 | PremultiplyChannels(i, pabyOverlayR, pabyOverlayG, pabyOverlayB, |
1071 | 0 | nOverlayR, nOverlayG, nOverlayB, nOverlayA); |
1072 | | |
1073 | | // Sa.Da |
1074 | 0 | const GByte nAlphaMul{MulScale255(nOverlayA, nA)}; |
1075 | |
|
1076 | 0 | auto processComponent_lessThan = [&nFinalAlpha](GByte C, GByte A, |
1077 | 0 | GByte OverlayC, |
1078 | 0 | GByte OverlayA) -> GByte |
1079 | 0 | { |
1080 | 0 | return DivScale255(2 * MulScale255(C, OverlayC) + |
1081 | 0 | MulScale255(C, 255 - OverlayA) + |
1082 | 0 | MulScale255(OverlayC, 255 - A), |
1083 | 0 | nFinalAlpha); |
1084 | 0 | }; |
1085 | |
|
1086 | 0 | auto processComponent_greaterEqual = |
1087 | 0 | [&nFinalAlpha, &nAlphaMul](GByte C, GByte A, GByte OverlayC, |
1088 | 0 | GByte OverlayA) -> GByte |
1089 | 0 | { |
1090 | 0 | return DivScale255(nAlphaMul - |
1091 | 0 | 2 * MulScale255(A - C, OverlayA - OverlayC) + |
1092 | 0 | MulScale255(C, 255 - OverlayA) + |
1093 | 0 | MulScale255(OverlayC, 255 - A), |
1094 | 0 | nFinalAlpha); |
1095 | 0 | }; |
1096 | |
|
1097 | 0 | if (2 * nR < nA) |
1098 | 0 | { |
1099 | 0 | pabyDst[nDstOffset] = |
1100 | 0 | processComponent_lessThan(nR, nA, nOverlayR, nOverlayA); |
1101 | 0 | } |
1102 | 0 | else |
1103 | 0 | { |
1104 | 0 | pabyDst[nDstOffset] = |
1105 | 0 | processComponent_greaterEqual(nR, nA, nOverlayR, nOverlayA); |
1106 | 0 | } |
1107 | | |
1108 | | // Grayscale with alpha |
1109 | 0 | if (nOutputBands == 2) |
1110 | 0 | { |
1111 | 0 | pabyDst[nDstOffset + nBandSpace] = nFinalAlpha; |
1112 | 0 | } |
1113 | 0 | else |
1114 | 0 | { |
1115 | | // RBG and RGBA |
1116 | 0 | if (nOutputBands >= 3) |
1117 | 0 | { |
1118 | |
|
1119 | 0 | if (2 * nG < nA) |
1120 | 0 | { |
1121 | 0 | pabyDst[nDstOffset + nBandSpace] = |
1122 | 0 | processComponent_lessThan(nG, nA, nOverlayG, nOverlayA); |
1123 | 0 | } |
1124 | 0 | else |
1125 | 0 | { |
1126 | 0 | pabyDst[nDstOffset + nBandSpace] = |
1127 | 0 | processComponent_greaterEqual(nG, nA, nOverlayG, |
1128 | 0 | nOverlayA); |
1129 | 0 | } |
1130 | |
|
1131 | 0 | if (2 * nB < nA) |
1132 | 0 | { |
1133 | 0 | pabyDst[nDstOffset + 2 * nBandSpace] = |
1134 | 0 | processComponent_lessThan(nB, nA, nOverlayB, nOverlayA); |
1135 | 0 | } |
1136 | 0 | else |
1137 | 0 | { |
1138 | 0 | pabyDst[nDstOffset + 2 * nBandSpace] = |
1139 | 0 | processComponent_greaterEqual(nB, nA, nOverlayB, |
1140 | 0 | nOverlayA); |
1141 | 0 | } |
1142 | 0 | } |
1143 | | |
1144 | | // RGBA |
1145 | 0 | if (nOutputBands == 4) |
1146 | 0 | { |
1147 | 0 | pabyDst[nDstOffset + 3 * nBandSpace] = nFinalAlpha; |
1148 | 0 | } |
1149 | 0 | } |
1150 | 0 | nDstOffset += nPixelSpace; |
1151 | 0 | } |
1152 | 0 | } |
1153 | | |
1154 | | /************************************************************************/ |
1155 | | /* BlendHardLight_Generic */ |
1156 | | /************************************************************************/ |
1157 | | |
1158 | | static void BlendHardLight_Generic( |
1159 | | const GByte *CPL_RESTRICT pabyR, const GByte *CPL_RESTRICT pabyG, |
1160 | | const GByte *CPL_RESTRICT pabyB, const GByte *CPL_RESTRICT pabyA, |
1161 | | const GByte *CPL_RESTRICT pabyOverlayR, |
1162 | | const GByte *CPL_RESTRICT pabyOverlayG, |
1163 | | const GByte *CPL_RESTRICT pabyOverlayB, |
1164 | | const GByte *CPL_RESTRICT pabyOverlayA, GByte *CPL_RESTRICT pabyDst, |
1165 | | GSpacing nPixelSpace, GSpacing nBandSpace, size_t i, size_t N, |
1166 | | GByte nOpacity, int nOutputBands, bool bSwappedOpacity) |
1167 | 0 | { |
1168 | | // Hard Light is Overlay with roles of source and overlay swapped |
1169 | 0 | BlendOverlay_Generic(pabyOverlayR, pabyOverlayG, pabyOverlayB, pabyOverlayA, |
1170 | 0 | pabyR, pabyG, pabyB, pabyA, pabyDst, nPixelSpace, |
1171 | 0 | nBandSpace, i, N, nOpacity, nOutputBands, |
1172 | 0 | !bSwappedOpacity); |
1173 | 0 | } |
1174 | | |
1175 | | /************************************************************************/ |
1176 | | /* BlendDarken_Generic */ |
1177 | | /************************************************************************/ |
1178 | | |
1179 | | static void BlendDarken_Generic( |
1180 | | const GByte *CPL_RESTRICT pabyR, const GByte *CPL_RESTRICT pabyG, |
1181 | | const GByte *CPL_RESTRICT pabyB, const GByte *CPL_RESTRICT pabyA, |
1182 | | const GByte *CPL_RESTRICT pabyOverlayR, |
1183 | | const GByte *CPL_RESTRICT pabyOverlayG, |
1184 | | const GByte *CPL_RESTRICT pabyOverlayB, |
1185 | | const GByte *CPL_RESTRICT pabyOverlayA, GByte *CPL_RESTRICT pabyDst, |
1186 | | GSpacing nPixelSpace, GSpacing nBandSpace, size_t i, size_t N, |
1187 | | GByte nOpacity, int nOutputBands, bool bSwappedOpacity) |
1188 | 0 | { |
1189 | | // Generic formulas from Mapserver |
1190 | | // Dca' = min(Sca.Da, Dca.Sa) + Sca.(1 - Da) + Dca.(1 - Sa) |
1191 | | // Da' = Sa + Da - Sa.Da |
1192 | | |
1193 | | // TODO: optimize for the various cases (with/without alpha, grayscale/RGB) |
1194 | | // TODO: optimize mathematically to avoid redundant computations |
1195 | |
|
1196 | 0 | GSpacing nDstOffset = 0; |
1197 | |
|
1198 | 0 | for (; i < N; ++i) |
1199 | 0 | { |
1200 | |
|
1201 | 0 | GByte nOverlayR, nOverlayG, nOverlayB, nOverlayA; |
1202 | 0 | GByte nR, nG, nB, nA; |
1203 | 0 | GByte nFinalAlpha; |
1204 | 0 | ProcessAlphaChannels(i, pabyA, pabyOverlayA, nOpacity, bSwappedOpacity, |
1205 | 0 | nA, nOverlayA, nFinalAlpha); |
1206 | 0 | PremultiplyChannels(i, pabyR, pabyG, pabyB, nR, nG, nB, nA); |
1207 | 0 | PremultiplyChannels(i, pabyOverlayR, pabyOverlayG, pabyOverlayB, |
1208 | 0 | nOverlayR, nOverlayG, nOverlayB, nOverlayA); |
1209 | | |
1210 | | // Result |
1211 | |
|
1212 | 0 | auto processComponent = [&nFinalAlpha](GByte C, GByte A, GByte OverlayC, |
1213 | 0 | GByte OverlayA) -> GByte |
1214 | 0 | { |
1215 | 0 | return DivScale255( |
1216 | 0 | std::min(MulScale255(OverlayC, A), MulScale255(C, OverlayA)) + |
1217 | 0 | MulScale255(C, 255 - OverlayA) + |
1218 | 0 | MulScale255(OverlayC, 255 - A), |
1219 | 0 | nFinalAlpha); |
1220 | 0 | }; |
1221 | |
|
1222 | 0 | pabyDst[nDstOffset] = processComponent(nR, nA, nOverlayR, nOverlayA); |
1223 | | |
1224 | | // Grayscale with alpha |
1225 | 0 | if (nOutputBands == 2) |
1226 | 0 | { |
1227 | 0 | pabyDst[nDstOffset + nBandSpace] = nFinalAlpha; |
1228 | 0 | } |
1229 | 0 | else |
1230 | 0 | { |
1231 | | // RBG and RGBA |
1232 | 0 | if (nOutputBands >= 3) |
1233 | 0 | { |
1234 | 0 | pabyDst[nDstOffset + nBandSpace] = |
1235 | 0 | processComponent(nG, nA, nOverlayG, nOverlayA); |
1236 | 0 | pabyDst[nDstOffset + 2 * nBandSpace] = |
1237 | 0 | processComponent(nB, nA, nOverlayB, nOverlayA); |
1238 | 0 | } |
1239 | | |
1240 | | // RGBA |
1241 | 0 | if (nOutputBands == 4) |
1242 | 0 | { |
1243 | 0 | pabyDst[nDstOffset + 3 * nBandSpace] = nFinalAlpha; |
1244 | 0 | } |
1245 | 0 | } |
1246 | 0 | nDstOffset += nPixelSpace; |
1247 | 0 | } |
1248 | 0 | } |
1249 | | |
1250 | | /************************************************************************/ |
1251 | | /* BlendLighten_Generic */ |
1252 | | /************************************************************************/ |
1253 | | |
1254 | | static void BlendLighten_Generic( |
1255 | | const GByte *CPL_RESTRICT pabyR, const GByte *CPL_RESTRICT pabyG, |
1256 | | const GByte *CPL_RESTRICT pabyB, const GByte *CPL_RESTRICT pabyA, |
1257 | | const GByte *CPL_RESTRICT pabyOverlayR, |
1258 | | const GByte *CPL_RESTRICT pabyOverlayG, |
1259 | | const GByte *CPL_RESTRICT pabyOverlayB, |
1260 | | const GByte *CPL_RESTRICT pabyOverlayA, GByte *CPL_RESTRICT pabyDst, |
1261 | | GSpacing nPixelSpace, GSpacing nBandSpace, size_t i, size_t N, |
1262 | | GByte nOpacity, int nOutputBands, bool bSwappedOpacity) |
1263 | 0 | { |
1264 | | |
1265 | | // Generic formulas from Mapserver |
1266 | | // Dca' = max(Sca.Da, Dca.Sa) + Sca.(1 - Da) + Dca.(1 - Sa) |
1267 | | // Da' = Sa + Da - Sa.Da |
1268 | | |
1269 | | // TODO: optimize for the various cases (with/without alpha, grayscale/RGB) |
1270 | | // TODO: optimize mathematically to avoid redundant computations |
1271 | |
|
1272 | 0 | GSpacing nDstOffset = 0; |
1273 | |
|
1274 | 0 | for (; i < N; ++i) |
1275 | 0 | { |
1276 | |
|
1277 | 0 | GByte nOverlayR, nOverlayG, nOverlayB, nOverlayA; |
1278 | 0 | GByte nR, nG, nB, nA; |
1279 | 0 | GByte nFinalAlpha; |
1280 | 0 | ProcessAlphaChannels(i, pabyA, pabyOverlayA, nOpacity, bSwappedOpacity, |
1281 | 0 | nA, nOverlayA, nFinalAlpha); |
1282 | 0 | PremultiplyChannels(i, pabyR, pabyG, pabyB, nR, nG, nB, nA); |
1283 | 0 | PremultiplyChannels(i, pabyOverlayR, pabyOverlayG, pabyOverlayB, |
1284 | 0 | nOverlayR, nOverlayG, nOverlayB, nOverlayA); |
1285 | | |
1286 | | // Result |
1287 | |
|
1288 | 0 | auto processComponent = [&nFinalAlpha](GByte C, GByte A, GByte OverlayC, |
1289 | 0 | GByte OverlayA) -> GByte |
1290 | 0 | { |
1291 | 0 | return DivScale255( |
1292 | 0 | std::max(MulScale255(OverlayC, A), MulScale255(C, OverlayA)) + |
1293 | 0 | MulScale255(C, 255 - OverlayA) + |
1294 | 0 | MulScale255(OverlayC, 255 - A), |
1295 | 0 | nFinalAlpha); |
1296 | 0 | }; |
1297 | |
|
1298 | 0 | pabyDst[nDstOffset] = processComponent(nR, nA, nOverlayR, nOverlayA); |
1299 | | |
1300 | | // Grayscale with alpha |
1301 | 0 | if (nOutputBands == 2) |
1302 | 0 | { |
1303 | 0 | pabyDst[nDstOffset + nBandSpace] = nFinalAlpha; |
1304 | 0 | } |
1305 | 0 | else |
1306 | 0 | { |
1307 | | // RBG and RGBA |
1308 | 0 | if (nOutputBands >= 3) |
1309 | 0 | { |
1310 | 0 | pabyDst[nDstOffset + nBandSpace] = |
1311 | 0 | processComponent(nG, nA, nOverlayG, nOverlayA); |
1312 | 0 | pabyDst[nDstOffset + 2 * nBandSpace] = |
1313 | 0 | processComponent(nB, nA, nOverlayB, nOverlayA); |
1314 | 0 | } |
1315 | | |
1316 | | // RGBA |
1317 | 0 | if (nOutputBands == 4) |
1318 | 0 | { |
1319 | 0 | pabyDst[nDstOffset + 3 * nBandSpace] = nFinalAlpha; |
1320 | 0 | } |
1321 | 0 | } |
1322 | 0 | nDstOffset += nPixelSpace; |
1323 | 0 | } |
1324 | 0 | } |
1325 | | |
1326 | | /************************************************************************/ |
1327 | | /* BlendColorDodge_Generic */ |
1328 | | /************************************************************************/ |
1329 | | |
1330 | | static void BlendColorDodge_Generic( |
1331 | | const GByte *CPL_RESTRICT pabyR, const GByte *CPL_RESTRICT pabyG, |
1332 | | const GByte *CPL_RESTRICT pabyB, const GByte *CPL_RESTRICT pabyA, |
1333 | | const GByte *CPL_RESTRICT pabyOverlayR, |
1334 | | const GByte *CPL_RESTRICT pabyOverlayG, |
1335 | | const GByte *CPL_RESTRICT pabyOverlayB, |
1336 | | const GByte *CPL_RESTRICT pabyOverlayA, GByte *CPL_RESTRICT pabyDst, |
1337 | | GSpacing nPixelSpace, GSpacing nBandSpace, size_t i, size_t N, |
1338 | | GByte nOpacity, int nOutputBands, bool bSwappedOpacity) |
1339 | 0 | { |
1340 | | |
1341 | | // Generic formulas from Mapserver |
1342 | | // if Sca.Da + Dca.Sa >= Sa.Da |
1343 | | // Dca' = Sa.Da + Sca.(1 - Da) + Dca.(1 - Sa) |
1344 | | // otherwise |
1345 | | // Dca' = Dca.Sa/(1-Sca/Sa) + Sca.(1 - Da) + Dca.(1 - Sa) |
1346 | | // |
1347 | | // Da' = Sa + Da - Sa.Da |
1348 | | |
1349 | | // TODO: optimize for the various cases (with/without alpha, grayscale/RGB) |
1350 | | // TODO: optimize mathematically to avoid redundant computations |
1351 | |
|
1352 | 0 | GSpacing nDstOffset = 0; |
1353 | |
|
1354 | 0 | for (; i < N; ++i) |
1355 | 0 | { |
1356 | |
|
1357 | 0 | GByte nOverlayR, nOverlayG, nOverlayB, nOverlayA; |
1358 | 0 | GByte nR, nG, nB, nA; |
1359 | 0 | GByte nFinalAlpha; |
1360 | 0 | ProcessAlphaChannels(i, pabyA, pabyOverlayA, nOpacity, bSwappedOpacity, |
1361 | 0 | nA, nOverlayA, nFinalAlpha); |
1362 | 0 | PremultiplyChannels(i, pabyR, pabyG, pabyB, nR, nG, nB, nA); |
1363 | 0 | PremultiplyChannels(i, pabyOverlayR, pabyOverlayG, pabyOverlayB, |
1364 | 0 | nOverlayR, nOverlayG, nOverlayB, nOverlayA); |
1365 | | |
1366 | | // Result |
1367 | |
|
1368 | 0 | const GByte alphaMul255{MulScale255(nOverlayA, nA)}; |
1369 | | |
1370 | | // Dca' = Sa.Da + Sca.(1 - Da) + Dca.(1 - Sa) |
1371 | 0 | auto processComponent_greaterEqual = |
1372 | 0 | [&nFinalAlpha, &alphaMul255](GByte C, GByte A, GByte OverlayC, |
1373 | 0 | GByte OverlayA) -> GByte |
1374 | 0 | { |
1375 | 0 | return DivScale255(alphaMul255 + MulScale255(C, 255 - OverlayA) + |
1376 | 0 | MulScale255(OverlayC, 255 - A), |
1377 | 0 | nFinalAlpha); |
1378 | 0 | }; |
1379 | | |
1380 | | // Dca.Sa/(1-Sca/Sa) + Sca.(1 - Da) + Dca.(1 - Sa) |
1381 | 0 | auto processComponent_lessThan = [&nFinalAlpha](GByte C, GByte A, |
1382 | 0 | GByte OverlayC, |
1383 | 0 | GByte OverlayA) -> GByte |
1384 | 0 | { |
1385 | 0 | return DivScale255( |
1386 | 0 | DivScale255(MulScale255(C, OverlayA), |
1387 | 0 | 255 - DivScale255(OverlayC, OverlayA)) + |
1388 | 0 | MulScale255(C, 255 - OverlayA) + |
1389 | 0 | MulScale255(OverlayC, 255 - A), |
1390 | 0 | nFinalAlpha); |
1391 | 0 | }; |
1392 | |
|
1393 | 0 | auto branchingCondition = [&alphaMul255](GByte C, GByte A, |
1394 | 0 | GByte OverlayC, |
1395 | 0 | GByte OverlayA) -> bool |
1396 | 0 | { |
1397 | 0 | return MulScale255(OverlayC, A) + MulScale255(C, OverlayA) >= |
1398 | 0 | alphaMul255; |
1399 | 0 | }; |
1400 | |
|
1401 | 0 | if (branchingCondition(nR, nA, nOverlayR, nOverlayA)) |
1402 | 0 | { |
1403 | 0 | pabyDst[nDstOffset] = |
1404 | 0 | processComponent_greaterEqual(nR, nA, nOverlayR, nOverlayA); |
1405 | 0 | } |
1406 | 0 | else |
1407 | 0 | { |
1408 | 0 | pabyDst[nDstOffset] = |
1409 | 0 | processComponent_lessThan(nR, nA, nOverlayR, nOverlayA); |
1410 | 0 | } |
1411 | | |
1412 | | // Grayscale with alpha |
1413 | 0 | if (nOutputBands == 2) |
1414 | 0 | { |
1415 | 0 | pabyDst[nDstOffset + nBandSpace] = nFinalAlpha; |
1416 | 0 | } |
1417 | 0 | else |
1418 | 0 | { |
1419 | | // RBG and RGBA |
1420 | 0 | if (nOutputBands >= 3) |
1421 | 0 | { |
1422 | 0 | if (branchingCondition(nG, nA, nOverlayG, nOverlayA)) |
1423 | 0 | { |
1424 | 0 | pabyDst[nDstOffset + nBandSpace] = |
1425 | 0 | processComponent_greaterEqual(nG, nA, nOverlayG, |
1426 | 0 | nOverlayA); |
1427 | 0 | } |
1428 | 0 | else |
1429 | 0 | { |
1430 | 0 | pabyDst[nDstOffset + nBandSpace] = |
1431 | 0 | processComponent_lessThan(nG, nA, nOverlayG, nOverlayA); |
1432 | 0 | } |
1433 | |
|
1434 | 0 | if (branchingCondition(nB, nA, nOverlayB, nOverlayA)) |
1435 | 0 | { |
1436 | 0 | pabyDst[nDstOffset + 2 * nBandSpace] = |
1437 | 0 | processComponent_greaterEqual(nB, nA, nOverlayB, |
1438 | 0 | nOverlayA); |
1439 | 0 | } |
1440 | 0 | else |
1441 | 0 | { |
1442 | 0 | pabyDst[nDstOffset + 2 * nBandSpace] = |
1443 | 0 | processComponent_lessThan(nB, nA, nOverlayB, nOverlayA); |
1444 | 0 | } |
1445 | 0 | } |
1446 | | |
1447 | | // RGBA |
1448 | 0 | if (nOutputBands == 4) |
1449 | 0 | { |
1450 | 0 | pabyDst[nDstOffset + 3 * nBandSpace] = nFinalAlpha; |
1451 | 0 | } |
1452 | 0 | } |
1453 | 0 | nDstOffset += nPixelSpace; |
1454 | 0 | } |
1455 | 0 | } |
1456 | | |
1457 | | /************************************************************************/ |
1458 | | /* BlendColorBurn_Generic */ |
1459 | | /************************************************************************/ |
1460 | | |
1461 | | static void BlendColorBurn_Generic( |
1462 | | const GByte *CPL_RESTRICT pabyR, const GByte *CPL_RESTRICT pabyG, |
1463 | | const GByte *CPL_RESTRICT pabyB, const GByte *CPL_RESTRICT pabyA, |
1464 | | const GByte *CPL_RESTRICT pabyOverlayR, |
1465 | | const GByte *CPL_RESTRICT pabyOverlayG, |
1466 | | const GByte *CPL_RESTRICT pabyOverlayB, |
1467 | | const GByte *CPL_RESTRICT pabyOverlayA, GByte *CPL_RESTRICT pabyDst, |
1468 | | GSpacing nPixelSpace, GSpacing nBandSpace, size_t i, size_t N, |
1469 | | GByte nOpacity, int nOutputBands, bool bSwappedOpacity) |
1470 | 0 | { |
1471 | | |
1472 | | // if Sca.Da + Dca.Sa <= Sa.Da |
1473 | | // Dca' = Sca.(1 - Da) + Dca.(1 - Sa) |
1474 | | // otherwise |
1475 | | // Dca' = Sa.(Sca.Da + Dca.Sa - Sa.Da)/Sca + Sca.(1 - Da) + Dca.(1 - Sa) |
1476 | | // Da' = Sa + Da - Sa.Da |
1477 | | |
1478 | | // TODO: optimize for the various cases (with/without alpha, grayscale/RGB) |
1479 | | // TODO: optimize mathematically to avoid redundant computations |
1480 | |
|
1481 | 0 | GSpacing nDstOffset = 0; |
1482 | |
|
1483 | 0 | for (; i < N; ++i) |
1484 | 0 | { |
1485 | |
|
1486 | 0 | GByte nOverlayR, nOverlayG, nOverlayB, nOverlayA; |
1487 | 0 | GByte nR, nG, nB, nA; |
1488 | 0 | GByte nFinalAlpha; |
1489 | 0 | ProcessAlphaChannels(i, pabyA, pabyOverlayA, nOpacity, bSwappedOpacity, |
1490 | 0 | nA, nOverlayA, nFinalAlpha); |
1491 | 0 | PremultiplyChannels(i, pabyR, pabyG, pabyB, nR, nG, nB, nA); |
1492 | 0 | PremultiplyChannels(i, pabyOverlayR, pabyOverlayG, pabyOverlayB, |
1493 | 0 | nOverlayR, nOverlayG, nOverlayB, nOverlayA); |
1494 | | |
1495 | | // Result |
1496 | |
|
1497 | 0 | const GByte alphaMul255{MulScale255(nOverlayA, nA)}; |
1498 | |
|
1499 | 0 | auto processComponent_lessEqual = |
1500 | 0 | [&nFinalAlpha](GByte C, GByte A, GByte OverlayC, |
1501 | 0 | GByte OverlayA) -> GByte |
1502 | 0 | { |
1503 | 0 | return DivScale255(MulScale255(C, 255 - OverlayA) + |
1504 | 0 | MulScale255(OverlayC, 255 - A), |
1505 | 0 | nFinalAlpha); |
1506 | 0 | }; |
1507 | | |
1508 | | // The simplified formula below was tested and verified with the floating point equivalent [0..1] normalized version |
1509 | 0 | auto processComponent_greater = |
1510 | 0 | [&nFinalAlpha, &alphaMul255](GByte C, GByte A, GByte OverlayC, |
1511 | 0 | GByte OverlayA) -> GByte |
1512 | 0 | { |
1513 | 0 | const GByte C_unpremultiplied = DivScale255(C, A); |
1514 | 0 | const GByte Overlay_C_unpremultiplied = |
1515 | 0 | DivScale255(OverlayC, OverlayA); |
1516 | 0 | return DivScale255( |
1517 | 0 | MulScale255(alphaMul255, (C_unpremultiplied + |
1518 | 0 | Overlay_C_unpremultiplied - 255)) + |
1519 | 0 | MulScale255(C, 255 - OverlayA) + |
1520 | 0 | MulScale255(OverlayC, 255 - A), |
1521 | 0 | nFinalAlpha); |
1522 | 0 | }; |
1523 | |
|
1524 | 0 | auto branchingCondition = [&alphaMul255](GByte C, GByte A, |
1525 | 0 | GByte OverlayC, |
1526 | 0 | GByte OverlayA) -> bool |
1527 | 0 | { |
1528 | 0 | return MulScale255(OverlayC, A) + MulScale255(C, OverlayA) <= |
1529 | 0 | alphaMul255; |
1530 | 0 | }; |
1531 | |
|
1532 | 0 | if (branchingCondition(nR, nA, nOverlayR, nOverlayA)) |
1533 | 0 | { |
1534 | 0 | pabyDst[nDstOffset] = |
1535 | 0 | processComponent_lessEqual(nR, nA, nOverlayR, nOverlayA); |
1536 | 0 | } |
1537 | 0 | else |
1538 | 0 | { |
1539 | 0 | pabyDst[nDstOffset] = |
1540 | 0 | processComponent_greater(nR, nA, nOverlayR, nOverlayA); |
1541 | 0 | } |
1542 | | |
1543 | | // Grayscale with alpha |
1544 | 0 | if (nOutputBands == 2) |
1545 | 0 | { |
1546 | 0 | pabyDst[nDstOffset + nBandSpace] = nFinalAlpha; |
1547 | 0 | } |
1548 | 0 | else |
1549 | 0 | { |
1550 | | // RBG and RGBA |
1551 | 0 | if (nOutputBands >= 3) |
1552 | 0 | { |
1553 | 0 | if (branchingCondition(nG, nA, nOverlayG, nOverlayA)) |
1554 | 0 | { |
1555 | 0 | pabyDst[nDstOffset + nBandSpace] = |
1556 | 0 | processComponent_lessEqual(nG, nA, nOverlayG, |
1557 | 0 | nOverlayA); |
1558 | 0 | } |
1559 | 0 | else |
1560 | 0 | { |
1561 | 0 | pabyDst[nDstOffset + nBandSpace] = |
1562 | 0 | processComponent_greater(nG, nA, nOverlayG, nOverlayA); |
1563 | 0 | } |
1564 | |
|
1565 | 0 | if (branchingCondition(nB, nA, nOverlayB, nOverlayA)) |
1566 | 0 | { |
1567 | 0 | pabyDst[nDstOffset + 2 * nBandSpace] = |
1568 | 0 | processComponent_lessEqual(nB, nA, nOverlayB, |
1569 | 0 | nOverlayA); |
1570 | 0 | } |
1571 | 0 | else |
1572 | 0 | { |
1573 | 0 | pabyDst[nDstOffset + 2 * nBandSpace] = |
1574 | 0 | processComponent_greater(nB, nA, nOverlayB, nOverlayA); |
1575 | 0 | } |
1576 | 0 | } |
1577 | | |
1578 | | // RGBA |
1579 | 0 | if (nOutputBands == 4) |
1580 | 0 | { |
1581 | 0 | pabyDst[nDstOffset + 3 * nBandSpace] = nFinalAlpha; |
1582 | 0 | } |
1583 | 0 | } |
1584 | 0 | nDstOffset += nPixelSpace; |
1585 | 0 | } |
1586 | 0 | } |
1587 | | |
1588 | | /************************************************************************/ |
1589 | | /* BlendSrcOverRGBA_SSE2() */ |
1590 | | /************************************************************************/ |
1591 | | |
1592 | | #ifdef HAVE_SSE2 |
1593 | | #if defined(__GNUC__) |
1594 | | __attribute__((noinline)) |
1595 | | #endif |
1596 | | static int BlendSrcOverRGBA_SSE2(const GByte *CPL_RESTRICT pabyR, |
1597 | | const GByte *CPL_RESTRICT pabyG, |
1598 | | const GByte *CPL_RESTRICT pabyB, |
1599 | | const GByte *CPL_RESTRICT pabyA, |
1600 | | const GByte *CPL_RESTRICT pabyOverlayR, |
1601 | | const GByte *CPL_RESTRICT pabyOverlayG, |
1602 | | const GByte *CPL_RESTRICT pabyOverlayB, |
1603 | | const GByte *CPL_RESTRICT pabyOverlayA, |
1604 | | GByte *CPL_RESTRICT pabyDst, |
1605 | | GSpacing nBandSpace, int N, GByte nOpacity) |
1606 | 0 | { |
1607 | | // See scalar code after call to BlendSrcOverRGBA_SSE2() below for the |
1608 | | // non-obfuscated formulas... |
1609 | |
|
1610 | 0 | const auto load_and_unpack = [](const void *p) |
1611 | 0 | { |
1612 | 0 | const auto zero = _mm_setzero_si128(); |
1613 | 0 | auto overlayA = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p)); |
1614 | 0 | return std::make_pair(_mm_unpacklo_epi8(overlayA, zero), |
1615 | 0 | _mm_unpackhi_epi8(overlayA, zero)); |
1616 | 0 | }; |
1617 | |
|
1618 | 0 | const auto pack_and_store = [](void *p, __m128i lo, __m128i hi) |
1619 | 0 | { |
1620 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(p), |
1621 | 0 | _mm_packus_epi16(lo, hi)); |
1622 | 0 | }; |
1623 | |
|
1624 | 0 | const auto mul16bit_8bit_result = [](__m128i a, __m128i b) |
1625 | 0 | { |
1626 | 0 | const auto r255 = _mm_set1_epi16(255); |
1627 | 0 | return _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(a, b), r255), 8); |
1628 | 0 | }; |
1629 | |
|
1630 | 0 | const auto opacity = _mm_set1_epi16(nOpacity); |
1631 | 0 | const auto r255 = _mm_set1_epi16(255); |
1632 | 0 | const int16_t *tabInvDstASigned = |
1633 | 0 | reinterpret_cast<const int16_t *>(gTabInvDstA.data()); |
1634 | 0 | constexpr int REG_WIDTH = static_cast<int>(sizeof(opacity)); |
1635 | |
|
1636 | 0 | int i = 0; |
1637 | 0 | for (; i <= N - REG_WIDTH; i += REG_WIDTH) |
1638 | 0 | { |
1639 | 0 | auto [overlayA_lo, overlayA_hi] = load_and_unpack(pabyOverlayA + i); |
1640 | 0 | auto [srcA_lo, srcA_hi] = load_and_unpack(pabyA + i); |
1641 | 0 | overlayA_lo = mul16bit_8bit_result(overlayA_lo, opacity); |
1642 | 0 | overlayA_hi = mul16bit_8bit_result(overlayA_hi, opacity); |
1643 | 0 | auto srcAMul255MinusOverlayA_lo = |
1644 | 0 | mul16bit_8bit_result(srcA_lo, _mm_sub_epi16(r255, overlayA_lo)); |
1645 | 0 | auto srcAMul255MinusOverlayA_hi = |
1646 | 0 | mul16bit_8bit_result(srcA_hi, _mm_sub_epi16(r255, overlayA_hi)); |
1647 | 0 | auto dstA_lo = _mm_add_epi16(overlayA_lo, srcAMul255MinusOverlayA_lo); |
1648 | 0 | auto dstA_hi = _mm_add_epi16(overlayA_hi, srcAMul255MinusOverlayA_hi); |
1649 | | |
1650 | | // This would be the equivalent of a "_mm_i16gather_epi16" operation |
1651 | | // which does not exist... |
1652 | | // invDstA_{i} = [tabInvDstASigned[dstA_{i}] for i in range(8)] |
1653 | 0 | auto invDstA_lo = _mm_undefined_si128(); |
1654 | 0 | auto invDstA_hi = _mm_undefined_si128(); |
1655 | 0 | #define SET_INVDSTA(k) \ |
1656 | 0 | do \ |
1657 | 0 | { \ |
1658 | 0 | const int idxLo = _mm_extract_epi16(dstA_lo, k); \ |
1659 | 0 | const int idxHi = _mm_extract_epi16(dstA_hi, k); \ |
1660 | 0 | invDstA_lo = _mm_insert_epi16(invDstA_lo, tabInvDstASigned[idxLo], k); \ |
1661 | 0 | invDstA_hi = _mm_insert_epi16(invDstA_hi, tabInvDstASigned[idxHi], k); \ |
1662 | 0 | } while (0) |
1663 | |
|
1664 | 0 | SET_INVDSTA(0); |
1665 | 0 | SET_INVDSTA(1); |
1666 | 0 | SET_INVDSTA(2); |
1667 | 0 | SET_INVDSTA(3); |
1668 | 0 | SET_INVDSTA(4); |
1669 | 0 | SET_INVDSTA(5); |
1670 | 0 | SET_INVDSTA(6); |
1671 | 0 | SET_INVDSTA(7); |
1672 | |
|
1673 | 0 | pack_and_store(pabyDst + i + 3 * nBandSpace, dstA_lo, dstA_hi); |
1674 | |
|
1675 | 0 | #define PROCESS_COMPONENT(pabySrc, pabyOverlay, iBand) \ |
1676 | 0 | do \ |
1677 | 0 | { \ |
1678 | 0 | auto [src_lo, src_hi] = load_and_unpack((pabySrc) + i); \ |
1679 | 0 | auto [overlay_lo, overlay_hi] = load_and_unpack((pabyOverlay) + i); \ |
1680 | 0 | auto dst_lo = _mm_srli_epi16( \ |
1681 | 0 | _mm_add_epi16( \ |
1682 | 0 | _mm_add_epi16( \ |
1683 | 0 | _mm_mullo_epi16(overlay_lo, overlayA_lo), \ |
1684 | 0 | _mm_mullo_epi16(src_lo, srcAMul255MinusOverlayA_lo)), \ |
1685 | 0 | r255), \ |
1686 | 0 | 8); \ |
1687 | 0 | auto dst_hi = _mm_srli_epi16( \ |
1688 | 0 | _mm_add_epi16( \ |
1689 | 0 | _mm_add_epi16( \ |
1690 | 0 | _mm_mullo_epi16(overlay_hi, overlayA_hi), \ |
1691 | 0 | _mm_mullo_epi16(src_hi, srcAMul255MinusOverlayA_hi)), \ |
1692 | 0 | r255), \ |
1693 | 0 | 8); \ |
1694 | 0 | dst_lo = mul16bit_8bit_result(dst_lo, invDstA_lo); \ |
1695 | 0 | dst_hi = mul16bit_8bit_result(dst_hi, invDstA_hi); \ |
1696 | 0 | pack_and_store(pabyDst + i + (iBand) * nBandSpace, dst_lo, dst_hi); \ |
1697 | 0 | } while (0) |
1698 | |
|
1699 | 0 | PROCESS_COMPONENT(pabyR, pabyOverlayR, 0); |
1700 | 0 | PROCESS_COMPONENT(pabyG, pabyOverlayG, 1); |
1701 | 0 | PROCESS_COMPONENT(pabyB, pabyOverlayB, 2); |
1702 | 0 | } |
1703 | 0 | return i; |
1704 | 0 | } |
1705 | | #endif |
1706 | | |
1707 | | template <bool bPixelSpaceIsOne> |
1708 | | #if defined(__GNUC__) |
1709 | | __attribute__((noinline)) |
1710 | | #endif |
1711 | | static void BlendSrcOverRGBA_Generic( |
1712 | | const GByte *CPL_RESTRICT pabyR, const GByte *CPL_RESTRICT pabyG, |
1713 | | const GByte *CPL_RESTRICT pabyB, const GByte *CPL_RESTRICT pabyA, |
1714 | | const GByte *CPL_RESTRICT pabyOverlayR, |
1715 | | const GByte *CPL_RESTRICT pabyOverlayG, |
1716 | | const GByte *CPL_RESTRICT pabyOverlayB, |
1717 | | const GByte *CPL_RESTRICT pabyOverlayA, GByte *CPL_RESTRICT pabyDst, |
1718 | | [[maybe_unused]] GSpacing nPixelSpace, GSpacing nBandSpace, int i, int N, |
1719 | | GByte nOpacity) |
1720 | 0 | { |
1721 | | #if !(defined(HAVE_SSE2) && defined(__GNUC__)) |
1722 | | if constexpr (!bPixelSpaceIsOne) |
1723 | | { |
1724 | | assert(nPixelSpace != 1); |
1725 | | } |
1726 | | #endif |
1727 | 0 | [[maybe_unused]] GSpacing nDstOffset = 0; |
1728 | 0 | #if defined(HAVE_SSE2) && defined(__clang__) && !defined(__INTEL_CLANG_COMPILER) |
1729 | | // No need to vectorize for SSE2 and clang |
1730 | 0 | #pragma clang loop vectorize(disable) |
1731 | 0 | #endif |
1732 | 0 | for (; i < N; ++i) |
1733 | 0 | { |
1734 | 0 | const GByte nOverlayR = pabyOverlayR[i]; |
1735 | 0 | const GByte nOverlayG = pabyOverlayG[i]; |
1736 | 0 | const GByte nOverlayB = pabyOverlayB[i]; |
1737 | 0 | const GByte nOverlayA = |
1738 | 0 | static_cast<GByte>((pabyOverlayA[i] * nOpacity + 255) / 256); |
1739 | 0 | const GByte nR = pabyR[i]; |
1740 | 0 | const GByte nG = pabyG[i]; |
1741 | 0 | const GByte nB = pabyB[i]; |
1742 | 0 | const GByte nA = pabyA[i]; |
1743 | 0 | const GByte nSrcAMul255MinusOverlayA = |
1744 | 0 | static_cast<GByte>((nA * (255 - nOverlayA) + 255) / 256); |
1745 | 0 | const GByte nDstA = |
1746 | 0 | static_cast<GByte>(nOverlayA + nSrcAMul255MinusOverlayA); |
1747 | 0 | GByte nDstR = static_cast<GByte>( |
1748 | 0 | (nOverlayR * nOverlayA + nR * nSrcAMul255MinusOverlayA + 255) / |
1749 | 0 | 256); |
1750 | 0 | GByte nDstG = static_cast<GByte>( |
1751 | 0 | (nOverlayG * nOverlayA + nG * nSrcAMul255MinusOverlayA + 255) / |
1752 | 0 | 256); |
1753 | 0 | GByte nDstB = static_cast<GByte>( |
1754 | 0 | (nOverlayB * nOverlayA + nB * nSrcAMul255MinusOverlayA + 255) / |
1755 | 0 | 256); |
1756 | | // nInvDstA = (255 << SHIFT_DIV_DSTA) / nDstA; |
1757 | 0 | const uint16_t nInvDstA = gTabInvDstA[nDstA]; |
1758 | 0 | constexpr unsigned ROUND_OFFSET_DIV_DSTA = ((1 << SHIFT_DIV_DSTA) - 1); |
1759 | 0 | nDstR = static_cast<GByte>((nDstR * nInvDstA + ROUND_OFFSET_DIV_DSTA) >> |
1760 | 0 | SHIFT_DIV_DSTA); |
1761 | 0 | nDstG = static_cast<GByte>((nDstG * nInvDstA + ROUND_OFFSET_DIV_DSTA) >> |
1762 | 0 | SHIFT_DIV_DSTA); |
1763 | 0 | nDstB = static_cast<GByte>((nDstB * nInvDstA + ROUND_OFFSET_DIV_DSTA) >> |
1764 | 0 | SHIFT_DIV_DSTA); |
1765 | | if constexpr (bPixelSpaceIsOne) |
1766 | | { |
1767 | | pabyDst[i] = nDstR; |
1768 | | pabyDst[i + nBandSpace] = nDstG; |
1769 | | pabyDst[i + 2 * nBandSpace] = nDstB; |
1770 | | pabyDst[i + 3 * nBandSpace] = nDstA; |
1771 | | } |
1772 | | else |
1773 | 0 | { |
1774 | 0 | pabyDst[nDstOffset] = nDstR; |
1775 | 0 | pabyDst[nDstOffset + nBandSpace] = nDstG; |
1776 | 0 | pabyDst[nDstOffset + 2 * nBandSpace] = nDstB; |
1777 | 0 | pabyDst[nDstOffset + 3 * nBandSpace] = nDstA; |
1778 | 0 | nDstOffset += nPixelSpace; |
1779 | 0 | } |
1780 | 0 | } |
1781 | 0 | } |
1782 | | |
1783 | | /************************************************************************/ |
1784 | | /* BlendDataset::IRasterIO() */ |
1785 | | /************************************************************************/ |
1786 | | |
1787 | | CPLErr BlendDataset::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff, |
1788 | | int nXSize, int nYSize, void *pData, |
1789 | | int nBufXSize, int nBufYSize, |
1790 | | GDALDataType eBufType, int nBandCount, |
1791 | | BANDMAP_TYPE panBandMap, GSpacing nPixelSpace, |
1792 | | GSpacing nLineSpace, GSpacing nBandSpace, |
1793 | | GDALRasterIOExtraArg *psExtraArg) |
1794 | 0 | { |
1795 | | // Try to pass the request to the most appropriate overview dataset. |
1796 | 0 | if (nBufXSize < nXSize && nBufYSize < nYSize) |
1797 | 0 | { |
1798 | 0 | int bTried = FALSE; |
1799 | 0 | const CPLErr eErr = TryOverviewRasterIO( |
1800 | 0 | eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize, nBufYSize, |
1801 | 0 | eBufType, nBandCount, panBandMap, nPixelSpace, nLineSpace, |
1802 | 0 | nBandSpace, psExtraArg, &bTried); |
1803 | 0 | if (bTried) |
1804 | 0 | return eErr; |
1805 | 0 | } |
1806 | | |
1807 | 0 | GByte *const CPL_RESTRICT pabyDst = static_cast<GByte *>(pData); |
1808 | 0 | const int nColorCount = m_oColorDS.GetRasterCount(); |
1809 | 0 | const int nOverlayCount = m_oOverlayDS.GetRasterCount(); |
1810 | | |
1811 | | /************************************************************************/ |
1812 | | /* HSV_VALUE */ |
1813 | | /*************************************************************************/ |
1814 | 0 | if (nOverlayCount == 1 && m_opacity255Scale == 255 && |
1815 | 0 | m_operator == CompositionMode::HSV_VALUE && eRWFlag == GF_Read && |
1816 | 0 | eBufType == GDT_UInt8 && nBandCount == nBands && |
1817 | 0 | IsAllBands(nBands, panBandMap) && |
1818 | 0 | AcquireSourcePixels(nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, |
1819 | 0 | psExtraArg)) |
1820 | 0 | { |
1821 | 0 | const size_t nPixelCount = static_cast<size_t>(nBufXSize) * nBufYSize; |
1822 | 0 | const GByte *pabyR = m_abyBuffer.data(); |
1823 | 0 | const GByte *pabyG = m_abyBuffer.data() + nPixelCount; |
1824 | 0 | const GByte *pabyB = m_abyBuffer.data() + nPixelCount * 2; |
1825 | 0 | const GByte *pabyValue = m_abyBuffer.data() + nPixelCount * nColorCount; |
1826 | 0 | size_t nSrcIdx = 0; |
1827 | 0 | for (int j = 0; j < nBufYSize; ++j) |
1828 | 0 | { |
1829 | 0 | auto nDstOffset = j * nLineSpace; |
1830 | 0 | if (nPixelSpace == 1 && nLineSpace >= nPixelSpace * nBufXSize && |
1831 | 0 | nBandSpace >= nLineSpace * nBufYSize) |
1832 | 0 | { |
1833 | 0 | patch_value_line(nBufXSize, pabyR + nSrcIdx, pabyG + nSrcIdx, |
1834 | 0 | pabyB + nSrcIdx, pabyValue + nSrcIdx, |
1835 | 0 | pabyDst + nDstOffset, |
1836 | 0 | pabyDst + nDstOffset + nBandSpace, |
1837 | 0 | pabyDst + nDstOffset + 2 * nBandSpace); |
1838 | 0 | nSrcIdx += nBufXSize; |
1839 | 0 | } |
1840 | 0 | else |
1841 | 0 | { |
1842 | 0 | for (int i = 0; i < nBufXSize; |
1843 | 0 | ++i, ++nSrcIdx, nDstOffset += nPixelSpace) |
1844 | 0 | { |
1845 | 0 | float h, s; |
1846 | 0 | rgb_to_hs(pabyR[nSrcIdx], pabyG[nSrcIdx], pabyB[nSrcIdx], |
1847 | 0 | &h, &s); |
1848 | 0 | hsv_to_rgb(h, s, pabyValue[nSrcIdx], |
1849 | 0 | &pabyDst[nDstOffset + 0 * nBandSpace], |
1850 | 0 | &pabyDst[nDstOffset + 1 * nBandSpace], |
1851 | 0 | &pabyDst[nDstOffset + 2 * nBandSpace]); |
1852 | 0 | } |
1853 | 0 | } |
1854 | 0 | } |
1855 | 0 | if (nColorCount == 4) |
1856 | 0 | { |
1857 | 0 | for (int j = 0; j < nBufYSize; ++j) |
1858 | 0 | { |
1859 | 0 | auto nDstOffset = 3 * nBandSpace + j * nLineSpace; |
1860 | 0 | const GByte *pabyA = m_abyBuffer.data() + nPixelCount * 3; |
1861 | 0 | GDALCopyWords64(pabyA, GDT_UInt8, 1, pabyDst + nDstOffset, |
1862 | 0 | GDT_UInt8, static_cast<int>(nPixelSpace), |
1863 | 0 | nBufXSize); |
1864 | 0 | } |
1865 | 0 | } |
1866 | |
|
1867 | 0 | return CE_None; |
1868 | 0 | } |
1869 | | |
1870 | | /************************************************************************/ |
1871 | | /* SRC_OVER */ |
1872 | | /************************************************************************/ |
1873 | 0 | else if (nOverlayCount == 4 && nColorCount == 4 && |
1874 | 0 | m_operator == CompositionMode::SRC_OVER && eRWFlag == GF_Read && |
1875 | 0 | eBufType == GDT_UInt8 && nBandCount == nBands && |
1876 | 0 | IsAllBands(nBands, panBandMap) && |
1877 | 0 | AcquireSourcePixels(nXOff, nYOff, nXSize, nYSize, nBufXSize, |
1878 | 0 | nBufYSize, psExtraArg)) |
1879 | 0 | { |
1880 | 0 | const GByte nOpacity = static_cast<GByte>(m_opacity255Scale); |
1881 | 0 | const size_t nPixelCount = static_cast<size_t>(nBufXSize) * nBufYSize; |
1882 | 0 | const GByte *CPL_RESTRICT pabyR = m_abyBuffer.data(); |
1883 | 0 | const GByte *CPL_RESTRICT pabyG = m_abyBuffer.data() + nPixelCount; |
1884 | 0 | const GByte *CPL_RESTRICT pabyB = m_abyBuffer.data() + nPixelCount * 2; |
1885 | 0 | const GByte *CPL_RESTRICT pabyA = m_abyBuffer.data() + nPixelCount * 3; |
1886 | 0 | const GByte *CPL_RESTRICT pabyOverlayR = |
1887 | 0 | m_abyBuffer.data() + nPixelCount * nColorCount; |
1888 | 0 | const GByte *CPL_RESTRICT pabyOverlayG = |
1889 | 0 | m_abyBuffer.data() + nPixelCount * (nColorCount + 1); |
1890 | 0 | const GByte *CPL_RESTRICT pabyOverlayB = |
1891 | 0 | m_abyBuffer.data() + nPixelCount * (nColorCount + 2); |
1892 | 0 | const GByte *CPL_RESTRICT pabyOverlayA = |
1893 | 0 | m_abyBuffer.data() + nPixelCount * (nColorCount + 3); |
1894 | 0 | size_t nSrcIdx = 0; |
1895 | 0 | for (int j = 0; j < nBufYSize; ++j) |
1896 | 0 | { |
1897 | 0 | auto nDstOffset = j * nLineSpace; |
1898 | 0 | int i = 0; |
1899 | 0 | #ifdef HAVE_SSE2 |
1900 | 0 | if (nPixelSpace == 1) |
1901 | 0 | { |
1902 | 0 | i = BlendSrcOverRGBA_SSE2( |
1903 | 0 | pabyR + nSrcIdx, pabyG + nSrcIdx, pabyB + nSrcIdx, |
1904 | 0 | pabyA + nSrcIdx, pabyOverlayR + nSrcIdx, |
1905 | 0 | pabyOverlayG + nSrcIdx, pabyOverlayB + nSrcIdx, |
1906 | 0 | pabyOverlayA + nSrcIdx, pabyDst + nDstOffset, nBandSpace, |
1907 | 0 | nBufXSize, nOpacity); |
1908 | 0 | nSrcIdx += i; |
1909 | 0 | nDstOffset += i; |
1910 | 0 | } |
1911 | 0 | #endif |
1912 | | #if !(defined(HAVE_SSE2) && defined(__GNUC__)) |
1913 | | if (nPixelSpace == 1) |
1914 | | { |
1915 | | // Note: clang 20 does a rather good job at autovectorizing |
1916 | | // for SSE2, but BlendSrcOverRGBA_SSE2() performs better. |
1917 | | BlendSrcOverRGBA_Generic</* bPixelSpaceIsOne = */ true>( |
1918 | | pabyR + nSrcIdx, pabyG + nSrcIdx, pabyB + nSrcIdx, |
1919 | | pabyA + nSrcIdx, pabyOverlayR + nSrcIdx, |
1920 | | pabyOverlayG + nSrcIdx, pabyOverlayB + nSrcIdx, |
1921 | | pabyOverlayA + nSrcIdx, pabyDst + nDstOffset, |
1922 | | /*nPixelSpace = */ 1, nBandSpace, i, nBufXSize, nOpacity); |
1923 | | } |
1924 | | else |
1925 | | #endif |
1926 | 0 | { |
1927 | 0 | BlendSrcOverRGBA_Generic</* bPixelSpaceIsOne = */ false>( |
1928 | 0 | pabyR + nSrcIdx, pabyG + nSrcIdx, pabyB + nSrcIdx, |
1929 | 0 | pabyA + nSrcIdx, pabyOverlayR + nSrcIdx, |
1930 | 0 | pabyOverlayG + nSrcIdx, pabyOverlayB + nSrcIdx, |
1931 | 0 | pabyOverlayA + nSrcIdx, pabyDst + nDstOffset, nPixelSpace, |
1932 | 0 | nBandSpace, i, nBufXSize, nOpacity); |
1933 | 0 | } |
1934 | 0 | nSrcIdx += nBufXSize - i; |
1935 | 0 | } |
1936 | 0 | return CE_None; |
1937 | 0 | } |
1938 | | |
1939 | | /************************************************************************/ |
1940 | | /* OTHER OPERATORS */ |
1941 | | /************************************************************************/ |
1942 | | |
1943 | 0 | else if ((m_operator == CompositionMode::MULTIPLY || |
1944 | 0 | m_operator == CompositionMode::OVERLAY || |
1945 | 0 | m_operator == CompositionMode::SCREEN || |
1946 | 0 | m_operator == CompositionMode::HARD_LIGHT || |
1947 | 0 | m_operator == CompositionMode::DARKEN || |
1948 | 0 | m_operator == CompositionMode::LIGHTEN || |
1949 | 0 | m_operator == CompositionMode::COLOR_BURN || |
1950 | 0 | m_operator == CompositionMode::COLOR_DODGE) && |
1951 | 0 | eRWFlag == GF_Read && eBufType == GDT_UInt8 && |
1952 | 0 | nBandCount == nBands && IsAllBands(nBands, panBandMap) && |
1953 | 0 | AcquireSourcePixels(nXOff, nYOff, nXSize, nYSize, nBufXSize, |
1954 | 0 | nBufYSize, psExtraArg)) |
1955 | 0 | { |
1956 | | // We should have optimized paths for 1, 2, 3 and 4 bands on input and overlay |
1957 | | // permutations but let's keep it simple for now. |
1958 | 0 | const GByte nOpacity = static_cast<GByte>(m_opacity255Scale); |
1959 | 0 | const size_t nPixelCount = static_cast<size_t>(nBufXSize) * nBufYSize; |
1960 | 0 | const GByte *CPL_RESTRICT pabyR = m_abyBuffer.data(); |
1961 | 0 | GByte *CPL_RESTRICT pabyG = nullptr; |
1962 | 0 | GByte *CPL_RESTRICT pabyB = nullptr; |
1963 | 0 | GByte *CPL_RESTRICT pabyA = nullptr; |
1964 | 0 | switch (nColorCount) |
1965 | 0 | { |
1966 | 0 | case 2: |
1967 | 0 | pabyA = m_abyBuffer.data() + nPixelCount; |
1968 | 0 | break; |
1969 | 0 | case 3: |
1970 | 0 | pabyG = m_abyBuffer.data() + nPixelCount; |
1971 | 0 | pabyB = m_abyBuffer.data() + nPixelCount * 2; |
1972 | 0 | break; |
1973 | 0 | case 4: |
1974 | 0 | pabyG = m_abyBuffer.data() + nPixelCount; |
1975 | 0 | pabyB = m_abyBuffer.data() + nPixelCount * 2; |
1976 | 0 | pabyA = m_abyBuffer.data() + nPixelCount * 3; |
1977 | 0 | break; |
1978 | 0 | } |
1979 | | |
1980 | 0 | const GByte *CPL_RESTRICT pabyOverlayR = |
1981 | 0 | m_abyBuffer.data() + nPixelCount * nColorCount; |
1982 | 0 | GByte *CPL_RESTRICT pabyOverlayG = nullptr; |
1983 | 0 | GByte *CPL_RESTRICT pabyOverlayB = nullptr; |
1984 | 0 | GByte *CPL_RESTRICT pabyOverlayA = nullptr; |
1985 | 0 | switch (nOverlayCount) |
1986 | 0 | { |
1987 | 0 | case 2: |
1988 | 0 | pabyOverlayA = |
1989 | 0 | m_abyBuffer.data() + nPixelCount * (nColorCount + 1); |
1990 | 0 | break; |
1991 | 0 | case 3: |
1992 | 0 | pabyOverlayG = |
1993 | 0 | m_abyBuffer.data() + nPixelCount * (nColorCount + 1); |
1994 | 0 | pabyOverlayB = |
1995 | 0 | m_abyBuffer.data() + nPixelCount * (nColorCount + 2); |
1996 | 0 | break; |
1997 | 0 | case 4: |
1998 | 0 | pabyOverlayG = |
1999 | 0 | m_abyBuffer.data() + nPixelCount * (nColorCount + 1); |
2000 | 0 | pabyOverlayB = |
2001 | 0 | m_abyBuffer.data() + nPixelCount * (nColorCount + 2); |
2002 | 0 | pabyOverlayA = |
2003 | 0 | m_abyBuffer.data() + nPixelCount * (nColorCount + 3); |
2004 | 0 | break; |
2005 | 0 | } |
2006 | | |
2007 | 0 | size_t nSrcIdx = 0; |
2008 | 0 | for (int j = 0; j < nBufYSize; ++j) |
2009 | 0 | { |
2010 | 0 | auto nDstOffset = j * nLineSpace; |
2011 | 0 | int i = 0; |
2012 | |
|
2013 | 0 | const GByte *CPL_RESTRICT pabyOverlayG_current = |
2014 | 0 | pabyOverlayG ? pabyOverlayG + nSrcIdx : nullptr; |
2015 | 0 | const GByte *CPL_RESTRICT pabyOverlayB_current = |
2016 | 0 | pabyOverlayB ? pabyOverlayB + nSrcIdx : nullptr; |
2017 | 0 | const GByte *CPL_RESTRICT pabyOverlayA_current = |
2018 | 0 | pabyOverlayA ? pabyOverlayA + nSrcIdx : nullptr; |
2019 | |
|
2020 | 0 | const GByte *CPL_RESTRICT pabyG_current = |
2021 | 0 | pabyG ? pabyG + nSrcIdx : nullptr; |
2022 | 0 | const GByte *CPL_RESTRICT pabyB_current = |
2023 | 0 | pabyB ? pabyB + nSrcIdx : nullptr; |
2024 | 0 | const GByte *CPL_RESTRICT pabyA_current = |
2025 | 0 | pabyA ? pabyA + nSrcIdx : nullptr; |
2026 | | |
2027 | | // Determine the number of bands |
2028 | 0 | const int nInputBands{1 + (pabyG ? 2 : 0) + (pabyA ? 1 : 0)}; |
2029 | 0 | const int nOverlayBands{1 + (pabyOverlayG ? 2 : 0) + |
2030 | 0 | (pabyOverlayA ? 1 : 0)}; |
2031 | 0 | const int nOutputBands = std::max(nInputBands, nOverlayBands); |
2032 | |
|
2033 | 0 | if (m_operator == CompositionMode::SCREEN) |
2034 | 0 | BlendScreen_Generic( |
2035 | 0 | pabyR + nSrcIdx, pabyG_current, pabyB_current, |
2036 | 0 | pabyA_current, pabyOverlayR + nSrcIdx, pabyOverlayG_current, |
2037 | 0 | pabyOverlayB_current, pabyOverlayA_current, |
2038 | 0 | pabyDst + nDstOffset, nPixelSpace, nBandSpace, i, nBufXSize, |
2039 | 0 | nOpacity, nOutputBands, m_bSwappedOpacity); |
2040 | 0 | else if (m_operator == CompositionMode::MULTIPLY) |
2041 | 0 | BlendMultiply_Generic( |
2042 | 0 | pabyR + nSrcIdx, pabyG_current, pabyB_current, |
2043 | 0 | pabyA_current, pabyOverlayR + nSrcIdx, pabyOverlayG_current, |
2044 | 0 | pabyOverlayB_current, pabyOverlayA_current, |
2045 | 0 | pabyDst + nDstOffset, nPixelSpace, nBandSpace, i, nBufXSize, |
2046 | 0 | nOpacity, nOutputBands, m_bSwappedOpacity); |
2047 | 0 | else if (m_operator == CompositionMode::HARD_LIGHT) |
2048 | 0 | BlendHardLight_Generic( |
2049 | 0 | pabyR + nSrcIdx, pabyG_current, pabyB_current, |
2050 | 0 | pabyA_current, pabyOverlayR + nSrcIdx, pabyOverlayG_current, |
2051 | 0 | pabyOverlayB_current, pabyOverlayA_current, |
2052 | 0 | pabyDst + nDstOffset, nPixelSpace, nBandSpace, i, nBufXSize, |
2053 | 0 | nOpacity, nOutputBands, m_bSwappedOpacity); |
2054 | 0 | else if (m_operator == CompositionMode::OVERLAY) |
2055 | 0 | BlendOverlay_Generic( |
2056 | 0 | pabyR + nSrcIdx, pabyG_current, pabyB_current, |
2057 | 0 | pabyA_current, pabyOverlayR + nSrcIdx, pabyOverlayG_current, |
2058 | 0 | pabyOverlayB_current, pabyOverlayA_current, |
2059 | 0 | pabyDst + nDstOffset, nPixelSpace, nBandSpace, i, nBufXSize, |
2060 | 0 | nOpacity, nOutputBands, m_bSwappedOpacity); |
2061 | 0 | else if (m_operator == CompositionMode::DARKEN) |
2062 | 0 | BlendDarken_Generic( |
2063 | 0 | pabyR + nSrcIdx, pabyG_current, pabyB_current, |
2064 | 0 | pabyA_current, pabyOverlayR + nSrcIdx, pabyOverlayG_current, |
2065 | 0 | pabyOverlayB_current, pabyOverlayA_current, |
2066 | 0 | pabyDst + nDstOffset, nPixelSpace, nBandSpace, i, nBufXSize, |
2067 | 0 | nOpacity, nOutputBands, m_bSwappedOpacity); |
2068 | 0 | else if (m_operator == CompositionMode::LIGHTEN) |
2069 | 0 | BlendLighten_Generic( |
2070 | 0 | pabyR + nSrcIdx, pabyG_current, pabyB_current, |
2071 | 0 | pabyA_current, pabyOverlayR + nSrcIdx, pabyOverlayG_current, |
2072 | 0 | pabyOverlayB_current, pabyOverlayA_current, |
2073 | 0 | pabyDst + nDstOffset, nPixelSpace, nBandSpace, i, nBufXSize, |
2074 | 0 | nOpacity, nOutputBands, m_bSwappedOpacity); |
2075 | 0 | else if (m_operator == CompositionMode::COLOR_BURN) |
2076 | 0 | BlendColorBurn_Generic( |
2077 | 0 | pabyR + nSrcIdx, pabyG_current, pabyB_current, |
2078 | 0 | pabyA_current, pabyOverlayR + nSrcIdx, pabyOverlayG_current, |
2079 | 0 | pabyOverlayB_current, pabyOverlayA_current, |
2080 | 0 | pabyDst + nDstOffset, nPixelSpace, nBandSpace, i, nBufXSize, |
2081 | 0 | nOpacity, nOutputBands, m_bSwappedOpacity); |
2082 | 0 | else if (m_operator == CompositionMode::COLOR_DODGE) |
2083 | 0 | BlendColorDodge_Generic( |
2084 | 0 | pabyR + nSrcIdx, pabyG_current, pabyB_current, |
2085 | 0 | pabyA_current, pabyOverlayR + nSrcIdx, pabyOverlayG_current, |
2086 | 0 | pabyOverlayB_current, pabyOverlayA_current, |
2087 | 0 | pabyDst + nDstOffset, nPixelSpace, nBandSpace, i, nBufXSize, |
2088 | 0 | nOpacity, nOutputBands, m_bSwappedOpacity); |
2089 | 0 | else |
2090 | 0 | { |
2091 | 0 | CPLAssert(false); |
2092 | 0 | } |
2093 | | |
2094 | 0 | nSrcIdx += nBufXSize - i; |
2095 | 0 | } |
2096 | 0 | return CE_None; |
2097 | 0 | } |
2098 | | |
2099 | | /************************************************************************/ |
2100 | | /* ERRORS */ |
2101 | | /************************************************************************/ |
2102 | 0 | else if (m_ioError) |
2103 | 0 | { |
2104 | 0 | return CE_Failure; |
2105 | 0 | } |
2106 | 0 | else |
2107 | 0 | { |
2108 | 0 | const CPLErr eErr = GDALDataset::IRasterIO( |
2109 | 0 | eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize, nBufYSize, |
2110 | 0 | eBufType, nBandCount, panBandMap, nPixelSpace, nLineSpace, |
2111 | 0 | nBandSpace, psExtraArg); |
2112 | 0 | m_ioError = eErr != CE_None; |
2113 | 0 | return eErr; |
2114 | 0 | } |
2115 | 0 | } |
2116 | | |
2117 | | /************************************************************************/ |
2118 | | /* SrcOverRGBOneComponent() */ |
2119 | | /************************************************************************/ |
2120 | | |
2121 | | // GCC and clang do a god job a auto vectorizing the below function |
2122 | | #if defined(__GNUC__) && !defined(__clang__) |
2123 | | __attribute__((optimize("tree-vectorize"))) |
2124 | | #endif |
2125 | | static void SrcOverRGB(const uint8_t *const __restrict pabyOverlay, |
2126 | | const uint8_t *const __restrict pabySrc, |
2127 | | uint8_t *const __restrict pabyDst, const size_t N, |
2128 | | const uint8_t nOpacity) |
2129 | 0 | { |
2130 | 0 | for (size_t i = 0; i < N; ++i) |
2131 | 0 | { |
2132 | 0 | const uint8_t nOverlay = pabyOverlay[i]; |
2133 | 0 | const uint8_t nSrc = pabySrc[i]; |
2134 | 0 | pabyDst[i] = static_cast<uint8_t>( |
2135 | 0 | (nOverlay * nOpacity + nSrc * (255 - nOpacity) + 255) / 256); |
2136 | 0 | } |
2137 | 0 | } |
2138 | | |
2139 | | /************************************************************************/ |
2140 | | /* BlendBand::IRasterIO() */ |
2141 | | /************************************************************************/ |
2142 | | |
2143 | | CPLErr BlendBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff, |
2144 | | int nXSize, int nYSize, void *pData, int nBufXSize, |
2145 | | int nBufYSize, GDALDataType eBufType, |
2146 | | GSpacing nPixelSpace, GSpacing nLineSpace, |
2147 | | GDALRasterIOExtraArg *psExtraArg) |
2148 | 0 | { |
2149 | | // Try to pass the request to the most appropriate overview dataset. |
2150 | 0 | if (nBufXSize < nXSize && nBufYSize < nYSize) |
2151 | 0 | { |
2152 | 0 | int bTried = FALSE; |
2153 | 0 | const CPLErr eErr = TryOverviewRasterIO( |
2154 | 0 | eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize, nBufYSize, |
2155 | 0 | eBufType, nPixelSpace, nLineSpace, psExtraArg, &bTried); |
2156 | 0 | if (bTried) |
2157 | 0 | return eErr; |
2158 | 0 | } |
2159 | | |
2160 | 0 | const size_t nPixelCount = static_cast<size_t>(nBufXSize) * nBufYSize; |
2161 | 0 | const int nColorCount = m_oBlendDataset.m_oColorDS.GetRasterCount(); |
2162 | 0 | const int nOverlayCount = m_oBlendDataset.m_oOverlayDS.GetRasterCount(); |
2163 | 0 | if (nBand == 4 && m_oBlendDataset.m_operator == CompositionMode::HSV_VALUE) |
2164 | 0 | { |
2165 | 0 | if (nColorCount == 3) |
2166 | 0 | { |
2167 | 0 | GByte ch = 255; |
2168 | 0 | for (int iY = 0; iY < nBufYSize; ++iY) |
2169 | 0 | { |
2170 | 0 | GDALCopyWords64(&ch, GDT_UInt8, 0, |
2171 | 0 | static_cast<GByte *>(pData) + iY * nLineSpace, |
2172 | 0 | eBufType, static_cast<int>(nPixelSpace), |
2173 | 0 | nBufXSize); |
2174 | 0 | } |
2175 | 0 | return CE_None; |
2176 | 0 | } |
2177 | 0 | else |
2178 | 0 | { |
2179 | 0 | CPLAssert(nColorCount == 4); |
2180 | 0 | return m_oBlendDataset.m_oColorDS.GetRasterBand(4)->RasterIO( |
2181 | 0 | eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize, |
2182 | 0 | nBufYSize, eBufType, nPixelSpace, nLineSpace, psExtraArg); |
2183 | 0 | } |
2184 | 0 | } |
2185 | 0 | else if (nOverlayCount == 3 && nColorCount == 3 && |
2186 | 0 | m_oBlendDataset.m_operator == CompositionMode::SRC_OVER && |
2187 | 0 | eRWFlag == GF_Read && eBufType == GDT_UInt8 && |
2188 | 0 | m_oBlendDataset.AcquireSourcePixels(nXOff, nYOff, nXSize, nYSize, |
2189 | 0 | nBufXSize, nBufYSize, |
2190 | 0 | psExtraArg)) |
2191 | 0 | { |
2192 | 0 | const int nOpacity = m_oBlendDataset.m_opacity255Scale; |
2193 | 0 | const GByte *const CPL_RESTRICT pabySrc = |
2194 | 0 | m_oBlendDataset.m_abyBuffer.data() + nPixelCount * (nBand - 1); |
2195 | 0 | const GByte *const CPL_RESTRICT pabyOverlay = |
2196 | 0 | m_oBlendDataset.m_abyBuffer.data() + |
2197 | 0 | nPixelCount * (nColorCount + nBand - 1); |
2198 | 0 | GByte *const CPL_RESTRICT pabyDst = static_cast<GByte *>(pData); |
2199 | 0 | size_t nSrcIdx = 0; |
2200 | 0 | for (int j = 0; j < nBufYSize; ++j) |
2201 | 0 | { |
2202 | 0 | auto nDstOffset = j * nLineSpace; |
2203 | 0 | if (nPixelSpace == 1) |
2204 | 0 | { |
2205 | 0 | SrcOverRGB(pabyOverlay + nSrcIdx, pabySrc + nSrcIdx, |
2206 | 0 | pabyDst + nDstOffset, nBufXSize, |
2207 | 0 | static_cast<uint8_t>(nOpacity)); |
2208 | 0 | nSrcIdx += nBufXSize; |
2209 | 0 | } |
2210 | 0 | else |
2211 | 0 | { |
2212 | 0 | for (int i = 0; i < nBufXSize; |
2213 | 0 | ++i, ++nSrcIdx, nDstOffset += nPixelSpace) |
2214 | 0 | { |
2215 | 0 | const int nOverlay = pabyOverlay[nSrcIdx]; |
2216 | 0 | const int nSrc = pabySrc[nSrcIdx]; |
2217 | 0 | pabyDst[nDstOffset] = static_cast<GByte>( |
2218 | 0 | (nOverlay * nOpacity + nSrc * (255 - nOpacity) + 255) / |
2219 | 0 | 256); |
2220 | 0 | } |
2221 | 0 | } |
2222 | 0 | } |
2223 | 0 | return CE_None; |
2224 | 0 | } |
2225 | 0 | else if (eRWFlag == GF_Read && eBufType == GDT_UInt8 && |
2226 | 0 | m_oBlendDataset.AcquireSourcePixels(nXOff, nYOff, nXSize, nYSize, |
2227 | 0 | nBufXSize, nBufYSize, |
2228 | 0 | psExtraArg)) |
2229 | 0 | { |
2230 | 0 | GByte *pabyDst = static_cast<GByte *>(pData); |
2231 | |
|
2232 | 0 | if (m_oBlendDataset.m_operator == CompositionMode::MULTIPLY || |
2233 | 0 | m_oBlendDataset.m_operator == CompositionMode::SCREEN || |
2234 | 0 | m_oBlendDataset.m_operator == CompositionMode::HARD_LIGHT || |
2235 | 0 | m_oBlendDataset.m_operator == CompositionMode::OVERLAY || |
2236 | 0 | m_oBlendDataset.m_operator == CompositionMode::DARKEN || |
2237 | 0 | m_oBlendDataset.m_operator == CompositionMode::LIGHTEN || |
2238 | 0 | m_oBlendDataset.m_operator == CompositionMode::COLOR_BURN || |
2239 | 0 | m_oBlendDataset.m_operator == CompositionMode::COLOR_DODGE) |
2240 | 0 | { |
2241 | 0 | CPLAssert(nBand <= 4); |
2242 | 0 | const GByte *pabyR = m_oBlendDataset.m_abyBuffer.data(); |
2243 | 0 | const GByte *pabyG = |
2244 | 0 | nColorCount >= 3 |
2245 | 0 | ? m_oBlendDataset.m_abyBuffer.data() + nPixelCount |
2246 | 0 | : nullptr; |
2247 | 0 | const GByte *pabyB = |
2248 | 0 | nColorCount >= 3 |
2249 | 0 | ? m_oBlendDataset.m_abyBuffer.data() + nPixelCount * 2 |
2250 | 0 | : nullptr; |
2251 | |
|
2252 | 0 | GByte *pabyA = nullptr; |
2253 | |
|
2254 | 0 | if (nColorCount == 2) |
2255 | 0 | { |
2256 | 0 | pabyA = m_oBlendDataset.m_abyBuffer.data() + nPixelCount; |
2257 | 0 | } |
2258 | 0 | else if (nColorCount == 4) |
2259 | 0 | { |
2260 | 0 | pabyA = m_oBlendDataset.m_abyBuffer.data() + nPixelCount * 3; |
2261 | 0 | } |
2262 | | |
2263 | | // Retrieve single band value as R |
2264 | 0 | const GByte *pabyOverlayR = |
2265 | 0 | m_oBlendDataset.m_abyBuffer.data() + nPixelCount * nColorCount; |
2266 | |
|
2267 | 0 | const GByte *pabyOverlayG = |
2268 | 0 | nOverlayCount >= 3 ? m_oBlendDataset.m_abyBuffer.data() + |
2269 | 0 | nPixelCount * (nColorCount + 1) |
2270 | 0 | : nullptr; |
2271 | 0 | const GByte *pabyOverlayB = |
2272 | 0 | nOverlayCount >= 3 ? m_oBlendDataset.m_abyBuffer.data() + |
2273 | 0 | nPixelCount * (nColorCount + 2) |
2274 | 0 | : nullptr; |
2275 | 0 | const GByte *pabyOverlayA = |
2276 | 0 | (nOverlayCount == 2 || nOverlayCount == 4) |
2277 | 0 | ? m_oBlendDataset.m_abyBuffer.data() + |
2278 | 0 | nPixelCount * (nColorCount + nOverlayCount - 1) |
2279 | 0 | : nullptr; |
2280 | | |
2281 | | // Determine the number of bands |
2282 | 0 | const int nInputBands{1 + (pabyG ? 2 : 0) + (pabyA ? 1 : 0)}; |
2283 | 0 | const int nOverlayBands{1 + (pabyOverlayG ? 2 : 0) + |
2284 | 0 | (pabyOverlayA ? 1 : 0)}; |
2285 | 0 | const int nOutputBands = std::max(nInputBands, nOverlayBands); |
2286 | 0 | const bool bSwappedOpacity{m_oBlendDataset.m_bSwappedOpacity}; |
2287 | |
|
2288 | 0 | size_t nSrcIdx = 0; |
2289 | 0 | for (int j = 0; j < nBufYSize; ++j) |
2290 | 0 | { |
2291 | 0 | auto nDstOffset = j * nLineSpace; |
2292 | 0 | for (int i = 0; i < nBufXSize; |
2293 | 0 | ++i, ++nSrcIdx, nDstOffset += nPixelSpace) |
2294 | 0 | { |
2295 | | // TODO: This need to be optimized for requesting a single band |
2296 | 0 | std::vector<GByte> byteBuffer; |
2297 | 0 | byteBuffer.resize(std::max(nColorCount, nOverlayCount)); |
2298 | 0 | if (m_oBlendDataset.m_operator == CompositionMode::SCREEN) |
2299 | 0 | BlendScreen_Generic( |
2300 | 0 | pabyR, pabyG, pabyB, pabyA, pabyOverlayR, |
2301 | 0 | pabyOverlayG, pabyOverlayB, pabyOverlayA, |
2302 | 0 | byteBuffer.data(), nPixelSpace, 1, nSrcIdx, 1, |
2303 | 0 | static_cast<GByte>( |
2304 | 0 | m_oBlendDataset.m_opacity255Scale), |
2305 | 0 | nOutputBands, bSwappedOpacity); |
2306 | 0 | else if (m_oBlendDataset.m_operator == |
2307 | 0 | CompositionMode::HARD_LIGHT) |
2308 | 0 | BlendHardLight_Generic( |
2309 | 0 | pabyR, pabyG, pabyB, pabyA, pabyOverlayR, |
2310 | 0 | pabyOverlayG, pabyOverlayB, pabyOverlayA, |
2311 | 0 | byteBuffer.data(), nPixelSpace, 1, nSrcIdx, 1, |
2312 | 0 | static_cast<GByte>( |
2313 | 0 | m_oBlendDataset.m_opacity255Scale), |
2314 | 0 | nOutputBands, bSwappedOpacity); |
2315 | 0 | else if (m_oBlendDataset.m_operator == |
2316 | 0 | CompositionMode::OVERLAY) |
2317 | 0 | BlendOverlay_Generic( |
2318 | 0 | pabyR, pabyG, pabyB, pabyA, pabyOverlayR, |
2319 | 0 | pabyOverlayG, pabyOverlayB, pabyOverlayA, |
2320 | 0 | byteBuffer.data(), nPixelSpace, 1, nSrcIdx, 1, |
2321 | 0 | static_cast<GByte>( |
2322 | 0 | m_oBlendDataset.m_opacity255Scale), |
2323 | 0 | nOutputBands, bSwappedOpacity); |
2324 | 0 | else if (m_oBlendDataset.m_operator == |
2325 | 0 | CompositionMode::MULTIPLY) |
2326 | 0 | BlendMultiply_Generic( |
2327 | 0 | pabyR, pabyG, pabyB, pabyA, pabyOverlayR, |
2328 | 0 | pabyOverlayG, pabyOverlayB, pabyOverlayA, |
2329 | 0 | byteBuffer.data(), nPixelSpace, 1, nSrcIdx, 1, |
2330 | 0 | static_cast<GByte>( |
2331 | 0 | m_oBlendDataset.m_opacity255Scale), |
2332 | 0 | nOutputBands, bSwappedOpacity); |
2333 | 0 | else if (m_oBlendDataset.m_operator == |
2334 | 0 | CompositionMode::DARKEN) |
2335 | 0 | BlendDarken_Generic( |
2336 | 0 | pabyR, pabyG, pabyB, pabyA, pabyOverlayR, |
2337 | 0 | pabyOverlayG, pabyOverlayB, pabyOverlayA, |
2338 | 0 | byteBuffer.data(), nPixelSpace, 1, nSrcIdx, 1, |
2339 | 0 | static_cast<GByte>( |
2340 | 0 | m_oBlendDataset.m_opacity255Scale), |
2341 | 0 | nOutputBands, bSwappedOpacity); |
2342 | 0 | else if (m_oBlendDataset.m_operator == |
2343 | 0 | CompositionMode::LIGHTEN) |
2344 | 0 | BlendLighten_Generic( |
2345 | 0 | pabyR, pabyG, pabyB, pabyA, pabyOverlayR, |
2346 | 0 | pabyOverlayG, pabyOverlayB, pabyOverlayA, |
2347 | 0 | byteBuffer.data(), nPixelSpace, 1, nSrcIdx, 1, |
2348 | 0 | static_cast<GByte>( |
2349 | 0 | m_oBlendDataset.m_opacity255Scale), |
2350 | 0 | nOutputBands, bSwappedOpacity); |
2351 | 0 | else if (m_oBlendDataset.m_operator == |
2352 | 0 | CompositionMode::COLOR_BURN) |
2353 | 0 | BlendColorBurn_Generic( |
2354 | 0 | pabyR, pabyG, pabyB, pabyA, pabyOverlayR, |
2355 | 0 | pabyOverlayG, pabyOverlayB, pabyOverlayA, |
2356 | 0 | byteBuffer.data(), nPixelSpace, 1, nSrcIdx, 1, |
2357 | 0 | static_cast<GByte>( |
2358 | 0 | m_oBlendDataset.m_opacity255Scale), |
2359 | 0 | nOutputBands, bSwappedOpacity); |
2360 | 0 | else if (m_oBlendDataset.m_operator == |
2361 | 0 | CompositionMode::COLOR_DODGE) |
2362 | 0 | BlendColorDodge_Generic( |
2363 | 0 | pabyR, pabyG, pabyB, pabyA, pabyOverlayR, |
2364 | 0 | pabyOverlayG, pabyOverlayB, pabyOverlayA, |
2365 | 0 | byteBuffer.data(), nPixelSpace, 1, nSrcIdx, 1, |
2366 | 0 | static_cast<GByte>( |
2367 | 0 | m_oBlendDataset.m_opacity255Scale), |
2368 | 0 | nOutputBands, bSwappedOpacity); |
2369 | 0 | else |
2370 | 0 | { |
2371 | 0 | CPLAssert(false); |
2372 | 0 | } |
2373 | | |
2374 | 0 | pabyDst[nDstOffset] = byteBuffer[nBand - 1]; |
2375 | 0 | } |
2376 | 0 | } |
2377 | 0 | } |
2378 | 0 | else if (m_oBlendDataset.m_operator == CompositionMode::SRC_OVER) |
2379 | 0 | { |
2380 | 0 | const auto RGBToGrayScale = [](int R, int G, int B) |
2381 | 0 | { |
2382 | | // Equivalent to R * 0.299 + G * 0.587 + B * 0.114 |
2383 | | // but using faster computation |
2384 | 0 | return (R * 306 + G * 601 + B * 117) / 1024; |
2385 | 0 | }; |
2386 | |
|
2387 | 0 | const GByte *paby = |
2388 | 0 | (nBand <= nColorCount) ? m_oBlendDataset.m_abyBuffer.data() + |
2389 | 0 | nPixelCount * (nBand - 1) |
2390 | 0 | : (nBand == 4 && nColorCount == 2) |
2391 | 0 | ? m_oBlendDataset.m_abyBuffer.data() + nPixelCount |
2392 | 0 | : nullptr; |
2393 | 0 | const GByte *pabyA = |
2394 | 0 | (nColorCount == 4) |
2395 | 0 | ? m_oBlendDataset.m_abyBuffer.data() + nPixelCount * 3 |
2396 | 0 | : nColorCount == 2 |
2397 | 0 | ? m_oBlendDataset.m_abyBuffer.data() + nPixelCount |
2398 | 0 | : nullptr; |
2399 | 0 | const GByte *pabyOverlay = |
2400 | 0 | (nBand <= nOverlayCount) |
2401 | 0 | ? m_oBlendDataset.m_abyBuffer.data() + |
2402 | 0 | nPixelCount * (nColorCount + nBand - 1) |
2403 | 0 | : (nBand <= 3) ? m_oBlendDataset.m_abyBuffer.data() + |
2404 | 0 | nPixelCount * nColorCount |
2405 | 0 | : nullptr; |
2406 | 0 | const GByte *pabyOverlayA = |
2407 | 0 | (nOverlayCount == 2 || nOverlayCount == 4) |
2408 | 0 | ? m_oBlendDataset.m_abyBuffer.data() + |
2409 | 0 | nPixelCount * (nColorCount + nOverlayCount - 1) |
2410 | 0 | : nullptr; |
2411 | 0 | const GByte *pabyOverlayR = |
2412 | 0 | (nOverlayCount >= 3 && nColorCount < 3 && nBand <= 3) |
2413 | 0 | ? m_oBlendDataset.m_abyBuffer.data() + |
2414 | 0 | nPixelCount * nColorCount |
2415 | 0 | : nullptr; |
2416 | 0 | const GByte *pabyOverlayG = |
2417 | 0 | (nOverlayCount >= 3 && nColorCount < 3 && nBand <= 3) |
2418 | 0 | ? m_oBlendDataset.m_abyBuffer.data() + |
2419 | 0 | nPixelCount * (nColorCount + 1) |
2420 | 0 | : nullptr; |
2421 | 0 | const GByte *pabyOverlayB = |
2422 | 0 | (nOverlayCount >= 3 && nColorCount < 3 && nBand <= 3) |
2423 | 0 | ? m_oBlendDataset.m_abyBuffer.data() + |
2424 | 0 | nPixelCount * (nColorCount + 2) |
2425 | 0 | : nullptr; |
2426 | |
|
2427 | 0 | size_t nSrcIdx = 0; |
2428 | 0 | for (int j = 0; j < nBufYSize; ++j) |
2429 | 0 | { |
2430 | 0 | auto nDstOffset = j * nLineSpace; |
2431 | 0 | for (int i = 0; i < nBufXSize; |
2432 | 0 | ++i, ++nSrcIdx, nDstOffset += nPixelSpace) |
2433 | 0 | { |
2434 | | // Corrected to take into account m_opacity255Scale |
2435 | 0 | const int nOverlayA = |
2436 | 0 | pabyOverlayA ? ((pabyOverlayA[nSrcIdx] * |
2437 | 0 | m_oBlendDataset.m_opacity255Scale + |
2438 | 0 | 255) / |
2439 | 0 | 256) |
2440 | 0 | : m_oBlendDataset.m_opacity255Scale; |
2441 | |
|
2442 | 0 | const int nSrcA = pabyA ? pabyA[nSrcIdx] : 255; |
2443 | |
|
2444 | 0 | const int nSrcAMul255MinusOverlayA = |
2445 | 0 | (nSrcA * (255 - nOverlayA) + 255) / 256; |
2446 | 0 | const int nDstA = nOverlayA + nSrcAMul255MinusOverlayA; |
2447 | 0 | if (nBand != 4) |
2448 | 0 | { |
2449 | 0 | const int nOverlay = |
2450 | 0 | (pabyOverlayR && pabyOverlayG && pabyOverlayB) |
2451 | 0 | ? RGBToGrayScale(pabyOverlayR[nSrcIdx], |
2452 | 0 | pabyOverlayG[nSrcIdx], |
2453 | 0 | pabyOverlayB[nSrcIdx]) |
2454 | 0 | : pabyOverlay ? pabyOverlay[nSrcIdx] |
2455 | 0 | : 255; |
2456 | |
|
2457 | 0 | const int nSrc = paby ? paby[nSrcIdx] : 255; |
2458 | 0 | int nDst = (nOverlay * nOverlayA + |
2459 | 0 | nSrc * nSrcAMul255MinusOverlayA + 255) / |
2460 | 0 | 256; |
2461 | 0 | if (nDstA != 0 && nDstA != 255) |
2462 | 0 | nDst = (nDst * 255 + (nDstA / 2)) / nDstA; |
2463 | 0 | pabyDst[nDstOffset] = |
2464 | 0 | static_cast<GByte>(std::min(nDst, 255)); |
2465 | 0 | } |
2466 | 0 | else |
2467 | 0 | { |
2468 | 0 | pabyDst[nDstOffset] = static_cast<GByte>(nDstA); |
2469 | 0 | } |
2470 | 0 | } |
2471 | 0 | } |
2472 | 0 | } |
2473 | 0 | else if (nOverlayCount == 1 && m_oBlendDataset.m_opacity255Scale == 255) |
2474 | 0 | { |
2475 | 0 | const GByte *pabyR = m_oBlendDataset.m_abyBuffer.data(); |
2476 | 0 | const GByte *pabyG = |
2477 | 0 | m_oBlendDataset.m_abyBuffer.data() + nPixelCount; |
2478 | 0 | const GByte *pabyB = |
2479 | 0 | m_oBlendDataset.m_abyBuffer.data() + nPixelCount * 2; |
2480 | 0 | CPLAssert(m_oBlendDataset.m_operator == CompositionMode::HSV_VALUE); |
2481 | 0 | size_t nSrcIdx = 0; |
2482 | 0 | const GByte *pabyValue = |
2483 | 0 | m_oBlendDataset.m_abyBuffer.data() + nPixelCount * nColorCount; |
2484 | 0 | for (int j = 0; j < nBufYSize; ++j) |
2485 | 0 | { |
2486 | 0 | auto nDstOffset = j * nLineSpace; |
2487 | 0 | if (nPixelSpace == 1 && nLineSpace >= nPixelSpace * nBufXSize) |
2488 | 0 | { |
2489 | 0 | patch_value_line( |
2490 | 0 | nBufXSize, pabyR + nSrcIdx, pabyG + nSrcIdx, |
2491 | 0 | pabyB + nSrcIdx, pabyValue + nSrcIdx, |
2492 | 0 | nBand == 1 ? pabyDst + nDstOffset : nullptr, |
2493 | 0 | nBand == 2 ? pabyDst + nDstOffset : nullptr, |
2494 | 0 | nBand == 3 ? pabyDst + nDstOffset : nullptr); |
2495 | 0 | nSrcIdx += nBufXSize; |
2496 | 0 | } |
2497 | 0 | else |
2498 | 0 | { |
2499 | 0 | for (int i = 0; i < nBufXSize; |
2500 | 0 | ++i, ++nSrcIdx, nDstOffset += nPixelSpace) |
2501 | 0 | { |
2502 | 0 | float h, s; |
2503 | 0 | rgb_to_hs(pabyR[nSrcIdx], pabyG[nSrcIdx], |
2504 | 0 | pabyB[nSrcIdx], &h, &s); |
2505 | 0 | if (nBand == 1) |
2506 | 0 | { |
2507 | 0 | hsv_to_rgb(h, s, pabyValue[nSrcIdx], |
2508 | 0 | &pabyDst[nDstOffset], nullptr, nullptr); |
2509 | 0 | } |
2510 | 0 | else if (nBand == 2) |
2511 | 0 | { |
2512 | 0 | hsv_to_rgb(h, s, pabyValue[nSrcIdx], nullptr, |
2513 | 0 | &pabyDst[nDstOffset], nullptr); |
2514 | 0 | } |
2515 | 0 | else |
2516 | 0 | { |
2517 | 0 | CPLAssert(nBand == 3); |
2518 | 0 | hsv_to_rgb(h, s, pabyValue[nSrcIdx], nullptr, |
2519 | 0 | nullptr, &pabyDst[nDstOffset]); |
2520 | 0 | } |
2521 | 0 | } |
2522 | 0 | } |
2523 | 0 | } |
2524 | 0 | } |
2525 | 0 | else |
2526 | 0 | { |
2527 | 0 | CPLAssert(m_oBlendDataset.m_operator == CompositionMode::HSV_VALUE); |
2528 | 0 | CPLAssert(nBand <= 3); |
2529 | 0 | const GByte *pabyR = m_oBlendDataset.m_abyBuffer.data(); |
2530 | 0 | const GByte *pabyG = |
2531 | 0 | m_oBlendDataset.m_abyBuffer.data() + nPixelCount; |
2532 | 0 | const GByte *pabyB = |
2533 | 0 | m_oBlendDataset.m_abyBuffer.data() + nPixelCount * 2; |
2534 | 0 | const GByte *pabyValue = |
2535 | 0 | m_oBlendDataset.m_abyBuffer.data() + nPixelCount * nColorCount; |
2536 | 0 | const GByte *pabyOverlayR = |
2537 | 0 | nOverlayCount >= 3 ? m_oBlendDataset.m_abyBuffer.data() + |
2538 | 0 | nPixelCount * nColorCount |
2539 | 0 | : nullptr; |
2540 | 0 | const GByte *pabyOverlayG = |
2541 | 0 | nOverlayCount >= 3 ? m_oBlendDataset.m_abyBuffer.data() + |
2542 | 0 | nPixelCount * (nColorCount + 1) |
2543 | 0 | : nullptr; |
2544 | 0 | const GByte *pabyOverlayB = |
2545 | 0 | nOverlayCount >= 3 ? m_oBlendDataset.m_abyBuffer.data() + |
2546 | 0 | nPixelCount * (nColorCount + 2) |
2547 | 0 | : nullptr; |
2548 | 0 | const GByte *pabyOverlayA = |
2549 | 0 | (nOverlayCount == 2 || nOverlayCount == 4) |
2550 | 0 | ? m_oBlendDataset.m_abyBuffer.data() + |
2551 | 0 | nPixelCount * (nColorCount + nOverlayCount - 1) |
2552 | 0 | : nullptr; |
2553 | |
|
2554 | 0 | size_t nSrcIdx = 0; |
2555 | 0 | for (int j = 0; j < nBufYSize; ++j) |
2556 | 0 | { |
2557 | 0 | auto nDstOffset = j * nLineSpace; |
2558 | 0 | for (int i = 0; i < nBufXSize; |
2559 | 0 | ++i, ++nSrcIdx, nDstOffset += nPixelSpace) |
2560 | 0 | { |
2561 | 0 | const int nColorR = pabyR[nSrcIdx]; |
2562 | 0 | const int nColorG = pabyG[nSrcIdx]; |
2563 | 0 | const int nColorB = pabyB[nSrcIdx]; |
2564 | 0 | const int nOverlayV = |
2565 | 0 | (pabyOverlayR && pabyOverlayG && pabyOverlayB) |
2566 | 0 | ? std::max({pabyOverlayR[nSrcIdx], |
2567 | 0 | pabyOverlayG[nSrcIdx], |
2568 | 0 | pabyOverlayB[nSrcIdx]}) |
2569 | 0 | : pabyValue[nSrcIdx]; |
2570 | 0 | const int nOverlayA = |
2571 | 0 | pabyOverlayA ? ((pabyOverlayA[nSrcIdx] * |
2572 | 0 | m_oBlendDataset.m_opacity255Scale + |
2573 | 0 | 255) / |
2574 | 0 | 256) |
2575 | 0 | : m_oBlendDataset.m_opacity255Scale; |
2576 | 0 | const int nColorValue = |
2577 | 0 | std::max({nColorR, nColorG, nColorB}); |
2578 | |
|
2579 | 0 | float h, s; |
2580 | 0 | rgb_to_hs(pabyR[nSrcIdx], pabyG[nSrcIdx], pabyB[nSrcIdx], |
2581 | 0 | &h, &s); |
2582 | |
|
2583 | 0 | const GByte nTargetValue = static_cast<GByte>( |
2584 | 0 | (nOverlayV * nOverlayA + |
2585 | 0 | nColorValue * (255 - nOverlayA) + 255) / |
2586 | 0 | 256); |
2587 | |
|
2588 | 0 | if (nBand == 1) |
2589 | 0 | { |
2590 | 0 | hsv_to_rgb(h, s, nTargetValue, &pabyDst[nDstOffset], |
2591 | 0 | nullptr, nullptr); |
2592 | 0 | } |
2593 | 0 | else if (nBand == 2) |
2594 | 0 | { |
2595 | 0 | hsv_to_rgb(h, s, nTargetValue, nullptr, |
2596 | 0 | &pabyDst[nDstOffset], nullptr); |
2597 | 0 | } |
2598 | 0 | else |
2599 | 0 | { |
2600 | 0 | CPLAssert(nBand == 3); |
2601 | 0 | hsv_to_rgb(h, s, nTargetValue, nullptr, nullptr, |
2602 | 0 | &pabyDst[nDstOffset]); |
2603 | 0 | } |
2604 | 0 | } |
2605 | 0 | } |
2606 | 0 | } |
2607 | | |
2608 | 0 | return CE_None; |
2609 | 0 | } |
2610 | 0 | else if (m_oBlendDataset.m_ioError) |
2611 | 0 | { |
2612 | 0 | return CE_Failure; |
2613 | 0 | } |
2614 | 0 | else |
2615 | 0 | { |
2616 | 0 | const CPLErr eErr = GDALRasterBand::IRasterIO( |
2617 | 0 | eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize, nBufYSize, |
2618 | 0 | eBufType, nPixelSpace, nLineSpace, psExtraArg); |
2619 | 0 | m_oBlendDataset.m_ioError = eErr != CE_None; |
2620 | 0 | return eErr; |
2621 | 0 | } |
2622 | 0 | } |
2623 | | |
2624 | | } // namespace |
2625 | | |
2626 | | /************************************************************************/ |
2627 | | /* GDALRasterBlendAlgorithm::ValidateGlobal() */ |
2628 | | /************************************************************************/ |
2629 | | |
2630 | | bool GDALRasterBlendAlgorithm::ValidateGlobal() |
2631 | 0 | { |
2632 | 0 | auto poSrcDS = |
2633 | 0 | m_inputDataset.empty() ? nullptr : m_inputDataset[0].GetDatasetRef(); |
2634 | 0 | auto poOverlayDS = m_overlayDataset.GetDatasetRef(); |
2635 | 0 | if (poSrcDS) |
2636 | 0 | { |
2637 | 0 | if (poSrcDS->GetRasterCount() == 0 || poSrcDS->GetRasterCount() > 4 || |
2638 | 0 | poSrcDS->GetRasterBand(1)->GetRasterDataType() != GDT_UInt8) |
2639 | 0 | { |
2640 | 0 | ReportError(CE_Failure, CPLE_NotSupported, |
2641 | 0 | "Only 1-band, 2-band, 3-band or 4-band Byte dataset " |
2642 | 0 | "supported as input"); |
2643 | 0 | return false; |
2644 | 0 | } |
2645 | 0 | } |
2646 | 0 | if (poOverlayDS) |
2647 | 0 | { |
2648 | 0 | if (poOverlayDS->GetRasterCount() == 0 || |
2649 | 0 | poOverlayDS->GetRasterCount() > 4 || |
2650 | 0 | poOverlayDS->GetRasterBand(1)->GetRasterDataType() != GDT_UInt8) |
2651 | 0 | { |
2652 | 0 | ReportError(CE_Failure, CPLE_NotSupported, |
2653 | 0 | "Only 1-band, 2-band, 3-band or 4-band Byte dataset " |
2654 | 0 | "supported as overlay"); |
2655 | 0 | return false; |
2656 | 0 | } |
2657 | 0 | } |
2658 | | |
2659 | 0 | if (poSrcDS && poOverlayDS) |
2660 | 0 | { |
2661 | 0 | if (poSrcDS->GetRasterXSize() != poOverlayDS->GetRasterXSize() || |
2662 | 0 | poSrcDS->GetRasterYSize() != poOverlayDS->GetRasterYSize()) |
2663 | 0 | { |
2664 | 0 | ReportError(CE_Failure, CPLE_IllegalArg, |
2665 | 0 | "Input dataset and overlay dataset must have " |
2666 | 0 | "the same dimensions"); |
2667 | 0 | return false; |
2668 | 0 | } |
2669 | | |
2670 | 0 | if (!BandCountIsCompatibleWithCompositionMode(poSrcDS->GetRasterCount(), |
2671 | 0 | m_operator)) |
2672 | 0 | { |
2673 | 0 | const int minRequiredBands{ |
2674 | 0 | MinBandCountForCompositionMode(m_operator)}; |
2675 | 0 | const int maxRequiredBands{ |
2676 | 0 | MaxBandCountForCompositionMode(m_operator)}; |
2677 | 0 | if (minRequiredBands != maxRequiredBands) |
2678 | 0 | ReportError(CE_Failure, CPLE_IllegalArg, |
2679 | 0 | "Input dataset has %d band(s), but operator %s " |
2680 | 0 | "requires between %d and %d bands", |
2681 | 0 | poSrcDS->GetRasterCount(), |
2682 | 0 | CompositionModeToString(m_operator).c_str(), |
2683 | 0 | minRequiredBands, maxRequiredBands); |
2684 | 0 | else |
2685 | 0 | ReportError(CE_Failure, CPLE_IllegalArg, |
2686 | 0 | "Input dataset has %d band(s), but operator %s " |
2687 | 0 | "requires %d bands", |
2688 | 0 | poSrcDS->GetRasterCount(), |
2689 | 0 | CompositionModeToString(m_operator).c_str(), |
2690 | 0 | minRequiredBands); |
2691 | 0 | return false; |
2692 | 0 | } |
2693 | 0 | } |
2694 | | |
2695 | | // Check that for LIGHTEN and DARKEN, the source dataset and destination dataset |
2696 | | // have the same number of color bands (do not consider alpha) |
2697 | 0 | if (poSrcDS && poOverlayDS && |
2698 | 0 | (m_operator == CompositionMode::LIGHTEN || |
2699 | 0 | m_operator == CompositionMode::DARKEN)) |
2700 | 0 | { |
2701 | 0 | const int nSrcColorBands = |
2702 | 0 | (poSrcDS->GetRasterCount() == 2 || poSrcDS->GetRasterCount() == 4) |
2703 | 0 | ? poSrcDS->GetRasterCount() - 1 |
2704 | 0 | : poSrcDS->GetRasterCount(); |
2705 | 0 | const int nOverlayColorBands = (poOverlayDS->GetRasterCount() == 2 || |
2706 | 0 | poOverlayDS->GetRasterCount() == 4) |
2707 | 0 | ? poOverlayDS->GetRasterCount() - 1 |
2708 | 0 | : poOverlayDS->GetRasterCount(); |
2709 | 0 | if (nSrcColorBands != nOverlayColorBands) |
2710 | 0 | { |
2711 | 0 | ReportError( |
2712 | 0 | CE_Failure, CPLE_IllegalArg, |
2713 | 0 | "For LIGHTEN and DARKEN operators, the source dataset " |
2714 | 0 | "and overlay dataset must have the same number of " |
2715 | 0 | "bands (without considering alpha). They have %d and %d " |
2716 | 0 | "bands respectively", |
2717 | 0 | nSrcColorBands, nOverlayColorBands); |
2718 | 0 | return false; |
2719 | 0 | } |
2720 | 0 | } |
2721 | | |
2722 | 0 | return true; |
2723 | 0 | } |
2724 | | |
2725 | | /************************************************************************/ |
2726 | | /* GDALRasterBlendAlgorithm::RunStep() */ |
2727 | | /************************************************************************/ |
2728 | | |
2729 | | bool GDALRasterBlendAlgorithm::RunStep(GDALPipelineStepRunContext &) |
2730 | 0 | { |
2731 | 0 | auto poSrcDS = m_inputDataset[0].GetDatasetRef(); |
2732 | 0 | CPLAssert(poSrcDS); |
2733 | | |
2734 | 0 | auto poOverlayDS = m_overlayDataset.GetDatasetRef(); |
2735 | 0 | CPLAssert(poOverlayDS); |
2736 | | |
2737 | | // If any of the dataset single band has a color table implicitly convert it to RGBA by calling |
2738 | | // GDALTranslate with -expand RGBA |
2739 | 0 | auto convertToRGBAifNeeded = |
2740 | 0 | [](GDALDataset *&poDS, |
2741 | 0 | std::unique_ptr<GDALDataset, GDALDatasetUniquePtrReleaser> &ds) |
2742 | 0 | -> bool |
2743 | 0 | { |
2744 | 0 | if (poDS->GetRasterCount() == 1 && |
2745 | 0 | poDS->GetRasterBand(1)->GetColorTable() != nullptr) |
2746 | 0 | { |
2747 | 0 | CPLStringList aosOptions; |
2748 | 0 | aosOptions.AddString("-of"); |
2749 | 0 | aosOptions.AddString("VRT"); |
2750 | 0 | aosOptions.AddString("-expand"); |
2751 | 0 | aosOptions.AddString("RGBA"); |
2752 | 0 | GDALTranslateOptions *translateOptions = |
2753 | 0 | GDALTranslateOptionsNew(aosOptions.List(), nullptr); |
2754 | |
|
2755 | 0 | ds.reset(GDALDataset::FromHandle(GDALTranslate( |
2756 | 0 | "", GDALDataset::ToHandle(poDS), translateOptions, nullptr))); |
2757 | |
|
2758 | 0 | GDALTranslateOptionsFree(translateOptions); |
2759 | |
|
2760 | 0 | if (ds != nullptr) |
2761 | 0 | { |
2762 | 0 | poDS = ds.get(); |
2763 | 0 | return true; |
2764 | 0 | } |
2765 | 0 | else |
2766 | 0 | { |
2767 | 0 | return false; |
2768 | 0 | } |
2769 | 0 | } |
2770 | 0 | return true; |
2771 | 0 | }; |
2772 | |
|
2773 | 0 | if (!convertToRGBAifNeeded(poSrcDS, m_poTmpSrcDS)) |
2774 | 0 | { |
2775 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
2776 | 0 | "Conversion of source dataset color table to RGBA failed"); |
2777 | 0 | return false; |
2778 | 0 | } |
2779 | | |
2780 | 0 | if (!convertToRGBAifNeeded(poOverlayDS, m_poTmpOverlayDS)) |
2781 | 0 | { |
2782 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
2783 | 0 | "Conversion of overlay dataset color table to RGBA failed"); |
2784 | 0 | return false; |
2785 | 0 | } |
2786 | | |
2787 | 0 | if (!ValidateGlobal()) |
2788 | 0 | return false; |
2789 | | |
2790 | 0 | const int nOpacity255Scale = |
2791 | 0 | (m_opacity * 255 + OPACITY_INPUT_RANGE / 2) / OPACITY_INPUT_RANGE; |
2792 | |
|
2793 | 0 | bool bSwappedOpacity = false; |
2794 | | // Many algorithms are commutative regarding the two inputs but BlendDataset assume |
2795 | | // RGB(A) is in the source (and not in the overlay). |
2796 | 0 | if ((m_operator == CompositionMode::MULTIPLY || |
2797 | 0 | m_operator == CompositionMode::SCREEN || |
2798 | 0 | m_operator == CompositionMode::HARD_LIGHT || |
2799 | 0 | m_operator == CompositionMode::OVERLAY) && |
2800 | 0 | (poSrcDS->GetRasterCount() < poOverlayDS->GetRasterCount())) |
2801 | 0 | { |
2802 | 0 | bSwappedOpacity = true; |
2803 | 0 | std::swap(poSrcDS, poOverlayDS); |
2804 | 0 | } |
2805 | |
|
2806 | 0 | m_outputDataset.Set(std::make_unique<BlendDataset>( |
2807 | 0 | *poSrcDS, *poOverlayDS, m_operator, nOpacity255Scale, bSwappedOpacity)); |
2808 | |
|
2809 | 0 | return true; |
2810 | 0 | } |
2811 | | |
2812 | 0 | GDALRasterBlendAlgorithmStandalone::~GDALRasterBlendAlgorithmStandalone() = |
2813 | | default; |
2814 | | |
2815 | | //! @endcond |