/src/mozilla-central/gfx/qcms/transform.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* vim: set ts=8 sw=8 noexpandtab: */ |
2 | | // qcms |
3 | | // Copyright (C) 2009 Mozilla Corporation |
4 | | // Copyright (C) 1998-2007 Marti Maria |
5 | | // |
6 | | // Permission is hereby granted, free of charge, to any person obtaining |
7 | | // a copy of this software and associated documentation files (the "Software"), |
8 | | // to deal in the Software without restriction, including without limitation |
9 | | // the rights to use, copy, modify, merge, publish, distribute, sublicense, |
10 | | // and/or sell copies of the Software, and to permit persons to whom the Software |
11 | | // is furnished to do so, subject to the following conditions: |
12 | | // |
13 | | // The above copyright notice and this permission notice shall be included in |
14 | | // all copies or substantial portions of the Software. |
15 | | // |
16 | | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
17 | | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO |
18 | | // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
19 | | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
20 | | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
21 | | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
22 | | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
23 | | |
24 | | #include <stdlib.h> |
25 | | #include <math.h> |
26 | | #include <assert.h> |
27 | | #include <string.h> //memcpy |
28 | | #include "qcmsint.h" |
29 | | #include "chain.h" |
30 | | #include "matrix.h" |
31 | | #include "transform_util.h" |
32 | | |
33 | | /* for MSVC, GCC, Intel, and Sun compilers */ |
34 | | #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_AMD64) || defined(__x86_64__) || defined(__x86_64) |
35 | | #define X86 |
36 | | #endif /* _M_IX86 || __i386__ || __i386 || _M_AMD64 || __x86_64__ || __x86_64 */ |
37 | | |
38 | | /** |
39 | | * AltiVec detection for PowerPC CPUs |
40 | | * In case we have a method of detecting do the runtime detection. |
41 | | * Otherwise statically choose the AltiVec path in case the compiler |
42 | | * was told to build with AltiVec support. |
43 | | */ |
44 | | #if (defined(__POWERPC__) || defined(__powerpc__)) |
45 | | #if defined(__linux__) |
46 | | #include <unistd.h> |
47 | | #include <fcntl.h> |
48 | | #include <stdio.h> |
49 | | #include <elf.h> |
50 | | #include <linux/auxvec.h> |
51 | | #include <asm/cputable.h> |
52 | | #include <link.h> |
53 | | |
54 | | static inline qcms_bool have_altivec() { |
55 | | static int available = -1; |
56 | | int new_avail = 0; |
57 | | ElfW(auxv_t) auxv; |
58 | | ssize_t count; |
59 | | int fd, i; |
60 | | |
61 | | if (available != -1) |
62 | | return (available != 0 ? true : false); |
63 | | |
64 | | fd = open("/proc/self/auxv", O_RDONLY); |
65 | | if (fd < 0) |
66 | | goto out; |
67 | | do { |
68 | | count = read(fd, &auxv, sizeof(auxv)); |
69 | | if (count < 0) |
70 | | goto out_close; |
71 | | |
72 | | if (auxv.a_type == AT_HWCAP) { |
73 | | new_avail = !!(auxv.a_un.a_val & PPC_FEATURE_HAS_ALTIVEC); |
74 | | goto out_close; |
75 | | } |
76 | | } while (auxv.a_type != AT_NULL); |
77 | | |
78 | | out_close: |
79 | | close(fd); |
80 | | out: |
81 | | available = new_avail; |
82 | | return (available != 0 ? true : false); |
83 | | } |
84 | | #elif defined(__APPLE__) && defined(__MACH__) |
85 | | #include <sys/sysctl.h> |
86 | | |
87 | | /** |
88 | | * rip-off from ffmpeg AltiVec detection code. |
89 | | * this code also appears on Apple's AltiVec pages. |
90 | | */ |
91 | | static inline qcms_bool have_altivec() { |
92 | | int sels[2] = {CTL_HW, HW_VECTORUNIT}; |
93 | | static int available = -1; |
94 | | size_t len = sizeof(available); |
95 | | int err; |
96 | | |
97 | | if (available != -1) |
98 | | return (available != 0 ? true : false); |
99 | | |
100 | | err = sysctl(sels, 2, &available, &len, NULL, 0); |
101 | | |
102 | | if (err == 0) |
103 | | if (available != 0) |
104 | | return true; |
105 | | |
106 | | return false; |
107 | | } |
108 | | #elif defined(__ALTIVEC__) || defined(__APPLE_ALTIVEC__) |
109 | | #define have_altivec() true |
110 | | #else |
111 | | #define have_altivec() false |
112 | | #endif |
113 | | #endif // (defined(__POWERPC__) || defined(__powerpc__)) |
114 | | |
115 | | // Build a White point, primary chromas transfer matrix from RGB to CIE XYZ |
116 | | // This is just an approximation, I am not handling all the non-linear |
117 | | // aspects of the RGB to XYZ process, and assumming that the gamma correction |
118 | | // has transitive property in the tranformation chain. |
119 | | // |
120 | | // the alghoritm: |
121 | | // |
122 | | // - First I build the absolute conversion matrix using |
123 | | // primaries in XYZ. This matrix is next inverted |
124 | | // - Then I eval the source white point across this matrix |
125 | | // obtaining the coeficients of the transformation |
126 | | // - Then, I apply these coeficients to the original matrix |
127 | | static struct matrix build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white, qcms_CIE_xyYTRIPLE primrs) |
128 | 0 | { |
129 | 0 | struct matrix primaries; |
130 | 0 | struct matrix primaries_invert; |
131 | 0 | struct matrix result; |
132 | 0 | struct vector white_point; |
133 | 0 | struct vector coefs; |
134 | 0 |
|
135 | 0 | double xn, yn; |
136 | 0 | double xr, yr; |
137 | 0 | double xg, yg; |
138 | 0 | double xb, yb; |
139 | 0 |
|
140 | 0 | xn = white.x; |
141 | 0 | yn = white.y; |
142 | 0 |
|
143 | 0 | if (yn == 0.0) |
144 | 0 | return matrix_invalid(); |
145 | 0 | |
146 | 0 | xr = primrs.red.x; |
147 | 0 | yr = primrs.red.y; |
148 | 0 | xg = primrs.green.x; |
149 | 0 | yg = primrs.green.y; |
150 | 0 | xb = primrs.blue.x; |
151 | 0 | yb = primrs.blue.y; |
152 | 0 |
|
153 | 0 | primaries.m[0][0] = xr; |
154 | 0 | primaries.m[0][1] = xg; |
155 | 0 | primaries.m[0][2] = xb; |
156 | 0 |
|
157 | 0 | primaries.m[1][0] = yr; |
158 | 0 | primaries.m[1][1] = yg; |
159 | 0 | primaries.m[1][2] = yb; |
160 | 0 |
|
161 | 0 | primaries.m[2][0] = 1 - xr - yr; |
162 | 0 | primaries.m[2][1] = 1 - xg - yg; |
163 | 0 | primaries.m[2][2] = 1 - xb - yb; |
164 | 0 | primaries.invalid = false; |
165 | 0 |
|
166 | 0 | white_point.v[0] = xn/yn; |
167 | 0 | white_point.v[1] = 1.; |
168 | 0 | white_point.v[2] = (1.0-xn-yn)/yn; |
169 | 0 |
|
170 | 0 | primaries_invert = matrix_invert(primaries); |
171 | 0 | if (primaries_invert.invalid) { |
172 | 0 | return matrix_invalid(); |
173 | 0 | } |
174 | 0 | |
175 | 0 | coefs = matrix_eval(primaries_invert, white_point); |
176 | 0 |
|
177 | 0 | result.m[0][0] = coefs.v[0]*xr; |
178 | 0 | result.m[0][1] = coefs.v[1]*xg; |
179 | 0 | result.m[0][2] = coefs.v[2]*xb; |
180 | 0 |
|
181 | 0 | result.m[1][0] = coefs.v[0]*yr; |
182 | 0 | result.m[1][1] = coefs.v[1]*yg; |
183 | 0 | result.m[1][2] = coefs.v[2]*yb; |
184 | 0 |
|
185 | 0 | result.m[2][0] = coefs.v[0]*(1.-xr-yr); |
186 | 0 | result.m[2][1] = coefs.v[1]*(1.-xg-yg); |
187 | 0 | result.m[2][2] = coefs.v[2]*(1.-xb-yb); |
188 | 0 | result.invalid = primaries_invert.invalid; |
189 | 0 |
|
190 | 0 | return result; |
191 | 0 | } |
192 | | |
/* A colour expressed as CIE XYZ tristimulus values. */
struct CIE_XYZ {
	double X;	/* X tristimulus value */
	double Y;	/* Y tristimulus value */
	double Z;	/* Z tristimulus value */
};

/* CIE Illuminant D50, in X, Y, Z order */
static const struct CIE_XYZ D50_XYZ = { 0.9642, 1.0000, 0.8249 };
205 | | |
206 | | /* from lcms: xyY2XYZ() |
207 | | * corresponds to argyll: icmYxy2XYZ() */ |
208 | | static struct CIE_XYZ xyY2XYZ(qcms_CIE_xyY source) |
209 | 0 | { |
210 | 0 | struct CIE_XYZ dest; |
211 | 0 | dest.X = (source.x / source.y) * source.Y; |
212 | 0 | dest.Y = source.Y; |
213 | 0 | dest.Z = ((1 - source.x - source.y) / source.y) * source.Y; |
214 | 0 | return dest; |
215 | 0 | } |
216 | | |
217 | | /* from lcms: ComputeChromaticAdaption */ |
218 | | // Compute chromatic adaption matrix using chad as cone matrix |
219 | | static struct matrix |
220 | | compute_chromatic_adaption(struct CIE_XYZ source_white_point, |
221 | | struct CIE_XYZ dest_white_point, |
222 | | struct matrix chad) |
223 | 0 | { |
224 | 0 | struct matrix chad_inv; |
225 | 0 | struct vector cone_source_XYZ, cone_source_rgb; |
226 | 0 | struct vector cone_dest_XYZ, cone_dest_rgb; |
227 | 0 | struct matrix cone, tmp; |
228 | 0 |
|
229 | 0 | tmp = chad; |
230 | 0 | chad_inv = matrix_invert(tmp); |
231 | 0 | if (chad_inv.invalid) { |
232 | 0 | return matrix_invalid(); |
233 | 0 | } |
234 | 0 | |
235 | 0 | cone_source_XYZ.v[0] = source_white_point.X; |
236 | 0 | cone_source_XYZ.v[1] = source_white_point.Y; |
237 | 0 | cone_source_XYZ.v[2] = source_white_point.Z; |
238 | 0 |
|
239 | 0 | cone_dest_XYZ.v[0] = dest_white_point.X; |
240 | 0 | cone_dest_XYZ.v[1] = dest_white_point.Y; |
241 | 0 | cone_dest_XYZ.v[2] = dest_white_point.Z; |
242 | 0 |
|
243 | 0 | cone_source_rgb = matrix_eval(chad, cone_source_XYZ); |
244 | 0 | cone_dest_rgb = matrix_eval(chad, cone_dest_XYZ); |
245 | 0 |
|
246 | 0 | cone.m[0][0] = cone_dest_rgb.v[0]/cone_source_rgb.v[0]; |
247 | 0 | cone.m[0][1] = 0; |
248 | 0 | cone.m[0][2] = 0; |
249 | 0 | cone.m[1][0] = 0; |
250 | 0 | cone.m[1][1] = cone_dest_rgb.v[1]/cone_source_rgb.v[1]; |
251 | 0 | cone.m[1][2] = 0; |
252 | 0 | cone.m[2][0] = 0; |
253 | 0 | cone.m[2][1] = 0; |
254 | 0 | cone.m[2][2] = cone_dest_rgb.v[2]/cone_source_rgb.v[2]; |
255 | 0 | cone.invalid = false; |
256 | 0 |
|
257 | 0 | // Normalize |
258 | 0 | return matrix_multiply(chad_inv, matrix_multiply(cone, chad)); |
259 | 0 | } |
260 | | |
261 | | /* from lcms: cmsAdaptionMatrix */ |
262 | | // Returns the final chrmatic adaptation from illuminant FromIll to Illuminant ToIll |
263 | | // Bradford is assumed |
264 | | static struct matrix |
265 | | adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination) |
266 | 0 | { |
267 | 0 | struct matrix lam_rigg = {{ // Bradford matrix |
268 | 0 | { 0.8951f, 0.2664f, -0.1614f }, |
269 | 0 | { -0.7502f, 1.7135f, 0.0367f }, |
270 | 0 | { 0.0389f, -0.0685f, 1.0296f } |
271 | 0 | }}; |
272 | 0 | return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg); |
273 | 0 | } |
274 | | |
275 | | /* from lcms: cmsAdaptMatrixToD50 */ |
276 | | static struct matrix adapt_matrix_to_D50(struct matrix r, qcms_CIE_xyY source_white_pt) |
277 | 0 | { |
278 | 0 | struct CIE_XYZ Dn; |
279 | 0 | struct matrix Bradford; |
280 | 0 |
|
281 | 0 | if (source_white_pt.y == 0.0) { |
282 | 0 | return matrix_invalid(); |
283 | 0 | } |
284 | 0 | |
285 | 0 | Dn = xyY2XYZ(source_white_pt); |
286 | 0 |
|
287 | 0 | Bradford = adaption_matrix(Dn, D50_XYZ); |
288 | 0 | if (Bradford.invalid) { |
289 | 0 | return matrix_invalid(); |
290 | 0 | } |
291 | 0 | return matrix_multiply(Bradford, r); |
292 | 0 | } |
293 | | |
294 | | qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries) |
295 | 0 | { |
296 | 0 | struct matrix colorants; |
297 | 0 | colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries); |
298 | 0 | colorants = adapt_matrix_to_D50(colorants, white_point); |
299 | 0 |
|
300 | 0 | if (colorants.invalid) |
301 | 0 | return false; |
302 | 0 | |
303 | 0 | /* note: there's a transpose type of operation going on here */ |
304 | 0 | profile->redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0]); |
305 | 0 | profile->redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0]); |
306 | 0 | profile->redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0]); |
307 | 0 |
|
308 | 0 | profile->greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1]); |
309 | 0 | profile->greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1]); |
310 | 0 | profile->greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1]); |
311 | 0 |
|
312 | 0 | profile->blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2]); |
313 | 0 | profile->blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2]); |
314 | 0 | profile->blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2]); |
315 | 0 |
|
316 | 0 | return true; |
317 | 0 | } |
318 | | |
319 | | qcms_bool get_rgb_colorants(struct matrix *colorants, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries) |
320 | 0 | { |
321 | 0 | *colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries); |
322 | 0 | *colorants = adapt_matrix_to_D50(*colorants, white_point); |
323 | 0 |
|
324 | 0 | return (colorants->invalid ? true : false); |
325 | 0 | } |
326 | | |
327 | | #if 0 |
328 | | static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
329 | | { |
330 | | int i; |
331 | | float (*mat)[4] = transform->matrix; |
332 | | for (i=0; i<length; i++) { |
333 | | unsigned char device_r = *src++; |
334 | | unsigned char device_g = *src++; |
335 | | unsigned char device_b = *src++; |
336 | | |
337 | | float linear_r = transform->input_gamma_table_r[device_r]; |
338 | | float linear_g = transform->input_gamma_table_g[device_g]; |
339 | | float linear_b = transform->input_gamma_table_b[device_b]; |
340 | | |
341 | | float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
342 | | float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
343 | | float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
344 | | |
345 | | float out_device_r = pow(out_linear_r, transform->out_gamma_r); |
346 | | float out_device_g = pow(out_linear_g, transform->out_gamma_g); |
347 | | float out_device_b = pow(out_linear_b, transform->out_gamma_b); |
348 | | |
349 | | dest[OUTPUT_R_INDEX] = clamp_u8(255*out_device_r); |
350 | | dest[OUTPUT_G_INDEX] = clamp_u8(255*out_device_g); |
351 | | dest[OUTPUT_B_INDEX] = clamp_u8(255*out_device_b); |
352 | | dest += RGB_OUTPUT_COMPONENTS; |
353 | | } |
354 | | } |
355 | | #endif |
356 | | |
357 | | static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
358 | 0 | { |
359 | 0 | unsigned int i; |
360 | 0 | for (i = 0; i < length; i++) { |
361 | 0 | float out_device_r, out_device_g, out_device_b; |
362 | 0 | unsigned char device = *src++; |
363 | 0 |
|
364 | 0 | float linear = transform->input_gamma_table_gray[device]; |
365 | 0 |
|
366 | 0 | out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); |
367 | 0 | out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); |
368 | 0 | out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); |
369 | 0 |
|
370 | 0 | dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255); |
371 | 0 | dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255); |
372 | 0 | dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255); |
373 | 0 | dest += RGB_OUTPUT_COMPONENTS; |
374 | 0 | } |
375 | 0 | } |
376 | | |
377 | | /* Alpha is not corrected. |
378 | | A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If |
379 | | RGB Is?" Tech Memo 17 (December 14, 1998). |
380 | | See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf |
381 | | */ |
382 | | |
383 | | static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
384 | 0 | { |
385 | 0 | unsigned int i; |
386 | 0 | for (i = 0; i < length; i++) { |
387 | 0 | float out_device_r, out_device_g, out_device_b; |
388 | 0 | unsigned char device = *src++; |
389 | 0 | unsigned char alpha = *src++; |
390 | 0 |
|
391 | 0 | float linear = transform->input_gamma_table_gray[device]; |
392 | 0 |
|
393 | 0 | out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); |
394 | 0 | out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); |
395 | 0 | out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); |
396 | 0 |
|
397 | 0 | dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255); |
398 | 0 | dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255); |
399 | 0 | dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255); |
400 | 0 | dest[OUTPUT_A_INDEX] = alpha; |
401 | 0 | dest += RGBA_OUTPUT_COMPONENTS; |
402 | 0 | } |
403 | 0 | } |
404 | | |
405 | | |
406 | | static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
407 | 0 | { |
408 | 0 | unsigned int i; |
409 | 0 | for (i = 0; i < length; i++) { |
410 | 0 | unsigned char device = *src++; |
411 | 0 | uint16_t gray; |
412 | 0 |
|
413 | 0 | float linear = transform->input_gamma_table_gray[device]; |
414 | 0 |
|
415 | 0 | /* we could round here... */ |
416 | 0 | gray = linear * PRECACHE_OUTPUT_MAX; |
417 | 0 |
|
418 | 0 | dest[OUTPUT_R_INDEX] = transform->output_table_r->data[gray]; |
419 | 0 | dest[OUTPUT_G_INDEX] = transform->output_table_g->data[gray]; |
420 | 0 | dest[OUTPUT_B_INDEX] = transform->output_table_b->data[gray]; |
421 | 0 | dest += RGB_OUTPUT_COMPONENTS; |
422 | 0 | } |
423 | 0 | } |
424 | | |
425 | | static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
426 | 0 | { |
427 | 0 | unsigned int i; |
428 | 0 | for (i = 0; i < length; i++) { |
429 | 0 | unsigned char device = *src++; |
430 | 0 | unsigned char alpha = *src++; |
431 | 0 | uint16_t gray; |
432 | 0 |
|
433 | 0 | float linear = transform->input_gamma_table_gray[device]; |
434 | 0 |
|
435 | 0 | /* we could round here... */ |
436 | 0 | gray = linear * PRECACHE_OUTPUT_MAX; |
437 | 0 |
|
438 | 0 | dest[OUTPUT_R_INDEX] = transform->output_table_r->data[gray]; |
439 | 0 | dest[OUTPUT_G_INDEX] = transform->output_table_g->data[gray]; |
440 | 0 | dest[OUTPUT_B_INDEX] = transform->output_table_b->data[gray]; |
441 | 0 | dest[OUTPUT_A_INDEX] = alpha; |
442 | 0 | dest += RGBA_OUTPUT_COMPONENTS; |
443 | 0 | } |
444 | 0 | } |
445 | | |
446 | | static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
447 | 0 | { |
448 | 0 | unsigned int i; |
449 | 0 | float (*mat)[4] = transform->matrix; |
450 | 0 | for (i = 0; i < length; i++) { |
451 | 0 | unsigned char device_r = *src++; |
452 | 0 | unsigned char device_g = *src++; |
453 | 0 | unsigned char device_b = *src++; |
454 | 0 | uint16_t r, g, b; |
455 | 0 |
|
456 | 0 | float linear_r = transform->input_gamma_table_r[device_r]; |
457 | 0 | float linear_g = transform->input_gamma_table_g[device_g]; |
458 | 0 | float linear_b = transform->input_gamma_table_b[device_b]; |
459 | 0 |
|
460 | 0 | float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
461 | 0 | float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
462 | 0 | float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
463 | 0 |
|
464 | 0 | out_linear_r = clamp_float(out_linear_r); |
465 | 0 | out_linear_g = clamp_float(out_linear_g); |
466 | 0 | out_linear_b = clamp_float(out_linear_b); |
467 | 0 |
|
468 | 0 | /* we could round here... */ |
469 | 0 | r = out_linear_r * PRECACHE_OUTPUT_MAX; |
470 | 0 | g = out_linear_g * PRECACHE_OUTPUT_MAX; |
471 | 0 | b = out_linear_b * PRECACHE_OUTPUT_MAX; |
472 | 0 |
|
473 | 0 | dest[OUTPUT_R_INDEX] = transform->output_table_r->data[r]; |
474 | 0 | dest[OUTPUT_G_INDEX] = transform->output_table_g->data[g]; |
475 | 0 | dest[OUTPUT_B_INDEX] = transform->output_table_b->data[b]; |
476 | 0 | dest += RGB_OUTPUT_COMPONENTS; |
477 | 0 | } |
478 | 0 | } |
479 | | |
480 | | static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
481 | 0 | { |
482 | 0 | unsigned int i; |
483 | 0 | float (*mat)[4] = transform->matrix; |
484 | 0 | for (i = 0; i < length; i++) { |
485 | 0 | unsigned char device_r = *src++; |
486 | 0 | unsigned char device_g = *src++; |
487 | 0 | unsigned char device_b = *src++; |
488 | 0 | unsigned char alpha = *src++; |
489 | 0 | uint16_t r, g, b; |
490 | 0 |
|
491 | 0 | float linear_r = transform->input_gamma_table_r[device_r]; |
492 | 0 | float linear_g = transform->input_gamma_table_g[device_g]; |
493 | 0 | float linear_b = transform->input_gamma_table_b[device_b]; |
494 | 0 |
|
495 | 0 | float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
496 | 0 | float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
497 | 0 | float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
498 | 0 |
|
499 | 0 | out_linear_r = clamp_float(out_linear_r); |
500 | 0 | out_linear_g = clamp_float(out_linear_g); |
501 | 0 | out_linear_b = clamp_float(out_linear_b); |
502 | 0 |
|
503 | 0 | /* we could round here... */ |
504 | 0 | r = out_linear_r * PRECACHE_OUTPUT_MAX; |
505 | 0 | g = out_linear_g * PRECACHE_OUTPUT_MAX; |
506 | 0 | b = out_linear_b * PRECACHE_OUTPUT_MAX; |
507 | 0 |
|
508 | 0 | dest[OUTPUT_R_INDEX] = transform->output_table_r->data[r]; |
509 | 0 | dest[OUTPUT_G_INDEX] = transform->output_table_g->data[g]; |
510 | 0 | dest[OUTPUT_B_INDEX] = transform->output_table_b->data[b]; |
511 | 0 | dest[OUTPUT_A_INDEX] = alpha; |
512 | 0 | dest += RGBA_OUTPUT_COMPONENTS; |
513 | 0 | } |
514 | 0 | } |
515 | | |
516 | | // Not used |
517 | | /* |
518 | | static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) { |
519 | | unsigned int i; |
520 | | int xy_len = 1; |
521 | | int x_len = transform->grid_size; |
522 | | int len = x_len * x_len; |
523 | | float* r_table = transform->r_clut; |
524 | | float* g_table = transform->g_clut; |
525 | | float* b_table = transform->b_clut; |
526 | | |
527 | | for (i = 0; i < length; i++) { |
528 | | unsigned char in_r = *src++; |
529 | | unsigned char in_g = *src++; |
530 | | unsigned char in_b = *src++; |
531 | | float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; |
532 | | |
533 | | int x = floorf(linear_r * (transform->grid_size-1)); |
534 | | int y = floorf(linear_g * (transform->grid_size-1)); |
535 | | int z = floorf(linear_b * (transform->grid_size-1)); |
536 | | int x_n = ceilf(linear_r * (transform->grid_size-1)); |
537 | | int y_n = ceilf(linear_g * (transform->grid_size-1)); |
538 | | int z_n = ceilf(linear_b * (transform->grid_size-1)); |
539 | | float x_d = linear_r * (transform->grid_size-1) - x; |
540 | | float y_d = linear_g * (transform->grid_size-1) - y; |
541 | | float z_d = linear_b * (transform->grid_size-1) - z; |
542 | | |
543 | | float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d); |
544 | | float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d); |
545 | | float r_y1 = lerp(r_x1, r_x2, y_d); |
546 | | float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d); |
547 | | float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d); |
548 | | float r_y2 = lerp(r_x3, r_x4, y_d); |
549 | | float clut_r = lerp(r_y1, r_y2, z_d); |
550 | | |
551 | | float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d); |
552 | | float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d); |
553 | | float g_y1 = lerp(g_x1, g_x2, y_d); |
554 | | float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d); |
555 | | float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d); |
556 | | float g_y2 = lerp(g_x3, g_x4, y_d); |
557 | | float clut_g = lerp(g_y1, g_y2, z_d); |
558 | | |
559 | | float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d); |
560 | | float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d); |
561 | | float b_y1 = lerp(b_x1, b_x2, y_d); |
562 | | float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d); |
563 | | float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d); |
564 | | float b_y2 = lerp(b_x3, b_x4, y_d); |
565 | | float clut_b = lerp(b_y1, b_y2, z_d); |
566 | | |
567 | | *dest++ = clamp_u8(clut_r*255.0f); |
568 | | *dest++ = clamp_u8(clut_g*255.0f); |
569 | | *dest++ = clamp_u8(clut_b*255.0f); |
570 | | } |
571 | | } |
572 | | */ |
573 | | |
/* Integer division of value by div, computed as (value + div - 1) / div;
 * for non-negative value and positive div this rounds the quotient up. */
static int int_div_ceil(int value, int div)
{
	int biased = value + div - 1;

	return biased / div;
}
577 | | |
578 | | // Using lcms' tetra interpolation algorithm. |
579 | 0 | static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) { |
580 | 0 | unsigned int i; |
581 | 0 | int xy_len = 1; |
582 | 0 | int x_len = transform->grid_size; |
583 | 0 | int len = x_len * x_len; |
584 | 0 | float* r_table = transform->r_clut; |
585 | 0 | float* g_table = transform->g_clut; |
586 | 0 | float* b_table = transform->b_clut; |
587 | 0 | float c0_r, c1_r, c2_r, c3_r; |
588 | 0 | float c0_g, c1_g, c2_g, c3_g; |
589 | 0 | float c0_b, c1_b, c2_b, c3_b; |
590 | 0 | float clut_r, clut_g, clut_b; |
591 | 0 | for (i = 0; i < length; i++) { |
592 | 0 | unsigned char in_r = *src++; |
593 | 0 | unsigned char in_g = *src++; |
594 | 0 | unsigned char in_b = *src++; |
595 | 0 | unsigned char in_a = *src++; |
596 | 0 | float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; |
597 | 0 |
|
598 | 0 | int x = in_r * (transform->grid_size-1) / 255; |
599 | 0 | int y = in_g * (transform->grid_size-1) / 255; |
600 | 0 | int z = in_b * (transform->grid_size-1) / 255; |
601 | 0 | int x_n = int_div_ceil(in_r * (transform->grid_size-1), 255); |
602 | 0 | int y_n = int_div_ceil(in_g * (transform->grid_size-1), 255); |
603 | 0 | int z_n = int_div_ceil(in_b * (transform->grid_size-1), 255); |
604 | 0 | float rx = linear_r * (transform->grid_size-1) - x; |
605 | 0 | float ry = linear_g * (transform->grid_size-1) - y; |
606 | 0 | float rz = linear_b * (transform->grid_size-1) - z; |
607 | 0 |
|
608 | 0 | c0_r = CLU(r_table, x, y, z); |
609 | 0 | c0_g = CLU(g_table, x, y, z); |
610 | 0 | c0_b = CLU(b_table, x, y, z); |
611 | 0 |
|
612 | 0 | if( rx >= ry ) { |
613 | 0 | if (ry >= rz) { //rx >= ry && ry >= rz |
614 | 0 | c1_r = CLU(r_table, x_n, y, z) - c0_r; |
615 | 0 | c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z); |
616 | 0 | c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); |
617 | 0 | c1_g = CLU(g_table, x_n, y, z) - c0_g; |
618 | 0 | c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z); |
619 | 0 | c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); |
620 | 0 | c1_b = CLU(b_table, x_n, y, z) - c0_b; |
621 | 0 | c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z); |
622 | 0 | c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); |
623 | 0 | } else { |
624 | 0 | if (rx >= rz) { //rx >= rz && rz >= ry |
625 | 0 | c1_r = CLU(r_table, x_n, y, z) - c0_r; |
626 | 0 | c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); |
627 | 0 | c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z); |
628 | 0 | c1_g = CLU(g_table, x_n, y, z) - c0_g; |
629 | 0 | c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); |
630 | 0 | c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z); |
631 | 0 | c1_b = CLU(b_table, x_n, y, z) - c0_b; |
632 | 0 | c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); |
633 | 0 | c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z); |
634 | 0 | } else { //rz > rx && rx >= ry |
635 | 0 | c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n); |
636 | 0 | c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); |
637 | 0 | c3_r = CLU(r_table, x, y, z_n) - c0_r; |
638 | 0 | c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n); |
639 | 0 | c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); |
640 | 0 | c3_g = CLU(g_table, x, y, z_n) - c0_g; |
641 | 0 | c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n); |
642 | 0 | c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); |
643 | 0 | c3_b = CLU(b_table, x, y, z_n) - c0_b; |
644 | 0 | } |
645 | 0 | } |
646 | 0 | } else { |
647 | 0 | if (rx >= rz) { //ry > rx && rx >= rz |
648 | 0 | c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z); |
649 | 0 | c2_r = CLU(r_table, x, y_n, z) - c0_r; |
650 | 0 | c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); |
651 | 0 | c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z); |
652 | 0 | c2_g = CLU(g_table, x, y_n, z) - c0_g; |
653 | 0 | c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); |
654 | 0 | c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z); |
655 | 0 | c2_b = CLU(b_table, x, y_n, z) - c0_b; |
656 | 0 | c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); |
657 | 0 | } else { |
658 | 0 | if (ry >= rz) { //ry >= rz && rz > rx |
659 | 0 | c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); |
660 | 0 | c2_r = CLU(r_table, x, y_n, z) - c0_r; |
661 | 0 | c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z); |
662 | 0 | c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); |
663 | 0 | c2_g = CLU(g_table, x, y_n, z) - c0_g; |
664 | 0 | c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z); |
665 | 0 | c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); |
666 | 0 | c2_b = CLU(b_table, x, y_n, z) - c0_b; |
667 | 0 | c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z); |
668 | 0 | } else { //rz > ry && ry > rx |
669 | 0 | c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); |
670 | 0 | c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n); |
671 | 0 | c3_r = CLU(r_table, x, y, z_n) - c0_r; |
672 | 0 | c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); |
673 | 0 | c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n); |
674 | 0 | c3_g = CLU(g_table, x, y, z_n) - c0_g; |
675 | 0 | c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); |
676 | 0 | c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n); |
677 | 0 | c3_b = CLU(b_table, x, y, z_n) - c0_b; |
678 | 0 | } |
679 | 0 | } |
680 | 0 | } |
681 | 0 | |
682 | 0 | clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz; |
683 | 0 | clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz; |
684 | 0 | clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz; |
685 | 0 |
|
686 | 0 | dest[OUTPUT_R_INDEX] = clamp_u8(clut_r*255.0f); |
687 | 0 | dest[OUTPUT_G_INDEX] = clamp_u8(clut_g*255.0f); |
688 | 0 | dest[OUTPUT_B_INDEX] = clamp_u8(clut_b*255.0f); |
689 | 0 | dest[OUTPUT_A_INDEX] = in_a; |
690 | 0 | dest += RGBA_OUTPUT_COMPONENTS; |
691 | 0 | } |
692 | 0 | } |
693 | | |
694 | | // Using lcms' tetra interpolation code. |
695 | 0 | static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) { |
696 | 0 | unsigned int i; |
697 | 0 | int xy_len = 1; |
698 | 0 | int x_len = transform->grid_size; |
699 | 0 | int len = x_len * x_len; |
700 | 0 | float* r_table = transform->r_clut; |
701 | 0 | float* g_table = transform->g_clut; |
702 | 0 | float* b_table = transform->b_clut; |
703 | 0 | float c0_r, c1_r, c2_r, c3_r; |
704 | 0 | float c0_g, c1_g, c2_g, c3_g; |
705 | 0 | float c0_b, c1_b, c2_b, c3_b; |
706 | 0 | float clut_r, clut_g, clut_b; |
707 | 0 | for (i = 0; i < length; i++) { |
708 | 0 | unsigned char in_r = *src++; |
709 | 0 | unsigned char in_g = *src++; |
710 | 0 | unsigned char in_b = *src++; |
711 | 0 | float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; |
712 | 0 |
|
713 | 0 | int x = in_r * (transform->grid_size-1) / 255; |
714 | 0 | int y = in_g * (transform->grid_size-1) / 255; |
715 | 0 | int z = in_b * (transform->grid_size-1) / 255; |
716 | 0 | int x_n = int_div_ceil(in_r * (transform->grid_size-1), 255); |
717 | 0 | int y_n = int_div_ceil(in_g * (transform->grid_size-1), 255); |
718 | 0 | int z_n = int_div_ceil(in_b * (transform->grid_size-1), 255); |
719 | 0 | float rx = linear_r * (transform->grid_size-1) - x; |
720 | 0 | float ry = linear_g * (transform->grid_size-1) - y; |
721 | 0 | float rz = linear_b * (transform->grid_size-1) - z; |
722 | 0 |
|
723 | 0 | c0_r = CLU(r_table, x, y, z); |
724 | 0 | c0_g = CLU(g_table, x, y, z); |
725 | 0 | c0_b = CLU(b_table, x, y, z); |
726 | 0 |
|
727 | 0 | if( rx >= ry ) { |
728 | 0 | if (ry >= rz) { //rx >= ry && ry >= rz |
729 | 0 | c1_r = CLU(r_table, x_n, y, z) - c0_r; |
730 | 0 | c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z); |
731 | 0 | c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); |
732 | 0 | c1_g = CLU(g_table, x_n, y, z) - c0_g; |
733 | 0 | c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z); |
734 | 0 | c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); |
735 | 0 | c1_b = CLU(b_table, x_n, y, z) - c0_b; |
736 | 0 | c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z); |
737 | 0 | c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); |
738 | 0 | } else { |
739 | 0 | if (rx >= rz) { //rx >= rz && rz >= ry |
740 | 0 | c1_r = CLU(r_table, x_n, y, z) - c0_r; |
741 | 0 | c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); |
742 | 0 | c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z); |
743 | 0 | c1_g = CLU(g_table, x_n, y, z) - c0_g; |
744 | 0 | c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); |
745 | 0 | c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z); |
746 | 0 | c1_b = CLU(b_table, x_n, y, z) - c0_b; |
747 | 0 | c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); |
748 | 0 | c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z); |
749 | 0 | } else { //rz > rx && rx >= ry |
750 | 0 | c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n); |
751 | 0 | c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); |
752 | 0 | c3_r = CLU(r_table, x, y, z_n) - c0_r; |
753 | 0 | c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n); |
754 | 0 | c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); |
755 | 0 | c3_g = CLU(g_table, x, y, z_n) - c0_g; |
756 | 0 | c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n); |
757 | 0 | c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); |
758 | 0 | c3_b = CLU(b_table, x, y, z_n) - c0_b; |
759 | 0 | } |
760 | 0 | } |
761 | 0 | } else { |
762 | 0 | if (rx >= rz) { //ry > rx && rx >= rz |
763 | 0 | c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z); |
764 | 0 | c2_r = CLU(r_table, x, y_n, z) - c0_r; |
765 | 0 | c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); |
766 | 0 | c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z); |
767 | 0 | c2_g = CLU(g_table, x, y_n, z) - c0_g; |
768 | 0 | c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); |
769 | 0 | c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z); |
770 | 0 | c2_b = CLU(b_table, x, y_n, z) - c0_b; |
771 | 0 | c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); |
772 | 0 | } else { |
773 | 0 | if (ry >= rz) { //ry >= rz && rz > rx |
774 | 0 | c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); |
775 | 0 | c2_r = CLU(r_table, x, y_n, z) - c0_r; |
776 | 0 | c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z); |
777 | 0 | c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); |
778 | 0 | c2_g = CLU(g_table, x, y_n, z) - c0_g; |
779 | 0 | c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z); |
780 | 0 | c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); |
781 | 0 | c2_b = CLU(b_table, x, y_n, z) - c0_b; |
782 | 0 | c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z); |
783 | 0 | } else { //rz > ry && ry > rx |
784 | 0 | c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); |
785 | 0 | c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n); |
786 | 0 | c3_r = CLU(r_table, x, y, z_n) - c0_r; |
787 | 0 | c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); |
788 | 0 | c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n); |
789 | 0 | c3_g = CLU(g_table, x, y, z_n) - c0_g; |
790 | 0 | c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); |
791 | 0 | c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n); |
792 | 0 | c3_b = CLU(b_table, x, y, z_n) - c0_b; |
793 | 0 | } |
794 | 0 | } |
795 | 0 | } |
796 | 0 | |
797 | 0 | clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz; |
798 | 0 | clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz; |
799 | 0 | clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz; |
800 | 0 |
|
801 | 0 | dest[OUTPUT_R_INDEX] = clamp_u8(clut_r*255.0f); |
802 | 0 | dest[OUTPUT_G_INDEX] = clamp_u8(clut_g*255.0f); |
803 | 0 | dest[OUTPUT_B_INDEX] = clamp_u8(clut_b*255.0f); |
804 | 0 | dest += RGB_OUTPUT_COMPONENTS; |
805 | 0 | } |
806 | 0 | } |
807 | | |
808 | | static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
809 | 0 | { |
810 | 0 | unsigned int i; |
811 | 0 | float (*mat)[4] = transform->matrix; |
812 | 0 | for (i = 0; i < length; i++) { |
813 | 0 | unsigned char device_r = *src++; |
814 | 0 | unsigned char device_g = *src++; |
815 | 0 | unsigned char device_b = *src++; |
816 | 0 | float out_device_r, out_device_g, out_device_b; |
817 | 0 |
|
818 | 0 | float linear_r = transform->input_gamma_table_r[device_r]; |
819 | 0 | float linear_g = transform->input_gamma_table_g[device_g]; |
820 | 0 | float linear_b = transform->input_gamma_table_b[device_b]; |
821 | 0 |
|
822 | 0 | float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
823 | 0 | float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
824 | 0 | float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
825 | 0 |
|
826 | 0 | out_linear_r = clamp_float(out_linear_r); |
827 | 0 | out_linear_g = clamp_float(out_linear_g); |
828 | 0 | out_linear_b = clamp_float(out_linear_b); |
829 | 0 |
|
830 | 0 | out_device_r = lut_interp_linear(out_linear_r, |
831 | 0 | transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); |
832 | 0 | out_device_g = lut_interp_linear(out_linear_g, |
833 | 0 | transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); |
834 | 0 | out_device_b = lut_interp_linear(out_linear_b, |
835 | 0 | transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); |
836 | 0 |
|
837 | 0 | dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255); |
838 | 0 | dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255); |
839 | 0 | dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255); |
840 | 0 | dest += RGB_OUTPUT_COMPONENTS; |
841 | 0 | } |
842 | 0 | } |
843 | | |
844 | | static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
845 | 0 | { |
846 | 0 | unsigned int i; |
847 | 0 | float (*mat)[4] = transform->matrix; |
848 | 0 | for (i = 0; i < length; i++) { |
849 | 0 | unsigned char device_r = *src++; |
850 | 0 | unsigned char device_g = *src++; |
851 | 0 | unsigned char device_b = *src++; |
852 | 0 | unsigned char alpha = *src++; |
853 | 0 | float out_device_r, out_device_g, out_device_b; |
854 | 0 |
|
855 | 0 | float linear_r = transform->input_gamma_table_r[device_r]; |
856 | 0 | float linear_g = transform->input_gamma_table_g[device_g]; |
857 | 0 | float linear_b = transform->input_gamma_table_b[device_b]; |
858 | 0 |
|
859 | 0 | float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
860 | 0 | float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
861 | 0 | float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
862 | 0 |
|
863 | 0 | out_linear_r = clamp_float(out_linear_r); |
864 | 0 | out_linear_g = clamp_float(out_linear_g); |
865 | 0 | out_linear_b = clamp_float(out_linear_b); |
866 | 0 |
|
867 | 0 | out_device_r = lut_interp_linear(out_linear_r, |
868 | 0 | transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); |
869 | 0 | out_device_g = lut_interp_linear(out_linear_g, |
870 | 0 | transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); |
871 | 0 | out_device_b = lut_interp_linear(out_linear_b, |
872 | 0 | transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); |
873 | 0 |
|
874 | 0 | dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255); |
875 | 0 | dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255); |
876 | 0 | dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255); |
877 | 0 | dest[OUTPUT_A_INDEX] = alpha; |
878 | 0 | dest += RGBA_OUTPUT_COMPONENTS; |
879 | 0 | } |
880 | 0 | } |
881 | | |
#if 0
/*
 * Compiled out.  RGB matrix transform that writes the linear result directly:
 * the input bytes go through the input gamma tables and the matrix, and the
 * products are scaled by 255 and clamped without any output gamma LUT.
 */
static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	/* size_t to match `length` (avoids a signed/unsigned comparison) */
	size_t i;
	float (*mat)[4] = transform->matrix;
	for (i = 0; i < length; i++) {
		unsigned char device_r = *src++;
		unsigned char device_g = *src++;
		unsigned char device_b = *src++;

		float linear_r = transform->input_gamma_table_r[device_r];
		float linear_g = transform->input_gamma_table_g[device_g];
		float linear_b = transform->input_gamma_table_b[device_b];

		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;

		*dest++ = clamp_u8(out_linear_r*255);
		*dest++ = clamp_u8(out_linear_g*255);
		*dest++ = clamp_u8(out_linear_b*255);
	}
}
#endif
906 | | |
/*
 * If users create and destroy objects on different threads, even if the same
 * objects aren't used on different threads at the same time, we can still run
 * into trouble with refcounts if they aren't atomic.
 *
 * This can lead to us prematurely deleting the precache if threads get unlucky
 * and write the wrong value to the ref count.
 */
915 | | static struct precache_output *precache_reference(struct precache_output *p) |
916 | 0 | { |
917 | 0 | qcms_atomic_increment(p->ref_count); |
918 | 0 | return p; |
919 | 0 | } |
920 | | |
921 | | static struct precache_output *precache_create() |
922 | 0 | { |
923 | 0 | struct precache_output *p = malloc(sizeof(struct precache_output)); |
924 | 0 | if (p) |
925 | 0 | p->ref_count = 1; |
926 | 0 | return p; |
927 | 0 | } |
928 | | |
929 | | void precache_release(struct precache_output *p) |
930 | 0 | { |
931 | 0 | if (qcms_atomic_decrement(p->ref_count) == 0) { |
932 | 0 | free(p); |
933 | 0 | } |
934 | 0 | } |
935 | | |
936 | | #ifdef HAVE_POSIX_MEMALIGN |
937 | | static qcms_transform *transform_alloc(void) |
938 | 0 | { |
939 | 0 | qcms_transform *t; |
940 | 0 |
|
941 | 0 | void *allocated_memory; |
942 | 0 | if (!posix_memalign(&allocated_memory, 16, sizeof(qcms_transform))) { |
943 | 0 | /* Doing a memset to initialise all bits to 'zero'*/ |
944 | 0 | memset(allocated_memory, 0, sizeof(qcms_transform)); |
945 | 0 | t = allocated_memory; |
946 | 0 | return t; |
947 | 0 | } else { |
948 | 0 | return NULL; |
949 | 0 | } |
950 | 0 | } |
951 | | static void transform_free(qcms_transform *t) |
952 | 0 | { |
953 | 0 | free(t); |
954 | 0 | } |
955 | | #else |
956 | | static qcms_transform *transform_alloc(void) |
957 | | { |
958 | | /* transform needs to be aligned on a 16byte boundrary */ |
959 | | char *original_block = calloc(sizeof(qcms_transform) + sizeof(void*) + 16, 1); |
960 | | /* make room for a pointer to the block returned by calloc */ |
961 | | void *transform_start = original_block + sizeof(void*); |
962 | | /* align transform_start */ |
963 | | qcms_transform *transform_aligned = (qcms_transform*)(((uintptr_t)transform_start + 15) & ~0xf); |
964 | | |
965 | | /* store a pointer to the block returned by calloc so that we can free it later */ |
966 | | void **(original_block_ptr) = (void**)transform_aligned; |
967 | | if (!original_block) |
968 | | return NULL; |
969 | | original_block_ptr--; |
970 | | *original_block_ptr = original_block; |
971 | | |
972 | | return transform_aligned; |
973 | | } |
974 | | static void transform_free(qcms_transform *t) |
975 | | { |
976 | | /* get at the pointer to the unaligned block returned by calloc */ |
977 | | void **p = (void**)t; |
978 | | p--; |
979 | | free(*p); |
980 | | } |
981 | | #endif |
982 | | |
983 | | void qcms_transform_release(qcms_transform *t) |
984 | 0 | { |
985 | 0 | /* ensure we only free the gamma tables once even if there are |
986 | 0 | * multiple references to the same data */ |
987 | 0 |
|
988 | 0 | if (t->output_table_r) |
989 | 0 | precache_release(t->output_table_r); |
990 | 0 | if (t->output_table_g) |
991 | 0 | precache_release(t->output_table_g); |
992 | 0 | if (t->output_table_b) |
993 | 0 | precache_release(t->output_table_b); |
994 | 0 |
|
995 | 0 | free(t->input_gamma_table_r); |
996 | 0 | if (t->input_gamma_table_g != t->input_gamma_table_r) |
997 | 0 | free(t->input_gamma_table_g); |
998 | 0 | if (t->input_gamma_table_g != t->input_gamma_table_r && |
999 | 0 | t->input_gamma_table_g != t->input_gamma_table_b) |
1000 | 0 | free(t->input_gamma_table_b); |
1001 | 0 |
|
1002 | 0 | free(t->input_gamma_table_gray); |
1003 | 0 |
|
1004 | 0 | free(t->output_gamma_lut_r); |
1005 | 0 | free(t->output_gamma_lut_g); |
1006 | 0 | free(t->output_gamma_lut_b); |
1007 | 0 |
|
1008 | 0 | /* r_clut points to beginning of buffer allocated in qcms_transform_precacheLUT_float */ |
1009 | 0 | if (t->r_clut) |
1010 | 0 | free(t->r_clut); |
1011 | 0 |
|
1012 | 0 | transform_free(t); |
1013 | 0 | } |
1014 | | |
1015 | | #ifdef X86 |
1016 | | // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in |
1017 | | // mozilla/jpeg) |
1018 | | // ------------------------------------------------------------------------- |
1019 | | #if defined(_M_IX86) && defined(_MSC_VER) |
1020 | | #define HAS_CPUID |
/* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC
   register - I'm not sure if that ever happens on windows, but cpuid isn't
   on the critical path so we just preserve the register to be safe and to be
   consistent with the non-windows version.

   Executes CPUID with EAX = fxn and stores the resulting EAX, EBX, ECX and
   EDX in *a, *b, *c and *d.  EBX is swapped with ESI before the instruction
   and swapped back afterwards. */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
	uint32_t a_, b_, c_, d_;
	__asm {
		xchg ebx, esi
		mov eax, fxn
		cpuid
		mov a_, eax
		mov b_, ebx
		mov c_, ecx
		mov d_, edx
		xchg ebx, esi
	}
	*a = a_;
	*b = b_;
	*c = c_;
	*d = d_;
}
1042 | | #elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386)) |
1043 | | #define HAS_CPUID |
/* Get us a CPUID function. We can't use ebx because it's the PIC register on
   some platforms, so we use ESI instead and save ebx to avoid clobbering it.

   Executes CPUID with EAX = fxn and stores the resulting EAX, EBX, ECX and
   EDX in *a, *b, *c and *d.  The second xchgl moves CPUID's EBX result into
   ESI (the "=S" output) while restoring the caller's EBX. */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {

	uint32_t a_, b_, c_, d_;
	__asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
			      : "=a" (a_), "=S" (b_), "=c" (c_), "=d" (d_) : "a" (fxn));
	*a = a_;
	*b = b_;
	*c = c_;
	*d = d_;
}
1056 | | #endif |
1057 | | |
1058 | | // -------------------------Runtime SSEx Detection----------------------------- |
1059 | | |
1060 | | /* MMX is always supported per |
1061 | | * Gecko v1.9.1 minimum CPU requirements */ |
1062 | | #define SSE1_EDX_MASK (1UL << 25) |
1063 | | #define SSE2_EDX_MASK (1UL << 26) |
1064 | | #define SSE3_ECX_MASK (1UL << 0) |
1065 | | |
/*
 * Report the highest SSE level usable on this CPU: 3, 2, 1, or 0 for none.
 *
 * 64-bit x86 builds return 2 unconditionally.  Builds that have a cpuid()
 * helper query CPUID function 1 on the first call and cache the answer in a
 * function-local static.  All other builds return 0.
 */
static int sse_version_available(void)
{
#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
	/* we know at build time that 64-bit CPUs always have SSE2
	 * this tells the compiler that non-SSE2 branches will never be
	 * taken (i.e. OK to optimize away the SSE1 and non-SIMD code) */
	return 2;
#elif defined(HAS_CPUID)
	static int sse_version = -1;

	if (sse_version == -1) {
		uint32_t eax, ebx, ecx, edx;
		int detected = 0;

		cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
		/* test from the newest extension down; the first hit wins */
		if (ecx & SSE3_ECX_MASK)
			detected = 3;
		else if (edx & SSE2_EDX_MASK)
			detected = 2;
		else if (edx & SSE1_EDX_MASK)
			detected = 1;
		sse_version = detected;
	}

	return sse_version;
#else
	return 0;
#endif
}
1094 | | #endif |
1095 | | |
/* Bradford matrix used by compute_whitepoint_adaption(). */
static const struct matrix bradford_matrix = {{	{ 0.8951f, 0.2664f,-0.1614f},
						{-0.7502f, 1.7135f, 0.0367f},
						{ 0.0389f,-0.0685f, 1.0296f}},
						false};

/* Inverse of bradford_matrix, kept as a constant table. */
static const struct matrix bradford_matrix_inv = {{ { 0.9869929f,-0.1470543f, 0.1599627f},
						    { 0.4323053f, 0.5183603f, 0.0492912f},
						    {-0.0085287f, 0.0400428f, 0.9684867f}},
						    false};
1105 | | |
1106 | | // See ICCv4 E.3 |
1107 | 0 | struct matrix compute_whitepoint_adaption(float X, float Y, float Z) { |
1108 | 0 | float p = (0.96422f*bradford_matrix.m[0][0] + 1.000f*bradford_matrix.m[1][0] + 0.82521f*bradford_matrix.m[2][0]) / |
1109 | 0 | (X*bradford_matrix.m[0][0] + Y*bradford_matrix.m[1][0] + Z*bradford_matrix.m[2][0] ); |
1110 | 0 | float y = (0.96422f*bradford_matrix.m[0][1] + 1.000f*bradford_matrix.m[1][1] + 0.82521f*bradford_matrix.m[2][1]) / |
1111 | 0 | (X*bradford_matrix.m[0][1] + Y*bradford_matrix.m[1][1] + Z*bradford_matrix.m[2][1] ); |
1112 | 0 | float b = (0.96422f*bradford_matrix.m[0][2] + 1.000f*bradford_matrix.m[1][2] + 0.82521f*bradford_matrix.m[2][2]) / |
1113 | 0 | (X*bradford_matrix.m[0][2] + Y*bradford_matrix.m[1][2] + Z*bradford_matrix.m[2][2] ); |
1114 | 0 | struct matrix white_adaption = {{ {p,0,0}, {0,y,0}, {0,0,b}}, false}; |
1115 | 0 | return matrix_multiply( bradford_matrix_inv, matrix_multiply(white_adaption, bradford_matrix) ); |
1116 | 0 | } |
1117 | | |
1118 | | void qcms_profile_precache_output_transform(qcms_profile *profile) |
1119 | 0 | { |
1120 | 0 | /* we only support precaching on rgb profiles */ |
1121 | 0 | if (profile->color_space != RGB_SIGNATURE) |
1122 | 0 | return; |
1123 | 0 | |
1124 | 0 | if (qcms_supports_iccv4) { |
1125 | 0 | /* don't precache since we will use the B2A LUT */ |
1126 | 0 | if (profile->B2A0) |
1127 | 0 | return; |
1128 | 0 | |
1129 | 0 | /* don't precache since we will use the mBA LUT */ |
1130 | 0 | if (profile->mBA) |
1131 | 0 | return; |
1132 | 0 | } |
1133 | 0 | |
1134 | 0 | /* don't precache if we do not have the TRC curves */ |
1135 | 0 | if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC) |
1136 | 0 | return; |
1137 | 0 | |
1138 | 0 | if (!profile->output_table_r) { |
1139 | 0 | profile->output_table_r = precache_create(); |
1140 | 0 | if (profile->output_table_r && |
1141 | 0 | !compute_precache(profile->redTRC, profile->output_table_r->data)) { |
1142 | 0 | precache_release(profile->output_table_r); |
1143 | 0 | profile->output_table_r = NULL; |
1144 | 0 | } |
1145 | 0 | } |
1146 | 0 | if (!profile->output_table_g) { |
1147 | 0 | profile->output_table_g = precache_create(); |
1148 | 0 | if (profile->output_table_g && |
1149 | 0 | !compute_precache(profile->greenTRC, profile->output_table_g->data)) { |
1150 | 0 | precache_release(profile->output_table_g); |
1151 | 0 | profile->output_table_g = NULL; |
1152 | 0 | } |
1153 | 0 | } |
1154 | 0 | if (!profile->output_table_b) { |
1155 | 0 | profile->output_table_b = precache_create(); |
1156 | 0 | if (profile->output_table_b && |
1157 | 0 | !compute_precache(profile->blueTRC, profile->output_table_b->data)) { |
1158 | 0 | precache_release(profile->output_table_b); |
1159 | 0 | profile->output_table_b = NULL; |
1160 | 0 | } |
1161 | 0 | } |
1162 | 0 | } |
1163 | | |
1164 | | /* Replace the current transformation with a LUT transformation using a given number of sample points */ |
1165 | | qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms_profile *in, qcms_profile *out, |
1166 | | int samples, qcms_data_type in_type) |
1167 | 0 | { |
1168 | 0 | /* The range between which 2 consecutive sample points can be used to interpolate */ |
1169 | 0 | uint16_t x,y,z; |
1170 | 0 | uint32_t l; |
1171 | 0 | uint32_t lutSize = 3 * samples * samples * samples; |
1172 | 0 | float* src = NULL; |
1173 | 0 | float* dest = NULL; |
1174 | 0 | float* lut = NULL; |
1175 | 0 |
|
1176 | 0 | src = malloc(lutSize*sizeof(float)); |
1177 | 0 | dest = malloc(lutSize*sizeof(float)); |
1178 | 0 |
|
1179 | 0 | if (src && dest) { |
1180 | 0 | /* Prepare a list of points we want to sample */ |
1181 | 0 | l = 0; |
1182 | 0 | for (x = 0; x < samples; x++) { |
1183 | 0 | for (y = 0; y < samples; y++) { |
1184 | 0 | for (z = 0; z < samples; z++) { |
1185 | 0 | src[l++] = x / (float)(samples-1); |
1186 | 0 | src[l++] = y / (float)(samples-1); |
1187 | 0 | src[l++] = z / (float)(samples-1); |
1188 | 0 | } |
1189 | 0 | } |
1190 | 0 | } |
1191 | 0 |
|
1192 | 0 | lut = qcms_chain_transform(in, out, src, dest, lutSize); |
1193 | 0 | if (lut) { |
1194 | 0 | transform->r_clut = &lut[0]; |
1195 | 0 | transform->g_clut = &lut[1]; |
1196 | 0 | transform->b_clut = &lut[2]; |
1197 | 0 | transform->grid_size = samples; |
1198 | 0 | if (in_type == QCMS_DATA_RGBA_8) { |
1199 | 0 | transform->transform_fn = qcms_transform_data_tetra_clut_rgba; |
1200 | 0 | } else { |
1201 | 0 | transform->transform_fn = qcms_transform_data_tetra_clut; |
1202 | 0 | } |
1203 | 0 | } |
1204 | 0 | } |
1205 | 0 |
|
1206 | 0 |
|
1207 | 0 | //XXX: qcms_modular_transform_data may return either the src or dest buffer. If so it must not be free-ed |
1208 | 0 | // It will be stored in r_clut, which will be cleaned up in qcms_transform_release. |
1209 | 0 | if (src && lut != src) { |
1210 | 0 | free(src); |
1211 | 0 | } |
1212 | 0 | if (dest && lut != dest) { |
1213 | 0 | free(dest); |
1214 | 0 | } |
1215 | 0 |
|
1216 | 0 | if (lut == NULL) { |
1217 | 0 | return NULL; |
1218 | 0 | } |
1219 | 0 | return transform; |
1220 | 0 | } |
1221 | | |
1222 | 0 | #define NO_MEM_TRANSFORM NULL |
1223 | | |
/*
 * Create a transform that converts pixels of `in_type` described by profile
 * `in` to pixels of `out_type` described by profile `out`.
 *
 * out_type must be QCMS_DATA_RGB_8 or QCMS_DATA_RGBA_8.  Three strategies
 * are used:
 *  - with ICC v4 support enabled, RGB(A) input and a LUT tag on either
 *    profile, the whole pipeline is sampled into a 33x33x33 CLUT;
 *  - for RGB input profiles, input gamma tables plus a combined colorant
 *    matrix are built, followed by either the output profile's precache
 *    tables or freshly built output gamma LUTs;
 *  - for gray input profiles, a single gray input gamma table is built.
 *
 * `intent` is not referenced anywhere in this function.
 *
 * Returns the new transform, or NULL (NO_MEM_TRANSFORM is also NULL) on any
 * failure; everything allocated so far is released before returning NULL.
 */
qcms_transform* qcms_transform_create(
		qcms_profile *in, qcms_data_type in_type,
		qcms_profile *out, qcms_data_type out_type,
		qcms_intent intent)
{
	bool precache = false;

	qcms_transform *transform = transform_alloc();
	if (!transform) {
		return NULL;
	}
	if (out_type != QCMS_DATA_RGB_8 &&
	    out_type != QCMS_DATA_RGBA_8) {
		assert(0 && "output type");
		qcms_transform_release(transform);
		return NULL;
	}

	/* the output profile was precached only if all three tables exist */
	if (out->output_table_r &&
	    out->output_table_g &&
	    out->output_table_b) {
		precache = true;
	}

	// This precache assumes RGB_SIGNATURE (fails on GRAY_SIGNATURE, for instance)
	// NOTE(review): qcms_profile_precache_output_transform tests profile->mBA for
	// output profiles; `out->mAB` below may have been meant as `out->mBA` - confirm.
	if (qcms_supports_iccv4 &&
	    (in_type == QCMS_DATA_RGB_8 || in_type == QCMS_DATA_RGBA_8) &&
	    (in->A2B0 || out->B2A0 || in->mAB || out->mAB))
	{
		// Precache the transformation to a CLUT 33x33x33 in size.
		// 33 is used by many profiles and works well in practice.
		// This evenly divides 256 into blocks of 8x8x8.
		// TODO For transforming small data sets of about 200x200 or less
		// precaching should be avoided.
		qcms_transform *result = qcms_transform_precacheLUT_float(transform, in, out, 33, in_type);
		if (!result) {
			assert(0 && "precacheLUT failed");
			qcms_transform_release(transform);
			return NULL;
		}
		return result;
	}

	if (precache) {
		/* share the output profile's tables; each gets an extra reference */
		transform->output_table_r = precache_reference(out->output_table_r);
		transform->output_table_g = precache_reference(out->output_table_g);
		transform->output_table_b = precache_reference(out->output_table_b);
	} else {
		if (!out->redTRC || !out->greenTRC || !out->blueTRC) {
			qcms_transform_release(transform);
			return NO_MEM_TRANSFORM;
		}
		build_output_lut(out->redTRC, &transform->output_gamma_lut_r, &transform->output_gamma_lut_r_length);
		build_output_lut(out->greenTRC, &transform->output_gamma_lut_g, &transform->output_gamma_lut_g_length);
		build_output_lut(out->blueTRC, &transform->output_gamma_lut_b, &transform->output_gamma_lut_b_length);
		if (!transform->output_gamma_lut_r || !transform->output_gamma_lut_g || !transform->output_gamma_lut_b) {
			qcms_transform_release(transform);
			return NO_MEM_TRANSFORM;
		}
	}

	if (in->color_space == RGB_SIGNATURE) {
		struct matrix in_matrix, out_matrix, result;

		if (in_type != QCMS_DATA_RGB_8 &&
		    in_type != QCMS_DATA_RGBA_8){
			assert(0 && "input type");
			qcms_transform_release(transform);
			return NULL;
		}
		/* Pick the per-pixel routine.  The preprocessor blocks below form a
		 * single if / else-if chain whose final else is the braced block
		 * selecting the plain C precache routines. */
		if (precache) {
#ifdef X86
		    if (sse_version_available() >= 2) {
			    if (in_type == QCMS_DATA_RGB_8)
				    transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
			    else
				    transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;

#if !(defined(_MSC_VER) && defined(_M_AMD64))
		    /* Microsoft Compiler for x64 doesn't support MMX.
		     * SSE code uses MMX so that we disable on x64 */
		    } else
		    if (sse_version_available() >= 1) {
			    if (in_type == QCMS_DATA_RGB_8)
				    transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1;
			    else
				    transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1;
#endif
		    } else
#endif
#if (defined(__POWERPC__) || defined(__powerpc__) && !defined(__NO_FPRS__))
		    if (have_altivec()) {
			    if (in_type == QCMS_DATA_RGB_8)
				    transform->transform_fn = qcms_transform_data_rgb_out_lut_altivec;
			    else
				    transform->transform_fn = qcms_transform_data_rgba_out_lut_altivec;
		    } else
#endif
			{
				if (in_type == QCMS_DATA_RGB_8)
					transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
				else
					transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
			}
		} else {
			if (in_type == QCMS_DATA_RGB_8)
				transform->transform_fn = qcms_transform_data_rgb_out_lut;
			else
				transform->transform_fn = qcms_transform_data_rgba_out_lut;
		}

		//XXX: avoid duplicating tables if we can
		transform->input_gamma_table_r = build_input_gamma_table(in->redTRC);
		transform->input_gamma_table_g = build_input_gamma_table(in->greenTRC);
		transform->input_gamma_table_b = build_input_gamma_table(in->blueTRC);
		if (!transform->input_gamma_table_r || !transform->input_gamma_table_g || !transform->input_gamma_table_b) {
			qcms_transform_release(transform);
			return NO_MEM_TRANSFORM;
		}


		/* build combined colorant matrix */
		in_matrix = build_colorant_matrix(in);
		out_matrix = build_colorant_matrix(out);
		out_matrix = matrix_invert(out_matrix);
		if (out_matrix.invalid) {
			qcms_transform_release(transform);
			return NULL;
		}
		result = matrix_multiply(out_matrix, in_matrix);

		/* check for NaN values in the matrix and bail if we find any */
		for (unsigned i = 0 ; i < 3 ; ++i) {
			for (unsigned j = 0 ; j < 3 ; ++j) {
				/* a NaN is the only value that compares unequal to itself */
				if (result.m[i][j] != result.m[i][j]) {
					qcms_transform_release(transform);
					return NULL;
				}
			}
		}

		/* store the results in column major mode
		 * this makes doing the multiplication with sse easier */
		transform->matrix[0][0] = result.m[0][0];
		transform->matrix[1][0] = result.m[0][1];
		transform->matrix[2][0] = result.m[0][2];
		transform->matrix[0][1] = result.m[1][0];
		transform->matrix[1][1] = result.m[1][1];
		transform->matrix[2][1] = result.m[1][2];
		transform->matrix[0][2] = result.m[2][0];
		transform->matrix[1][2] = result.m[2][1];
		transform->matrix[2][2] = result.m[2][2];

	} else if (in->color_space == GRAY_SIGNATURE) {
		if (in_type != QCMS_DATA_GRAY_8 &&
		    in_type != QCMS_DATA_GRAYA_8){
			assert(0 && "input type");
			qcms_transform_release(transform);
			return NULL;
		}

		transform->input_gamma_table_gray = build_input_gamma_table(in->grayTRC);
		if (!transform->input_gamma_table_gray) {
			qcms_transform_release(transform);
			return NO_MEM_TRANSFORM;
		}

		if (precache) {
			if (in_type == QCMS_DATA_GRAY_8) {
				transform->transform_fn = qcms_transform_data_gray_out_precache;
			} else {
				transform->transform_fn = qcms_transform_data_graya_out_precache;
			}
		} else {
			if (in_type == QCMS_DATA_GRAY_8) {
				transform->transform_fn = qcms_transform_data_gray_out_lut;
			} else {
				transform->transform_fn = qcms_transform_data_graya_out_lut;
			}
		}
	} else {
		assert(0 && "unexpected colorspace");
		qcms_transform_release(transform);
		return NULL;
	}
	return transform;
}
1411 | | |
1412 | | #if defined(__GNUC__) && defined(__i386__) |
1413 | | /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */ |
1414 | | __attribute__((__force_align_arg_pointer__)) |
1415 | | #endif |
1416 | | void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length) |
1417 | 0 | { |
1418 | 0 | transform->transform_fn(transform, src, dest, length); |
1419 | 0 | } |
1420 | | |
1421 | | qcms_bool qcms_supports_iccv4; |
1422 | | void qcms_enable_iccv4() |
1423 | 0 | { |
1424 | 0 | qcms_supports_iccv4 = true; |
1425 | 0 | } |