/src/xpdf-4.04/splash/SplashMath.h
Line | Count | Source (jump to first uncovered line) |
1 | | //======================================================================== |
2 | | // |
3 | | // SplashMath.h |
4 | | // |
5 | | // Copyright 2003-2013 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | #ifndef SPLASHMATH_H |
10 | | #define SPLASHMATH_H |
11 | | |
12 | | #include <aconf.h> |
13 | | |
14 | | #if USE_FIXEDPOINT |
15 | | # include "FixedPoint.h" |
16 | | #else |
17 | | # include <math.h> |
18 | | # if (defined(__GNUC__) && defined(__SSE2__)) || \ |
19 | | (defined(_WIN32) && (_M_IX86_FP == 2 || defined(_M_X64))) |
20 | | # include <emmintrin.h> |
21 | | # endif |
22 | | #endif |
23 | | #include "SplashTypes.h" |
24 | | |
25 | 0 | static inline SplashCoord splashAbs(SplashCoord x) { |
26 | 0 | #if USE_FIXEDPOINT |
27 | 0 | return FixedPoint::abs(x); |
28 | 0 | #else |
29 | 0 | return fabs(x); |
30 | 0 | #endif |
31 | 0 | } |
32 | | |
// floor() and (int)() are implemented separately, which results
// in changing the FPCW multiple times - so we optimize it with
// some inline assembly or SSE intrinsics.
//
// Converts x to an int, rounding toward negative infinity.  Exactly
// one of the implementations below is compiled, chosen by the
// preprocessor: fixed point, SSE2 intrinsics, x87 inline assembly
// (gcc/clang or VC), or the portable floor()-then-cast fallback.
//
// NOTE(review): none of the branches checks for NaN or for values
// outside the range of int; callers presumably clamp first -- confirm.
static inline int splashFloor(SplashCoord x) {
#if USE_FIXEDPOINT

  //--- fixed point

  return FixedPoint::floor(x);

#elif (defined(__GNUC__) && defined(__SSE2__)) || \
      (defined(_WIN32) && (_M_IX86_FP == 2 || defined(_M_X64)))

  //--- SSE2 intrinsics
  // NB: 64-bit x86 guarantees availability of SSE2.

  __m128d m1, m2;
  int i, s;

  // Truncate toward zero, convert the truncated value back to a
  // double, and compare it with the original: if the truncated value
  // is greater than x (x was negative with a fractional part), step
  // down by one to get the floor.
  m1 = _mm_set_sd(x);
  i = _mm_cvttsd_si32(m1);
  m2 = _mm_cvtsi32_sd(m1, i);
  s = _mm_ucomigt_sd(m2, m1);   // 1 if m2 > m1, else 0
  return i - s;

#elif defined(__GNUC__) && defined(__i386__) && !defined(__APPLE__)

  //--- x87 inline assembly (gcc/clang)
  // (this code fails on OSX for reasons I don't understand)

  Gushort oldCW, newCW, t;
  int result;

  // Sequence: save the FPU control word, build a copy with the bits
  // selected by ~0xf3ff (0x0c00) replaced by 0x0400 ("round down"),
  // load it, store st(0) -- which holds x via the "t" constraint -- as
  // a 32-bit integer, then restore the saved control word.
  //
  // NOTE(review): scratch register %3 is written before the memory
  // operands are last used but is declared "=r" rather than "=&r" --
  // confirm against the GCC extended-asm early-clobber rules.
  __asm__ volatile("fnstcw %0\n"
		   "movw   %0, %3\n"
		   "andw   $0xf3ff, %3\n"
		   "orw    $0x0400, %3\n"
		   "movw   %3, %1\n"       // round down
		   "fldcw  %1\n"
		   "fistl %2\n"
		   "fldcw  %0\n"
		   : "=m" (oldCW), "=m" (newCW), "=m" (result), "=r" (t)
		   : "t" (x));
  return result;

#elif defined(_WIN32) && defined(_M_IX86)

  //--- x87 inline assembly (VC)

  Gushort oldCW, newCW;
  int result;

  // Same control-word sequence as the gcc/clang x87 variant, except
  // that x is pushed explicitly with fld and the integer store uses
  // fistp.
  // NOTE(review): "QWORD PTR x" assumes SplashCoord is an 8-byte
  // double in this configuration -- confirm in SplashTypes.h.
  __asm fld QWORD PTR x
  __asm fnstcw WORD PTR oldCW
  __asm mov ax, WORD PTR oldCW
  __asm and ax, 0xf3ff
  __asm or ax, 0x0400
  __asm mov WORD PTR newCW, ax     // round down
  __asm fldcw WORD PTR newCW
  __asm fistp DWORD PTR result
  __asm fldcw WORD PTR oldCW
  return result;

#else

  //--- all others

  return (int)floor(x);

#endif
}
104 | | |
// ceil() and (int)() are implemented separately, which results
// in changing the FPCW multiple times - so we optimize it with
// some inline assembly or SSE intrinsics.
//
// Converts x to an int, rounding toward positive infinity.  Exactly
// one of the implementations below is compiled, chosen by the
// preprocessor: fixed point, SSE2 intrinsics, x87 inline assembly
// (gcc/clang or VC), or the portable ceil()-then-cast fallback.
//
// NOTE(review): none of the branches checks for NaN or for values
// outside the range of int; callers presumably clamp first -- confirm.
static inline int splashCeil(SplashCoord x) {
#if USE_FIXEDPOINT

  //--- fixed point

  return FixedPoint::ceil(x);

#elif (defined(__GNUC__) && defined(__SSE2__)) || \
      (defined(_WIN32) && (_M_IX86_FP == 2 || defined(_M_X64)))

  //--- SSE2 intrinsics
  // NB: 64-bit x86 guarantees availability of SSE2.

  __m128d m1, m2;
  int i, s;

  // Truncate toward zero, convert the truncated value back to a
  // double, and compare it with the original: if the truncated value
  // is less than x (x was positive with a fractional part), step up
  // by one to get the ceiling.
  m1 = _mm_set_sd(x);
  i = _mm_cvttsd_si32(m1);
  m2 = _mm_cvtsi32_sd(m1, i);
  s = _mm_ucomilt_sd(m2, m1);   // 1 if m2 < m1, else 0
  return i + s;

#elif defined(__GNUC__) && defined(__i386__) && !defined(__APPLE__)

  //--- x87 inline assembly (gcc/clang)
  // (this code fails on OSX for reasons I don't understand)

  Gushort oldCW, newCW, t;
  int result;

  // Sequence: save the FPU control word, build a copy with the bits
  // selected by ~0xf3ff (0x0c00) replaced by 0x0800 ("round up"),
  // load it, store st(0) -- which holds x via the "t" constraint -- as
  // a 32-bit integer, then restore the saved control word.
  //
  // NOTE(review): scratch register %3 is written before the memory
  // operands are last used but is declared "=r" rather than "=&r" --
  // confirm against the GCC extended-asm early-clobber rules.
  __asm__ volatile("fnstcw %0\n"
		   "movw   %0, %3\n"
		   "andw   $0xf3ff, %3\n"
		   "orw    $0x0800, %3\n"
		   "movw   %3, %1\n"       // round up
		   "fldcw  %1\n"
		   "fistl %2\n"
		   "fldcw  %0\n"
		   : "=m" (oldCW), "=m" (newCW), "=m" (result), "=r" (t)
		   : "t" (x));
  return result;

#elif defined(_WIN32) && defined(_M_IX86)

  //--- x87 inline assembly (VC)

  // ceil() and (int)() are implemented separately, which results
  // in changing the FPCW multiple times - so we optimize it with
  // some inline assembly
  //
  // Same control-word sequence as the gcc/clang x87 variant, except
  // that x is pushed explicitly with fld and the integer store uses
  // fistp.
  // NOTE(review): "QWORD PTR x" assumes SplashCoord is an 8-byte
  // double in this configuration -- confirm in SplashTypes.h.
  Gushort oldCW, newCW;
  int result;

  __asm fld QWORD PTR x
  __asm fnstcw WORD PTR oldCW
  __asm mov ax, WORD PTR oldCW
  __asm and ax, 0xf3ff
  __asm or ax, 0x0800
  __asm mov WORD PTR newCW, ax     // round up
  __asm fldcw WORD PTR newCW
  __asm fistp DWORD PTR result
  __asm fldcw WORD PTR oldCW
  return result;

#else

  //--- all others

  return (int)ceil(x);

#endif
}
179 | | |
180 | 0 | static inline int splashRound(SplashCoord x) { |
181 | 0 | #if USE_FIXEDPOINT |
182 | 0 |
|
183 | 0 | //--- fixed point |
184 | 0 |
|
185 | 0 | return FixedPoint::round(x); |
186 | 0 |
|
187 | 0 | #else |
188 | 0 |
|
189 | 0 | //--- all others |
190 | 0 |
|
191 | 0 | return splashFloor(x + 0.5); |
192 | 0 |
|
193 | 0 | #endif |
194 | 0 | } |
195 | | |
196 | 0 | static inline SplashCoord splashAvg(SplashCoord x, SplashCoord y) { |
197 | 0 | #if USE_FIXEDPOINT |
198 | 0 | return FixedPoint::avg(x, y); |
199 | 0 | #else |
200 | 0 | return 0.5 * (x + y); |
201 | 0 | #endif |
202 | 0 | } |
203 | | |
204 | 0 | static inline SplashCoord splashSqrt(SplashCoord x) { |
205 | 0 | #if USE_FIXEDPOINT |
206 | 0 | return FixedPoint::sqrt(x); |
207 | 0 | #else |
208 | 0 | return sqrt(x); |
209 | 0 | #endif |
210 | 0 | } |
211 | | |
212 | 0 | static inline SplashCoord splashPow(SplashCoord x, SplashCoord y) { |
213 | 0 | #if USE_FIXEDPOINT |
214 | 0 | return FixedPoint::pow(x, y); |
215 | 0 | #else |
216 | 0 | return pow(x, y); |
217 | 0 | #endif |
218 | 0 | } |
219 | | |
220 | | static inline SplashCoord splashDist(SplashCoord x0, SplashCoord y0, |
221 | 0 | SplashCoord x1, SplashCoord y1) { |
222 | 0 | SplashCoord dx, dy; |
223 | 0 | dx = x1 - x0; |
224 | 0 | dy = y1 - y0; |
225 | 0 | #if USE_FIXEDPOINT |
226 | 0 | // this handles the situation where dx*dx or dy*dy is too large to |
227 | 0 | // fit in the 16.16 fixed point format |
228 | 0 | SplashCoord dxa, dya, d; |
229 | 0 | dxa = splashAbs(dx); |
230 | 0 | dya = splashAbs(dy); |
231 | 0 | if (dxa == 0 && dya == 0) { |
232 | 0 | return 0; |
233 | 0 | } else if (dxa > dya) { |
234 | 0 | d = dya / dxa; |
235 | 0 | return dxa * FixedPoint::sqrt(d*d + 1); |
236 | 0 | } else { |
237 | 0 | d = dxa / dya; |
238 | 0 | return dya * FixedPoint::sqrt(d*d + 1); |
239 | 0 | } |
240 | 0 | #else |
241 | 0 | return sqrt(dx * dx + dy * dy); |
242 | 0 | #endif |
243 | 0 | } |
244 | | |
245 | | static inline GBool splashCheckDet(SplashCoord m11, SplashCoord m12, |
246 | | SplashCoord m21, SplashCoord m22, |
247 | 0 | SplashCoord epsilon) { |
248 | 0 | #if USE_FIXEDPOINT |
249 | 0 | return FixedPoint::checkDet(m11, m12, m21, m22, epsilon); |
250 | 0 | #else |
251 | 0 | return fabs(m11 * m22 - m12 * m21) >= epsilon; |
252 | 0 | #endif |
253 | 0 | } |
254 | | |
255 | | // Perform stroke adjustment on a SplashCoord range [xMin, xMax), |
256 | | // resulting in an int range [*xMinI, *xMaxI). |
257 | | // |
258 | | // There are several options: |
259 | | // |
260 | | // 1. Round both edge coordinates. |
261 | | // Pro: adjacent strokes/fills line up without any gaps or |
262 | | // overlaps |
263 | | // Con: lines with the same original floating point width can |
264 | | // end up with different integer widths, e.g.: |
265 | | // xMin = 10.1 xMax = 11.3 (width = 1.2) |
266 | | // --> xMinI = 10 xMaxI = 11 (width = 1) |
267 | | // but |
268 | | // xMin = 10.4 xMax = 11.6 (width = 1.2) |
269 | | // --> xMinI = 10 xMaxI = 12 (width = 2) |
270 | | // |
271 | | // 2. Round the min coordinate; add the ceiling of the width. |
272 | | // Pro: lines with the same original floating point width will |
273 | | // always end up with the same integer width |
274 | | // Con: adjacent strokes/fills can have overlaps (which is |
275 | | // problematic with transparency) |
276 | | // (This could use floor on the min coordinate, instead of |
277 | | // rounding, with similar results.) |
278 | | // (If the width is rounded instead of using ceiling, the results |
279 | | // are similar, except that adjacent strokes/fills can have gaps |
280 | | // as well as overlaps.) |
281 | | // |
282 | | // 3. Use floor on the min coordinate and ceiling on the max |
283 | | // coordinate. |
284 | | // Pro: lines always end up at least as wide as the original |
285 | | // floating point width |
286 | | // Con: adjacent strokes/fills can have overlaps, and lines with |
287 | | // the same original floating point width can end up with |
288 | | // different integer widths; the integer width can be more |
289 | | // than one pixel wider than the original width, e.g.: |
290 | | // xMin = 10.9 xMax = 12.1 (width = 1.2) |
291 | | // --> xMinI = 10 xMaxI = 13 (width = 3) |
292 | | // but |
293 | | // xMin = 10.1 xMax = 11.3 (width = 1.2) |
294 | | // --> xMinI = 10 xMaxI = 12 (width = 2) |
295 | | // |
296 | | // 4. Use a hybrid approach, choosing between two of the above |
297 | | // options, based on width. E.g., use #2 if width <= 4, and use #1 |
298 | | // if width > 4. |
299 | | // |
300 | | // If w >= 0 and strokeAdjMode is splashStrokeAdjustCAD then a special |
301 | | // mode for projecting line caps is enabled, with w being the |
302 | | // transformed line width. |
303 | | |
304 | | static inline void splashStrokeAdjust(SplashCoord xMin, SplashCoord xMax, |
305 | | int *xMinI, int *xMaxI, |
306 | | SplashStrokeAdjustMode strokeAdjMode, |
307 | 0 | SplashCoord w = -1) { |
308 | 0 | int x0, x1; |
309 | 0 |
|
310 | 0 | // make sure the coords fit in 32-bit ints |
311 | 0 | #if USE_FIXEDPOINT |
312 | 0 | if (xMin < -32767) { |
313 | 0 | xMin = -32767; |
314 | 0 | } else if (xMin > 32767) { |
315 | 0 | xMin = 32767; |
316 | 0 | } |
317 | 0 | if (xMax < -32767) { |
318 | 0 | xMax = -32767; |
319 | 0 | } else if (xMax > 32767) { |
320 | 0 | xMax = 32767; |
321 | 0 | } |
322 | 0 | #else |
323 | 0 | if (xMin < -1e9) { |
324 | 0 | xMin = -1e9; |
325 | 0 | } else if (xMin > 1e9) { |
326 | 0 | xMin = 1e9; |
327 | 0 | } |
328 | 0 | if (xMax < -1e9) { |
329 | 0 | xMax = -1e9; |
330 | 0 | } else if (xMax > 1e9) { |
331 | 0 | xMax = 1e9; |
332 | 0 | } |
333 | 0 | #endif |
334 | 0 |
|
335 | 0 | // this will never be called with strokeAdjMode == splashStrokeAdjustOff |
336 | 0 | if (strokeAdjMode == splashStrokeAdjustCAD) { |
337 | 0 | x0 = splashRound(xMin); |
338 | 0 | if (w >= 0) { |
339 | 0 | x1 = splashRound(xMax - w) + splashRound(w); |
340 | 0 | } else { |
341 | 0 | x1 = x0 + splashRound(xMax - xMin); |
342 | 0 | } |
343 | 0 | } else { |
344 | 0 | // NB: enable exactly one of these. |
345 | 0 | #if 1 // 1. Round both edge coordinates. |
346 | 0 | x0 = splashRound(xMin); |
347 | 0 | x1 = splashRound(xMax); |
348 | 0 | #endif |
349 | 0 | #if 0 // 2. Round the min coordinate; add the ceiling of the width. |
350 | 0 | x0 = splashRound(xMin); |
351 | 0 | x1 = x0 + splashCeil(xMax - xMin); |
352 | 0 | #endif |
353 | 0 | #if 0 // 3. Use floor on the min coord and ceiling on the max coord. |
354 | 0 | x0 = splashFloor(xMin); |
355 | 0 | x1 = splashCeil(xMax); |
356 | 0 | #endif |
357 | 0 | #if 0 // 4. Hybrid. |
358 | 0 | SplashCoord w = xMax - xMin; |
359 | 0 | x0 = splashRound(xMin); |
360 | 0 | if (w > 4) { |
361 | 0 | x1 = splashRound(xMax); |
362 | 0 | } else { |
363 | 0 | x1 = x0 + splashRound(w); |
364 | 0 | } |
365 | 0 | #endif |
366 | 0 | } |
367 | 0 | if (x0 == x1) { |
368 | 0 | if (xMin + xMax < 2 * x0) { |
369 | 0 | --x0; |
370 | 0 | } else { |
371 | 0 | ++x1; |
372 | 0 | } |
373 | 0 | } |
374 | 0 | *xMinI = x0; |
375 | 0 | *xMaxI = x1; |
376 | 0 | } |
377 | | |
378 | | #endif |