/src/ghostpdl/base/gsroprun1.h
Line | Count | Source |
1 | | /* Copyright (C) 2001-2023 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* This file is repeatedly included by gsroprun.c to 'autogenerate' many |
18 | | * different versions of roprun code. DO NOT USE THIS FILE EXCEPT FROM |
19 | | * gsroprun.c. |
20 | | */ |
21 | | |
22 | | /* Set the following defines as appropriate on entry: |
23 | | * TEMPLATE_NAME (Compulsory) The name of the function to generate |
24 | | * SPECIFIC_ROP (Optional) If set, the function will base its decision |
25 | | * about whether to provide S and T upon |
26 | | * this value. |
27 | | * SPECIFIC_CODE (Optional) If set, this should expand out to code to |
28 | | * perform the rop. Will be invoked as: |
29 | | * SPECIFIC_CODE(OUT,D,S,T) |
30 | | * S_CONST (Optional) If set, S will be taken to be constant, else |
31 | | * S will be read from a pointer. |
32 | | * T_CONST (Optional) If set, T will be taken to be constant, else |
33 | | * T will be read from a pointer. |
34 | | */ |
35 | | |
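/* Illustrative sketch (editorial addition, not part of the original file):
 * gsroprun.c instantiates this template by defining the macros above and
 * then including this header. The exact defines gsroprun.c uses are not
 * reproduced here; the names and values below are assumptions chosen to
 * show the shape of one instantiation, a specialised "D xor S" rop:
 *
 *     #define TEMPLATE_NAME example_xor_rop_run1    (hypothetical name)
 *     #define SPECIFIC_ROP  0x66                    (rop3 code for D xor S)
 *     #define SPECIFIC_CODE(OUT,D,S,T) OUT = (D)^(S)
 *     #include "gsroprun1.h"
 *
 * Because rop3_uses_S(0x66) is true and rop3_uses_T(0x66) is false, the
 * generated function reads S from a pointer and ignores T entirely.
 */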
36 | | #if defined(TEMPLATE_NAME) |
37 | | |
38 | | #ifdef SPECIFIC_ROP |
39 | | #if rop3_uses_S(SPECIFIC_ROP) |
40 | | #define S_USED |
41 | | #endif |
42 | | #if rop3_uses_T(SPECIFIC_ROP) |
43 | | #define T_USED |
44 | | #endif |
45 | | #else /* !SPECIFIC_ROP */ |
46 | | #define S_USED |
47 | | #define T_USED |
48 | | #endif /* SPECIFIC_ROP */ |
49 | | |
50 | | /* We work in 'chunks' here; for big-endian machines, we can safely use |
51 | | * chunks of 'int' size. For little-endian machines where we have a cheap |
52 | | * endian swap, we can do likewise. For others, we'll work at the byte |
53 | | * level. */ |
54 | | #if !ARCH_IS_BIG_ENDIAN && !defined(ENDIAN_SWAP_INT) |
55 | | #define CHUNKSIZE 8 |
56 | | #define CHUNK byte |
57 | | #define CHUNKONES 255 |
58 | | |
59 | | #define ADJUST_TO_CHUNK(d,dpos) do {} while (0) |
60 | | |
61 | | #else /* ARCH_IS_BIG_ENDIAN || defined(ENDIAN_SWAP_INT) */ |
62 | | #if ARCH_LOG2_SIZEOF_INT == 2 |
63 | 1.63G | #define CHUNKSIZE 32 |
64 | 254M | #define CHUNK unsigned int |
65 | 125M | #define CHUNKONES 0xFFFFFFFFU |
66 | | |
67 | | #if ARCH_SIZEOF_PTR == (1<<ARCH_LOG2_SIZEOF_INT) |
68 | | #define ROP_PTRDIFF_T int |
69 | | #else |
70 | | #define ROP_PTRDIFF_T int64_t |
71 | | #endif |
72 | | #define ADJUST_TO_CHUNK(d, dpos) \ |
73 | 125M | do { int offset = ((ROP_PTRDIFF_T)d) & ((CHUNKSIZE>>3)-1); \ |
74 | 125M | d = (CHUNK *)(void *)(((byte *)(void *)d)-offset); \ |
75 | 125M | dpos += offset<<3; \ |
76 | 125M | } while (0) |
77 | | #else |
78 | | /* FIXME: Write more code in here when we find an example. */ |
79 | | #endif |
80 | | #endif /* ARCH_IS_BIG_ENDIAN || defined(ENDIAN_SWAP_INT) */ |
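/* Worked example (editorial, assuming the 32-bit CHUNK branch above): if d
 * enters pointing at byte address 0x1006 with dpos == 3, then
 * offset == (0x1006 & 3) == 2, d is moved back to the chunk boundary 0x1004,
 * and dpos becomes 3 + (2<<3) == 19. After ADJUST_TO_CHUNK, d is
 * CHUNK-aligned and dpos counts bits from the start of that chunk. */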
81 | | |
82 | | /* We define an 'RE' macro that reverses the endianness of a chunk, if we |
83 | | * need it, and does nothing otherwise. */ |
84 | | #if !ARCH_IS_BIG_ENDIAN && defined(ENDIAN_SWAP_INT) && (CHUNKSIZE != 8) |
85 | 806M | #define RE(I) ((CHUNK)ENDIAN_SWAP_INT(I)) |
86 | | #else /* ARCH_IS_BIG_ENDIAN || !defined(ENDIAN_SWAP_INT) || (CHUNKSIZE == 8) */ |
87 | | #define RE(I) (I) |
88 | | #endif /* ARCH_IS_BIG_ENDIAN || !defined(ENDIAN_SWAP_INT) || (CHUNKSIZE == 8) */ |
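/* Example (editorial): on a little-endian machine with a byte-swap
 * intrinsic available, RE(0x11223344) == 0x44332211. Swapping on load and
 * again on store lets the shift/mask arithmetic below treat each chunk as
 * if its bytes were in raster (big-endian) order. */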
89 | | |
90 | | /* In some cases we will need to fetch values from a pointer, and 'skew' |
91 | | * them. We need 2 variants of this macro. One that is 'SAFE' to use when |
92 | | * SKEW might be 0, and one that can be faster, because we know that SKEW |
93 | | * is non zero. */ |
94 | | #define SKEW_FETCH(S,s,SKEW) \ |
95 | 566M | do { S = RE((RE(s[0])<<SKEW) | (RE(s[1])>>(CHUNKSIZE-SKEW))); s++; } while (0) |
96 | | #define SAFE_SKEW_FETCH(S,s,SKEW,L,R) \ |
97 | 114M | do { S = RE(((L) ? 0 : (RE(s[0])<<SKEW)) | ((R) ? 0 : (RE(s[1])>>(CHUNKSIZE-SKEW)))); s++; } while (0) |
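/* Worked example (editorial, assuming CHUNKSIZE == 32 on a big-endian
 * machine so that RE() is the identity): with SKEW == 8, SKEW_FETCH(S,s,8)
 * sets S to the last 24 bits of s[0] followed by the first 8 bits of s[1],
 * then advances s. SAFE_SKEW_FETCH does the same but substitutes 0 for s[0]
 * when L is true and for s[1] when R is true, so the first and last fetches
 * of a run can avoid reading outside the source bitmap. */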
98 | | |
99 | | #if defined(S_USED) && !defined(S_CONST) |
100 | | #define S_SKEW |
101 | 566M | #define FETCH_S SKEW_FETCH(S,s,s_skew) |
102 | 114M | #define SAFE_FETCH_S(L,R) SAFE_SKEW_FETCH(S,s,s_skew,L,R) |
103 | | #else /* !defined(S_USED) || defined(S_CONST) */ |
104 | | #define FETCH_S |
105 | | #define SAFE_FETCH_S(L,R) |
106 | | #endif /* !defined(S_USED) || defined(S_CONST) */ |
107 | | |
108 | | #if defined(T_USED) && !defined(T_CONST) |
109 | | #define T_SKEW |
110 | 0 | #define FETCH_T SKEW_FETCH(T,t,t_skew) |
111 | 0 | #define SAFE_FETCH_T(L,R) SAFE_SKEW_FETCH(T,t,t_skew,L,R) |
112 | | #else /* !defined(T_USED) || defined(T_CONST) */ |
113 | | #define FETCH_T |
114 | | #define SAFE_FETCH_T(L,R) |
115 | | #endif /* !defined(T_USED) || defined(T_CONST) */ |
116 | | |
117 | | static void TEMPLATE_NAME(rop_run_op *op, byte *d_, int len) |
118 | 62.8M | { |
119 | | #ifndef SPECIFIC_CODE |
120 | | rop_proc proc = rop_proc_table[op->rop]; |
121 | 12.8M | #define SPECIFIC_CODE(OUT_, D_,S_,T_) OUT_ = proc(D_,S_,T_) |
122 | | #endif /* !defined(SPECIFIC_CODE) */ |
123 | 62.8M | CHUNK lmask, rmask; |
124 | | #ifdef S_USED |
125 | | #ifdef S_CONST |
126 | 0 | CHUNK S = (CHUNK)op->s.c; |
127 | | #else /* !defined(S_CONST) */ |
128 | | const CHUNK *s = (CHUNK *)(void *)op->s.b.ptr; |
129 | 62.8M | CHUNK S; |
130 | | int s_skew; |
131 | | #endif /* !defined(S_CONST) */ |
132 | | #else /* !defined(S_USED) */ |
133 | | #define S 0 |
134 | | #undef S_CONST |
135 | | #endif /* !defined(S_USED) */ |
136 | | #ifdef T_USED |
137 | | #ifdef T_CONST |
138 | 2.85M | CHUNK T = (CHUNK)op->t.c; |
139 | | #else /* !defined(T_CONST) */ |
140 | | const CHUNK *t = (CHUNK *)(void *)op->t.b.ptr; |
141 | 0 | CHUNK T; |
142 | | int t_skew; |
143 | | #endif /* !defined(T_CONST) */ |
144 | | #else /* !defined(T_USED) */ |
145 | | #define T 0 |
146 | | #undef T_CONST |
147 | | #endif /* !defined(T_USED) */ |
148 | | #if defined(S_SKEW) || defined(T_SKEW) |
149 | | int skewflags = 0; |
150 | | #endif |
151 | 62.8M | CHUNK D; |
152 | 62.8M | int dpos = op->dpos; |
153 | 62.8M | CHUNK *d = (CHUNK *)(void *)d_; |
154 | | |
155 | | /* Align d to CHUNKSIZE */ |
156 | 62.8M | ADJUST_TO_CHUNK(d,dpos); |
157 | | |
158 | | /* On entry len = length in 'depth'-bit pixels. Change it to be the length |
159 | | * in bits, and add on the number of bits we skip at the start of the |
160 | | * run. */ |
161 | 62.8M | len = len * op->depth + dpos; |
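    /* Example (editorial): len == 2 pixels at op->depth == 32 with dpos == 5
     * becomes len == 69, the bit offset of the end of the run measured from
     * the start of the first (aligned) chunk. */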
162 | | |
163 | | /* lmask = the set of bits to alter in the output bitmap on the left |
164 | | * hand edge of the run. rmask = the set of bits NOT to alter in the |
165 | | * output bitmap on the right hand edge of the run. */ |
166 | 62.8M | lmask = RE((CHUNKONES>>((CHUNKSIZE-1) & dpos))); |
167 | 62.8M | rmask = RE((CHUNKONES>>((CHUNKSIZE-1) & len))); |
168 | 62.8M | if (rmask == CHUNKONES) rmask = 0; |
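    /* Example (editorial, CHUNKSIZE == 32, ignoring the RE byte swap):
     * dpos == 5 gives lmask == CHUNKONES>>5, so all but the first 5 bits of
     * the first chunk are altered; len == 69 gives rmask == CHUNKONES>>5, so
     * the last 27 bits of the final chunk are preserved. A run that ends
     * exactly on a chunk boundary gives rmask == CHUNKONES, which is then
     * forced to 0 so the whole final chunk is written. */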
169 | | |
170 | | #if defined(S_CONST) || defined(T_CONST) |
171 | | /* S and T should be supplied as 'depth' bits. Duplicate them up to chunk |
172 | | * size (byte-sized input is fine too); see the worked example after this block. */ |
173 | 2.85M | if (op->depth & 1) { |
174 | | #ifdef S_CONST |
175 | | S |= S<<1; |
176 | | #endif /* !defined(S_CONST) */ |
177 | 2.85M | #ifdef T_CONST |
178 | 2.85M | T |= T<<1; |
179 | 2.85M | #endif /* !defined(T_CONST) */ |
180 | 2.85M | } |
181 | 2.85M | if (op->depth & 3) { |
182 | | #ifdef S_CONST |
183 | | S |= S<<2; |
184 | | #endif /* !defined(S_CONST) */ |
185 | 2.85M | #ifdef T_CONST |
186 | 2.85M | T |= T<<2; |
187 | 2.85M | #endif /* !defined(T_CONST) */ |
188 | 2.85M | } |
189 | 2.85M | if (op->depth & 7) { |
190 | | #ifdef S_CONST |
191 | | S |= S<<4; |
192 | | #endif /* !defined(S_CONST) */ |
193 | 2.85M | #ifdef T_CONST |
194 | 2.85M | T |= T<<4; |
195 | 2.85M | #endif /* !defined(T_CONST) */ |
196 | 2.85M | } |
197 | | #if CHUNKSIZE > 8 |
198 | 2.85M | if (op->depth & 15) { |
199 | | #ifdef S_CONST |
200 | | S |= S<<8; |
201 | | #endif /* !defined(S_CONST) */ |
202 | 2.85M | #ifdef T_CONST |
203 | 2.85M | T |= T<<8; |
204 | 2.85M | #endif /* !defined(T_CONST) */ |
205 | 2.85M | } |
206 | | #endif /* CHUNKSIZE > 8 */ |
207 | | #if CHUNKSIZE > 16 |
208 | 2.85M | if (op->depth & 31) { |
209 | | #ifdef S_CONST |
210 | | S |= S<<16; |
211 | | #endif /* !defined(S_CONST) */ |
212 | 2.85M | #ifdef T_CONST |
213 | 2.85M | T |= T<<16; |
214 | 2.85M | #endif /* !defined(T_CONST) */ |
215 | 2.85M | } |
216 | | #endif /* CHUNKSIZE > 16 */ |
217 | | #endif /* defined(S_CONST) || defined(T_CONST) */ |
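    /* Worked example (editorial) of the replication above: with
     * op->depth == 4 and a constant T supplied as the 4-bit value 0x9, the
     * depth&7, depth&15 and depth&31 steps fire in turn, growing T to 0x99,
     * 0x9999 and finally 0x99999999, so every 4-bit pixel position in the
     * chunk sees the same constant. A byte-sized (depth == 8) constant skips
     * the first three steps and is only widened to chunk size by the last
     * two. */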
218 | | |
219 | | /* Note #1: This mirrors what the original code did, but I think it has |
220 | | * the risk of moving s and t back beyond officially allocated space. We |
221 | | * may be saved by the fact that all blocks have a word or two in front |
222 | | * of them due to the allocator. If we ever get valgrind properly marking |
223 | | * allocated blocks as readable etc, then this may throw some spurious |
224 | | * errors. RJW. */ |
225 | | #ifdef S_SKEW |
226 | | { |
227 | | int slen, slen2; |
228 | | int spos = op->s.b.pos; |
229 | 62.8M | ADJUST_TO_CHUNK(s, spos); |
230 | | s_skew = spos - dpos; |
231 | 62.8M | if (s_skew < 0) { |
232 | 46.8M | s_skew += CHUNKSIZE; |
233 | 46.8M | s--; |
234 | 46.8M | skewflags |= 1; /* Suppress reading off left edge */ |
235 | 46.8M | } |
236 | | /* We are allowed to read all the data bits, so: len - dpos + spos. |
237 | | * We're allowed to read in CHUNKS, so: CHUNKUP(len-dpos+spos). |
238 | | * This code will actually read CHUNKUP(len)+CHUNKSIZE bits. If |
239 | | * this is larger, then suppress the final fetch (example below). */ |
240 | 62.8M | slen = (len + s_skew + CHUNKSIZE-1) & ~(CHUNKSIZE-1); |
241 | 62.8M | slen2 = (len + CHUNKSIZE + CHUNKSIZE-1) & ~(CHUNKSIZE-1); |
242 | 62.8M | if ((s_skew == 0) || (slen < slen2)) { |
243 | 35.6M | skewflags |= 4; /* Suppress reading off the right edge */ |
244 | 35.6M | } |
245 | | } |
246 | | #endif /* !defined(S_SKEW) */ |
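    /* Example (editorial): spos == 3 and dpos == 5 give s_skew == -2, so
     * s_skew becomes CHUNKSIZE-2, s steps back one chunk, and skewflags bit 0
     * records that the first (skewed) fetch must not actually read from that
     * earlier chunk. The slen/slen2 comparison then decides whether the last
     * fetch of the run would read one chunk beyond the source data and must
     * likewise be suppressed (skewflags bit 2). */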
247 | | #ifdef T_SKEW |
248 | | { |
249 | | int tlen, tlen2; |
250 | | int tpos = op->t.b.pos; |
251 | 0 | ADJUST_TO_CHUNK(t, tpos); |
252 | | t_skew = tpos - dpos; |
253 | 0 | if (t_skew < 0) { |
254 | 0 | t_skew += CHUNKSIZE; |
255 | 0 | t--; |
256 | 0 | skewflags |= 2; /* Suppress reading off left edge */ |
257 | 0 | } |
258 | | /* We are allowed to read all the data bits, so: len - dpos + tpos |
259 | | * We're allowed to read in CHUNKS, so: CHUNKUP(len-dpos+tpos). |
260 | | * This code will actually read CHUNKUP(len)+CHUNKSIZE bits. If |
261 | | * this is larger, then suppress. */ |
262 | 0 | tlen = (len + t_skew + CHUNKSIZE-1) & ~(CHUNKSIZE-1); |
263 | 0 | tlen2 = (len + CHUNKSIZE + CHUNKSIZE-1) & ~(CHUNKSIZE-1); |
264 | 0 | if ((t_skew == 0) || (tlen < tlen2)) { |
265 | 0 | skewflags |= 8; /* Suppress reading off the right edge */ |
266 | 0 | } |
267 | | } |
268 | | #endif /* !defined(T_SKEW) */ |
269 | | |
270 | 62.8M | len -= CHUNKSIZE; /* len = bits to do - CHUNKSIZE */ |
271 | | /* len <= 0 means 1 chunk or less to do */ |
272 | 62.8M | if (len <= 0) { |
273 | | /* Short case - starts and ends in the same chunk */ |
274 | 380k | lmask &= ~rmask; /* Combined mask = bits to alter */ |
275 | 380k | SAFE_FETCH_S(skewflags & 1,skewflags & 4); |
276 | 380k | SAFE_FETCH_T(skewflags & 2,skewflags & 8); |
277 | 380k | SPECIFIC_CODE(D, *d, S, T); |
278 | 380k | *d = (*d & ~lmask) | (D & lmask); |
279 | 380k | return; |
280 | 380k | } |
281 | 62.4M | if ((lmask != CHUNKONES) |
282 | | #if defined(S_SKEW) || defined(T_SKEW) |
283 | 10.8M | || (skewflags & 3) |
284 | | #endif |
285 | 62.4M | ) { |
286 | | /* Unaligned left hand case */ |
287 | 51.6M | SAFE_FETCH_S(skewflags & 1,s_skew == 0); |
288 | 51.6M | SAFE_FETCH_T(skewflags & 2,t_skew == 0); |
289 | 51.6M | SPECIFIC_CODE(D, *d, S, T); |
290 | 51.6M | *d = (*d & ~lmask) | (D & lmask); |
291 | 51.6M | d++; |
292 | 51.6M | len -= CHUNKSIZE; |
293 | 51.6M | } |
294 | 62.4M | if (len > 0) { |
295 | | /* Simple middle case (complete destination chunks). */ |
296 | | #ifdef S_SKEW |
297 | 46.3M | if (s_skew == 0) { |
298 | | #ifdef T_SKEW |
299 | 0 | if (t_skew == 0) { |
300 | 0 | do { |
301 | 0 | SPECIFIC_CODE(*d, *d, *s++, *t++); |
302 | 0 | d++; |
303 | 0 | len -= CHUNKSIZE; |
304 | 0 | } while (len > 0); |
305 | 0 | } else |
306 | 0 | #endif /* !defined(T_SKEW) */ |
307 | 0 | { |
308 | 466M | do { |
309 | 466M | FETCH_T; |
310 | 466M | SPECIFIC_CODE(*d, *d, *s++, T); |
311 | 466M | d++; |
312 | 466M | len -= CHUNKSIZE; |
313 | 466M | } while (len > 0); |
314 | 0 | } |
315 | 11.0M | } else |
316 | 35.2M | #endif /* !defined(S_SKEW) */ |
317 | 35.2M | { |
318 | | #ifdef T_SKEW |
319 | 0 | if (t_skew == 0) { |
320 | 0 | do { |
321 | 0 | FETCH_S; |
322 | 0 | SPECIFIC_CODE(*d, *d, S, *t++); |
323 | 0 | d++; |
324 | 0 | len -= CHUNKSIZE; |
325 | 0 | } while (len > 0); |
326 | 0 | } else |
327 | 0 | #endif /* !defined(T_SKEW) */ |
328 | 0 | { |
329 | 566M | do { |
330 | 566M | FETCH_S; |
331 | 566M | FETCH_T; |
332 | 566M | SPECIFIC_CODE(*d, *d, S, T); |
333 | 566M | d++; |
334 | 566M | len -= CHUNKSIZE; |
335 | 566M | } while (len > 0); |
336 | 0 | } |
337 | 35.2M | } |
338 | 46.3M | } |
339 | | /* Unaligned right hand case */ |
340 | 62.4M | SAFE_FETCH_S(0,skewflags & 4); |
341 | 62.4M | SAFE_FETCH_T(0,skewflags & 8); |
342 | 62.4M | SPECIFIC_CODE(D, *d, S, T); |
343 | 62.4M | *d = (*d & rmask) | (D & ~rmask); |
344 | 62.4M | } |
[Per-instantiation coverage breakdowns omitted: gsroprun.c:notS_rop_run1_const_t, gsroprun.c:invert_rop_run1, gsroprun.c:xor_rop_run1_const_t, gsroprun.c:dors_rop_run1_const_t, gsroprun.c:generic_rop_run1, gsroprun.c:generic_rop_run1_const_t, gsroprun.c:generic_rop_run1_const_st.]
345 | | |
346 | | #undef ADJUST_TO_CHUNK |
347 | | #undef CHUNKSIZE |
348 | | #undef CHUNK |
349 | | #undef CHUNKONES |
350 | | #undef FETCH_S |
351 | | #undef FETCH_T |
352 | | #undef SAFE_FETCH_S |
353 | | #undef SAFE_FETCH_T |
354 | | #undef RE |
355 | | #undef S |
356 | | #undef S_USED |
357 | | #undef S_CONST |
358 | | #undef S_SKEW |
359 | | #undef SKEW_FETCH |
360 | | #undef SAFE_SKEW_FETCH |
361 | | #undef SPECIFIC_CODE |
362 | | #undef SPECIFIC_ROP |
363 | | #undef T |
364 | | #undef T_USED |
365 | | #undef T_CONST |
366 | | #undef T_SKEW |
367 | | #undef TEMPLATE_NAME |
368 | | #undef ROP_PTRDIFF_T |
369 | | |
370 | | #else |
371 | | int dummy; |
372 | | #endif |