/src/ghostpdl/psi/igcstr.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2001-2024 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* String GC routines for Ghostscript */ |
18 | | #include "memory_.h" |
19 | | #include "ghost.h" |
20 | | #include "gsmdebug.h" |
21 | | #include "gsstruct.h" |
22 | | #include "iastate.h" |
23 | | #include "igcstr.h" |
24 | | #include "igc.h" |
25 | | |
26 | | /* Forward references */ |
27 | | static bool gc_mark_string(const byte *, uint, bool, const clump_t *); |
28 | | |
29 | | /* (Un)mark the strings in a clump. */ |
30 | | void |
31 | | gc_strings_set_marks(clump_t * cp, bool mark) |
32 | 72.3M | { |
33 | 72.3M | if (cp->smark != 0) { |
34 | 48.1M | if_debug3('6', "[6]clearing string marks "PRI_INTPTR"[%u] to %d\n", |
35 | 48.1M | (intptr_t)cp->smark, cp->smark_size, (int)mark); |
36 | 48.1M | memset(cp->smark, 0, cp->smark_size); |
37 | 48.1M | if (mark) |
38 | 3.11M | gc_mark_string(cp->sbase, (cp->climit - cp->sbase), true, cp); |
39 | 48.1M | } |
40 | 72.3M | } |
41 | | |
42 | | /* We mark strings a word at a time. */ |
43 | | typedef string_mark_unit bword; |
44 | | |
45 | 15.5G | #define bword_log2_bytes log2_sizeof_string_mark_unit |
46 | | #define bword_bytes (1 << bword_log2_bytes) |
47 | 15.5G | #define bword_log2_bits (bword_log2_bytes + 3) |
48 | 15.5G | #define bword_bits (1 << bword_log2_bits) |
49 | 12.1G | #define bword_1s (~(bword)0) |
50 | | /* Compensate for byte order reversal if necessary. */ |
51 | | #if ARCH_IS_BIG_ENDIAN |
52 | | # if bword_bytes == 2 |
53 | | # define bword_swap_bytes(m) m = (m << 8) | (m >> 8) |
54 | | # else /* bword_bytes == 4 */ |
55 | | # define bword_swap_bytes(m)\ |
56 | | m = (m << 24) | ((m & 0xff00) << 8) | ((m >> 8) & 0xff00) | (m >> 24) |
57 | | # endif |
58 | | #else |
59 | 7.04G | # define bword_swap_bytes(m) DO_NOTHING |
60 | | #endif |
61 | | |
62 | | /* (Un)mark a string in a known clump. Return true iff any new marks. */ |
63 | | static bool |
64 | | gc_mark_string(const byte * ptr, uint size, bool set, const clump_t * cp) |
65 | 2.41G | { |
66 | 2.41G | uint offset = ptr - cp->sbase; |
67 | 2.41G | bword *bp = (bword *) (cp->smark + ((offset & -bword_bits) >> 3)); |
68 | 2.41G | uint bn = offset & (bword_bits - 1); |
69 | 2.41G | bword m = (bword)(((uint64_t)bword_1s << bn) & bword_1s); |
70 | 2.41G | uint left = size; |
71 | 2.41G | bword marks = 0; |
72 | | |
73 | 2.41G | bword_swap_bytes(m); |
74 | 2.41G | if (set) { |
75 | 2.41G | if (left + bn >= bword_bits) { |
76 | 896M | marks |= ~*bp & m; |
77 | 896M | *bp |= m; |
78 | 896M | m = bword_1s, left -= bword_bits - bn, bp++; |
79 | 3.40G | while (left >= bword_bits) { |
80 | 2.50G | marks |= ~*bp; |
81 | 2.50G | *bp = bword_1s; |
82 | 2.50G | left -= bword_bits, bp++; |
83 | 2.50G | } |
84 | 896M | } |
85 | 2.41G | if (left) { |
86 | 2.31G | bword_swap_bytes(m); |
87 | 2.31G | m = (bword)(((uint64_t)m - ((uint64_t)m << left)) & bword_1s); |
88 | 2.31G | bword_swap_bytes(m); |
89 | 2.31G | marks |= ~*bp & m; |
90 | 2.31G | *bp |= m; |
91 | 2.31G | } |
92 | 2.41G | } else { |
93 | 0 | if (left + bn >= bword_bits) { |
94 | 0 | *bp &= ~m; |
95 | 0 | m = bword_1s, left -= bword_bits - bn, bp++; |
96 | 0 | if (left >= bword_bits * 5) { |
97 | 0 | memset(bp, 0, (left & -bword_bits) >> 3); |
98 | 0 | bp += left >> bword_log2_bits; |
99 | 0 | left &= bword_bits - 1; |
100 | 0 | } else |
101 | 0 | while (left >= bword_bits) { |
102 | 0 | *bp = 0; |
103 | 0 | left -= bword_bits, bp++; |
104 | 0 | } |
105 | 0 | } |
106 | 0 | if (left) { |
107 | 0 | bword_swap_bytes(m); |
108 | 0 | m = (bword)(((uint64_t)m - ((uint64_t)m << left)) & bword_1s); |
109 | 0 | bword_swap_bytes(m); |
110 | 0 | *bp &= ~m; |
111 | 0 | } |
112 | 0 | } |
113 | 2.41G | return marks != 0; |
114 | 2.41G | } |
115 | | |
116 | | #ifdef DEBUG |
117 | | /* Print a string for debugging. We need this because there is no d--- |
118 | | * equivalent of fwrite. |
119 | | */ |
120 | | static void |
121 | | dmfwrite(const gs_memory_t *mem, const byte *ptr, uint count) |
122 | | { |
123 | | uint i; |
124 | | for (i = 0; i < count; ++i) |
125 | | dmputc(mem, ptr[i]); |
126 | | } |
127 | | #endif |
128 | | |
129 | | /* Mark a string. Return true if any new marks. */ |
130 | | bool |
131 | | gc_string_mark(const byte * ptr, uint size, bool set, gc_state_t * gcst) |
132 | 2.60G | { |
133 | 2.60G | const clump_t *cp; |
134 | 2.60G | bool marks; |
135 | | |
136 | 2.60G | if (size == 0) |
137 | 194M | return false; |
138 | 2.41G | #define dmprintstr(mem)\ |
139 | 2.41G | dmputc(mem, '('); dmfwrite(mem, ptr, min(size, 20));\ |
140 | 2.41G | dmputs(mem, (size <= 20 ? ")" : "...)")) |
141 | 2.41G | if (!(cp = gc_locate(ptr, gcst))) { /* not in a clump */ |
142 | | #ifdef DEBUG |
143 | | if (gs_debug_c('5')) { |
144 | | dmlprintf2(gcst->heap, "[5]"PRI_INTPTR"[%u]", (intptr_t)ptr, size); |
145 | | dmprintstr(gcst->heap); |
146 | | dmputs(gcst->heap, " not in a clump\n"); |
147 | | } |
148 | | #endif |
149 | 79.6k | return false; |
150 | 79.6k | } |
151 | 2.41G | if (cp->smark == 0) /* not marking strings */ |
152 | 0 | return false; |
153 | | #ifdef DEBUG |
154 | | if (ptr < cp->ctop) { |
155 | | if_debug4('6', "String pointer "PRI_INTPTR"[%u] outside ["PRI_INTPTR".."PRI_INTPTR")\n", |
156 | | (intptr_t)ptr, size, (intptr_t)cp->ctop, (intptr_t)cp->climit); |
157 | | return false; |
158 | | } else if (ptr + size > cp->climit) { /* |
159 | | * If this is the bottommost string in a clump that has |
160 | | * an inner clump, the string's starting address is both |
161 | | * cp->ctop of the outer clump and cp->climit of the inner; |
162 | | * gc_locate may incorrectly attribute the string to the |
163 | | * inner clump because of this. This doesn't affect |
164 | | * marking or relocation, since the machinery for these |
165 | | * is all associated with the outermost clump, |
166 | | * but it can cause the validity check to fail. |
167 | | * Check for this case now. |
168 | | */ |
169 | | const clump_t *scp = cp; |
170 | | |
171 | | while (ptr == scp->climit && scp->outer != 0) |
172 | | scp = scp->outer; |
173 | | if (ptr + size > scp->climit) { |
174 | | if_debug4('6', "String pointer "PRI_INTPTR"[%u] outside ["PRI_INTPTR".."PRI_INTPTR")\n", |
175 | | (intptr_t)ptr, size, |
176 | | (intptr_t)scp->ctop, (intptr_t)scp->climit); |
177 | | return false; |
178 | | } |
179 | | } |
180 | | #endif |
181 | 2.41G | marks = gc_mark_string(ptr, size, set, cp); |
182 | | #ifdef DEBUG |
183 | | if (gs_debug_c('5')) { |
184 | | dmlprintf4(gcst->heap, "[5]%s%smarked "PRI_INTPTR"[%u]", |
185 | | (marks ? "" : "already "), (set ? "" : "un"), |
186 | | (intptr_t)ptr, size); |
187 | | dmprintstr(gcst->heap); |
188 | | dmputc(gcst->heap, '\n'); |
189 | | } |
190 | | #endif |
191 | 2.41G | return marks; |
192 | 2.41G | } |
193 | | |
194 | | /* Clear the relocation for strings. */ |
195 | | /* This requires setting the marks. */ |
196 | | void |
197 | | gc_strings_clear_reloc(clump_t * cp) |
198 | 2.27M | { |
199 | 2.27M | if (cp->sreloc != 0) { |
200 | 1.55M | gc_strings_set_marks(cp, true); |
201 | 1.55M | if_debug1('6', "[6]clearing string reloc "PRI_INTPTR"\n", |
202 | 1.55M | (intptr_t)cp->sreloc); |
203 | 1.55M | gc_strings_set_reloc(cp); |
204 | 1.55M | } |
205 | 2.27M | } |
206 | | |
207 | | /* Count the 0-bits in a byte. */ |
208 | | static const byte count_zero_bits_table[256] = |
209 | | { |
210 | | #define o4(n) n,n-1,n-1,n-2 |
211 | | #define o16(n) o4(n),o4(n-1),o4(n-1),o4(n-2) |
212 | | #define o64(n) o16(n),o16(n-1),o16(n-1),o16(n-2) |
213 | | o64(8), o64(7), o64(7), o64(6) |
214 | | }; |
215 | | |
216 | | #define byte_count_zero_bits(byt)\ |
217 | 4.94G | (uint)(count_zero_bits_table[byt]) |
218 | | #define byte_count_one_bits(byt)\ |
219 | 11.3G | (uint)(8 - count_zero_bits_table[byt]) |
220 | | |
221 | | /* Set the relocation for the strings in a clump. */ |
222 | | /* The sreloc table stores the relocated offset from climit for */ |
223 | | /* the beginning of each block of string_data_quantum characters. */ |
224 | | void |
225 | | gc_strings_set_reloc(clump_t * cp) |
226 | 70.1M | { |
227 | 70.1M | if (cp->sreloc != 0 && cp->smark != 0) { |
228 | 46.5M | byte *bot = cp->ctop; |
229 | 46.5M | byte *top = cp->climit; |
230 | 46.5M | uint count = |
231 | 46.5M | (top - bot + (string_data_quantum - 1)) >> |
232 | 46.5M | log2_string_data_quantum; |
233 | 46.5M | string_reloc_offset *relp = |
234 | 46.5M | cp->sreloc + |
235 | 46.5M | (cp->smark_size >> (log2_string_data_quantum - 3)); |
236 | 46.5M | register const byte *bitp = cp->smark + cp->smark_size; |
237 | 46.5M | register string_reloc_offset reloc = 0; |
238 | | |
239 | | /* Skip initial unrelocated strings quickly. */ |
240 | 46.5M | #if string_data_quantum == bword_bits || string_data_quantum == bword_bits * 2 |
241 | 46.5M | { |
242 | | /* Work around the alignment aliasing bug. */ |
243 | 46.5M | const bword *wp = (const bword *)bitp; |
244 | | |
245 | | #if string_data_quantum == bword_bits |
246 | | # define RELOC_TEST_1S(wp) (wp[-1]) |
247 | | #else /* string_data_quantum == bword_bits * 2 */ |
248 | 554M | # define RELOC_TEST_1S(wp) (wp[-1] & wp[-2]) |
249 | 46.5M | #endif |
250 | 580M | while (count && RELOC_TEST_1S(wp) == bword_1s) { |
251 | 533M | wp -= string_data_quantum / bword_bits; |
252 | 533M | *--relp = reloc += string_data_quantum; |
253 | 533M | --count; |
254 | 533M | } |
255 | 46.5M | #undef RELOC_TEST_1S |
256 | 46.5M | bitp = (const byte *)wp; |
257 | 46.5M | } |
258 | 46.5M | #endif |
259 | 664M | while (count--) { |
260 | 618M | bitp -= string_data_quantum / 8; |
261 | 618M | reloc += string_data_quantum - |
262 | 618M | byte_count_zero_bits(bitp[0]); |
263 | 618M | reloc -= byte_count_zero_bits(bitp[1]); |
264 | 618M | reloc -= byte_count_zero_bits(bitp[2]); |
265 | 618M | reloc -= byte_count_zero_bits(bitp[3]); |
266 | 618M | #if log2_string_data_quantum > 5 |
267 | 618M | reloc -= byte_count_zero_bits(bitp[4]); |
268 | 618M | reloc -= byte_count_zero_bits(bitp[5]); |
269 | 618M | reloc -= byte_count_zero_bits(bitp[6]); |
270 | 618M | reloc -= byte_count_zero_bits(bitp[7]); |
271 | 618M | #endif |
272 | 618M | *--relp = reloc; |
273 | 618M | } |
274 | 46.5M | } |
275 | 70.1M | cp->sdest = cp->climit; |
276 | 70.1M | } |
277 | | |
278 | | /* Relocate a string pointer. */ |
279 | | void |
280 | | igc_reloc_string(gs_string * sptr, gc_state_t * gcst) |
281 | 2.53G | { |
282 | 2.53G | byte *ptr; |
283 | 2.53G | const clump_t *cp; |
284 | 2.53G | uint offset; |
285 | 2.53G | uint reloc; |
286 | 2.53G | const byte *bitp; |
287 | 2.53G | byte byt; |
288 | | |
289 | 2.53G | if (sptr->size == 0) { |
290 | 26.0M | sptr->data = 0; |
291 | 26.0M | return; |
292 | 26.0M | } |
293 | 2.50G | ptr = sptr->data; |
294 | | |
295 | 2.50G | if (!(cp = gc_locate(ptr, gcst))) /* not in a clump */ |
296 | 79.6k | return; |
297 | 2.50G | if (cp->sreloc == 0 || cp->smark == 0) /* not marking strings */ |
298 | 0 | return; |
299 | 2.50G | offset = ptr - cp->sbase; |
300 | 2.50G | reloc = cp->sreloc[offset >> log2_string_data_quantum]; |
301 | 2.50G | bitp = &cp->smark[offset >> 3]; |
302 | 2.50G | switch (offset & (string_data_quantum - 8)) { |
303 | 0 | #if log2_string_data_quantum > 5 |
304 | 316M | case 56: |
305 | 316M | reloc -= byte_count_one_bits(bitp[-7]); |
306 | 632M | case 48: |
307 | 632M | reloc -= byte_count_one_bits(bitp[-6]); |
308 | 956M | case 40: |
309 | 956M | reloc -= byte_count_one_bits(bitp[-5]); |
310 | 1.27G | case 32: |
311 | 1.27G | reloc -= byte_count_one_bits(bitp[-4]); |
312 | 1.27G | #endif |
313 | 1.58G | case 24: |
314 | 1.58G | reloc -= byte_count_one_bits(bitp[-3]); |
315 | 1.88G | case 16: |
316 | 1.88G | reloc -= byte_count_one_bits(bitp[-2]); |
317 | 2.19G | case 8: |
318 | 2.19G | reloc -= byte_count_one_bits(bitp[-1]); |
319 | 2.50G | } |
320 | 2.50G | byt = *bitp & (0xff >> (8 - (offset & 7))); |
321 | 2.50G | reloc -= byte_count_one_bits(byt); |
322 | 2.50G | if_debug2('5', "[5]relocate string "PRI_INTPTR" to 0x%lx\n", |
323 | 2.50G | (intptr_t)ptr, (intptr_t)(cp->sdest - reloc)); |
324 | 2.50G | sptr->data = (cp->sdest - reloc); |
325 | 2.50G | } |
326 | | void |
327 | | igc_reloc_const_string(gs_const_string * sptr, gc_state_t * gcst) |
328 | 2.20G | { /* We assume the representation of byte * and const byte * is */ |
329 | | /* the same.... */ |
330 | 2.20G | igc_reloc_string((gs_string *) sptr, gcst); |
331 | 2.20G | } |
332 | | void |
333 | | igc_reloc_param_string(gs_param_string * sptr, gc_state_t * gcst) |
334 | 1.95M | { |
335 | 1.95M | if (!sptr->persistent) { |
336 | | /* We assume that gs_param_string is a subclass of gs_string. */ |
337 | 1.95M | igc_reloc_string((gs_string *)sptr, gcst); |
338 | 1.95M | } |
339 | 1.95M | } |
340 | | |
341 | | /* Compact the strings in a clump. */ |
342 | | void |
343 | | gc_strings_compact(clump_t * cp, const gs_memory_t *mem) |
344 | 68.5M | { |
345 | 68.5M | if (cp->smark != 0) { |
346 | 45.0M | byte *hi = cp->climit; |
347 | 45.0M | byte *lo = cp->ctop; |
348 | 45.0M | const byte *from = hi; |
349 | 45.0M | byte *to = hi; |
350 | 45.0M | const byte *bp = cp->smark + cp->smark_size; |
351 | | |
352 | | #ifdef DEBUG |
353 | | if (gs_debug_c('4') || gs_debug_c('5')) { |
354 | | byte *base = cp->sbase; |
355 | | uint i = (lo - base) & -string_data_quantum; |
356 | | uint n = ROUND_UP(hi - base, string_data_quantum); |
357 | | |
358 | | #define R 16 |
359 | | for (; i < n; i += R) { |
360 | | uint j; |
361 | | |
362 | | dmlprintf1(mem, "[4]"PRI_INTPTR": ", (intptr_t) (base + i)); |
363 | | for (j = i; j < i + R; j++) { |
364 | | byte ch = base[j]; |
365 | | |
366 | | if (ch <= 31) { |
367 | | dmputc(mem, '^'); |
368 | | dmputc(mem, ch + 0100); |
369 | | } else |
370 | | dmputc(mem, ch); |
371 | | } |
372 | | dmputc(mem, ' '); |
373 | | for (j = i; j < i + R; j++) |
374 | | dmputc(mem, (cp->smark[j >> 3] & (1 << (j & 7)) ? |
375 | | '+' : '.')); |
376 | | #undef R |
377 | | if (!(i & (string_data_quantum - 1))) |
378 | | dmprintf1(mem, " %u", cp->sreloc[i >> log2_string_data_quantum]); |
379 | | dmputc(mem, '\n'); |
380 | | } |
381 | | } |
382 | | #endif |
383 | | /* |
384 | | * Skip unmodified strings quickly. We know that cp->smark is |
385 | | * aligned to a string_mark_unit. |
386 | | */ |
387 | 45.0M | { |
388 | | /* Work around the alignment aliasing bug. */ |
389 | 45.0M | const bword *wp = (const bword *)bp; |
390 | | |
391 | 1.05G | while (to > lo && wp[-1] == bword_1s) |
392 | 1.01G | to -= bword_bits, --wp; |
393 | 45.0M | bp = (const byte *)wp; |
394 | 66.3M | while (to > lo && bp[-1] == 0xff) |
395 | 21.3M | to -= 8, --bp; |
396 | 45.0M | } |
397 | 45.0M | from = to; |
398 | | |
399 | 4.85G | while (from > lo) { |
400 | 4.81G | byte b = *--bp; |
401 | | |
402 | 4.81G | from -= 8; |
403 | 4.81G | switch (b) { |
404 | 857M | case 0xff: |
405 | 857M | to -= 8; |
406 | | /* |
407 | | * Since we've seen a byte other than 0xff, we know |
408 | | * to != from at this point. |
409 | | */ |
410 | 857M | to[7] = from[7]; |
411 | 857M | to[6] = from[6]; |
412 | 857M | to[5] = from[5]; |
413 | 857M | to[4] = from[4]; |
414 | 857M | to[3] = from[3]; |
415 | 857M | to[2] = from[2]; |
416 | 857M | to[1] = from[1]; |
417 | 857M | to[0] = from[0]; |
418 | 857M | break; |
419 | 70.0M | default: |
420 | 70.0M | if (b & 0x80) |
421 | 41.2M | *--to = from[7]; |
422 | 70.0M | if (b & 0x40) |
423 | 40.6M | *--to = from[6]; |
424 | 70.0M | if (b & 0x20) |
425 | 38.9M | *--to = from[5]; |
426 | 70.0M | if (b & 0x10) |
427 | 40.3M | *--to = from[4]; |
428 | 70.0M | if (b & 8) |
429 | 36.7M | *--to = from[3]; |
430 | 70.0M | if (b & 4) |
431 | 34.1M | *--to = from[2]; |
432 | 70.0M | if (b & 2) |
433 | 29.7M | *--to = from[1]; |
434 | 70.0M | if (b & 1) |
435 | 28.4M | *--to = from[0]; |
436 | | /* falls through */ |
437 | 3.95G | case 0: |
438 | 3.95G | ; |
439 | 4.81G | } |
440 | 4.81G | } |
441 | 45.0M | gs_alloc_fill(cp->ctop, gs_alloc_fill_collected, |
442 | 45.0M | to - cp->ctop); |
443 | 45.0M | cp->ctop = to; |
444 | 45.0M | } |
445 | 68.5M | } |