Line | Count | Source |
1 | | /* ccl - routines for character classes */ |
2 | | |
3 | | /* Copyright (c) 1990 The Regents of the University of California. */ |
4 | | /* All rights reserved. */ |
5 | | |
6 | | /* This code is derived from software contributed to Berkeley by */ |
7 | | /* Vern Paxson. */ |
8 | | |
9 | | /* The United States Government has rights in this work pursuant */ |
10 | | /* to contract no. DE-AC03-76SF00098 between the United States */ |
11 | | /* Department of Energy and the University of California. */ |
12 | | |
13 | | /* This file is part of flex. */ |
14 | | |
15 | | /* Redistribution and use in source and binary forms, with or without */ |
16 | | /* modification, are permitted provided that the following conditions */ |
17 | | /* are met: */ |
18 | | |
19 | | /* 1. Redistributions of source code must retain the above copyright */ |
20 | | /* notice, this list of conditions and the following disclaimer. */ |
21 | | /* 2. Redistributions in binary form must reproduce the above copyright */ |
22 | | /* notice, this list of conditions and the following disclaimer in the */ |
23 | | /* documentation and/or other materials provided with the distribution. */ |
24 | | |
25 | | /* Neither the name of the University nor the names of its contributors */ |
26 | | /* may be used to endorse or promote products derived from this software */ |
27 | | /* without specific prior written permission. */ |
28 | | |
29 | | /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ |
30 | | /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ |
31 | | /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ |
32 | | /* PURPOSE. */ |
33 | | |
34 | | #include "flexdef.h" |
35 | | |
36 | | /* return true if the chr is in the ccl. Takes negation into account. */ |
37 | | static bool |
38 | | ccl_contains (const int cclp, const int ch) |
39 | 0 | { |
40 | 0 | int ind, len, i; |
41 | |
|
42 | 0 | len = ccllen[cclp]; |
43 | 0 | ind = cclmap[cclp]; |
44 | |
|
45 | 0 | for (i = 0; i < len; ++i) |
46 | 0 | if (ccltbl[ind + i] == ch) |
47 | 0 | return !cclng[cclp]; |
48 | | |
49 | 0 | return cclng[cclp]; |
50 | 0 | } |
51 | | |
52 | | |
53 | | /* ccladd - add a single character to a ccl */ |
54 | | |
55 | | void ccladd (int cclp, int ch) |
56 | 0 | { |
57 | 0 | int ind, len, newpos, i; |
58 | |
|
59 | 0 | check_char (ch); |
60 | |
|
61 | 0 | len = ccllen[cclp]; |
62 | 0 | ind = cclmap[cclp]; |
63 | | |
64 | | /* check to see if the character is already in the ccl */ |
65 | |
|
66 | 0 | for (i = 0; i < len; ++i) |
67 | 0 | if (ccltbl[ind + i] == ch) |
68 | 0 | return; |
69 | | |
70 | | /* mark newlines */ |
71 | 0 | if (ch == nlch) |
72 | 0 | ccl_has_nl[cclp] = true; |
73 | |
|
74 | 0 | newpos = ind + len; |
75 | | |
76 | | /* For a non-last cclp, expanding the set will overflow and overwrite a |
77 | | * char in the next cclp. |
78 | | * FIXME: Need another allocation scheme for ccl's. */ |
79 | 0 | if (cclp != lastccl) { |
80 | 0 | flexfatal(_("internal error: trying to add a char to a non-last ccl.\n")); |
81 | 0 | } |
82 | |
|
83 | 0 | if (newpos >= current_max_ccl_tbl_size) { |
84 | 0 | current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; |
85 | |
|
86 | 0 | ++num_reallocs; |
87 | |
|
88 | 0 | ccltbl = reallocate_Character_array (ccltbl, |
89 | 0 | current_max_ccl_tbl_size); |
90 | 0 | } |
91 | |
|
92 | 0 | ccllen[cclp] = len + 1; |
93 | 0 | ccltbl[newpos] = (unsigned char) ch; |
94 | 0 | } |
95 | | |
96 | | /* dump_cclp - same thing as list_character_set, but for cclps. */ |
97 | | |
98 | | static void dump_cclp (FILE* file, int cclp) |
99 | 0 | { |
100 | 0 | int i; |
101 | 0 |
|
102 | 0 | putc ('[', file); |
103 | 0 |
|
104 | 0 | for (i = 0; i < ctrl.csize; ++i) { |
105 | 0 | if (ccl_contains(cclp, i)){ |
106 | 0 | int start_char = i; |
107 | 0 |
|
108 | 0 | putc (' ', file); |
109 | 0 |
|
110 | 0 | fputs (readable_form (i), file); |
111 | 0 |
|
112 | 0 | while (++i < ctrl.csize && ccl_contains(cclp,i)) ; |
113 | 0 |
|
114 | 0 | if (i - 1 > start_char) |
115 | 0 | /* this was a run */ |
116 | 0 | fprintf (file, "-%s", |
117 | 0 | readable_form (i - 1)); |
118 | 0 |
|
119 | 0 | putc (' ', file); |
120 | 0 | } |
121 | 0 | } |
122 | 0 |
|
123 | 0 | putc (']', file); |
124 | 0 | } |
125 | | |
126 | | |
127 | | |
128 | | /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */ |
129 | | int |
130 | | ccl_set_diff (int a, int b) |
131 | 0 | { |
132 | 0 | int d, ch; |
133 | | |
134 | | /* create new class */ |
135 | 0 | d = cclinit(); |
136 | | |
137 | | /* In order to handle negation, we spin through all possible chars, |
138 | | * adding each char in a that is not in b. |
139 | | * (This could be O(n^2), but n is small and bounded.) |
140 | | */ |
141 | 0 | for ( ch = 0; ch < ctrl.csize; ++ch ) |
142 | 0 | if (ccl_contains (a, ch) && !ccl_contains(b, ch)) |
143 | 0 | ccladd (d, ch); |
144 | | |
145 | | /* debug */ |
146 | 0 | if (0){ |
147 | 0 | fprintf(stderr, "ccl_set_diff ("); |
148 | 0 | fprintf(stderr, "\n "); |
149 | 0 | dump_cclp (stderr, a); |
150 | 0 | fprintf(stderr, "\n "); |
151 | 0 | dump_cclp (stderr, b); |
152 | 0 | fprintf(stderr, "\n "); |
153 | 0 | dump_cclp (stderr, d); |
154 | 0 | fprintf(stderr, "\n)\n"); |
155 | 0 | } |
156 | 0 | return d; |
157 | 0 | } |
158 | | |
159 | | /* ccl_set_union - create a new ccl as the set union of the two given ccls. */ |
160 | | int |
161 | | ccl_set_union (int a, int b) |
162 | 0 | { |
163 | 0 | int d, i; |
164 | | |
165 | | /* create new class */ |
166 | 0 | d = cclinit(); |
167 | | |
168 | | /* Add all of a */ |
169 | 0 | for (i = 0; i < ccllen[a]; ++i) |
170 | 0 | ccladd (d, ccltbl[cclmap[a] + i]); |
171 | | |
172 | | /* Add all of b */ |
173 | 0 | for (i = 0; i < ccllen[b]; ++i) |
174 | 0 | ccladd (d, ccltbl[cclmap[b] + i]); |
175 | | |
176 | | /* debug */ |
177 | 0 | if (0){ |
178 | 0 | fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d); |
179 | 0 | fprintf(stderr, "\n "); |
180 | 0 | dump_cclp (stderr, a); |
181 | 0 | fprintf(stderr, "\n "); |
182 | 0 | dump_cclp (stderr, b); |
183 | 0 | fprintf(stderr, "\n "); |
184 | 0 | dump_cclp (stderr, d); |
185 | 0 | fprintf(stderr, "\n)\n"); |
186 | 0 | } |
187 | 0 | return d; |
188 | 0 | } |
189 | | |
190 | | |
191 | | /* cclinit - return an empty ccl */ |
192 | | |
193 | | int cclinit (void) |
194 | 0 | { |
195 | 0 | if (++lastccl >= current_maxccls) { |
196 | 0 | current_maxccls += MAX_CCLS_INCREMENT; |
197 | |
|
198 | 0 | ++num_reallocs; |
199 | |
|
200 | 0 | cclmap = |
201 | 0 | reallocate_integer_array (cclmap, current_maxccls); |
202 | 0 | ccllen = |
203 | 0 | reallocate_integer_array (ccllen, current_maxccls); |
204 | 0 | cclng = reallocate_integer_array (cclng, current_maxccls); |
205 | 0 | ccl_has_nl = reallocate_array(ccl_has_nl, current_maxccls, sizeof(char)); |
206 | 0 | } |
207 | |
|
208 | 0 | if (lastccl == 1) |
209 | | /* we're making the first ccl */ |
210 | 0 | cclmap[lastccl] = 0; |
211 | | |
212 | 0 | else |
213 | | /* The new pointer is just past the end of the last ccl. |
214 | | * Since the cclmap points to the \first/ character of a |
215 | | * ccl, adding the length of the ccl to the cclmap pointer |
216 | | * will produce a cursor to the first free space. |
217 | | */ |
218 | 0 | cclmap[lastccl] = |
219 | 0 | cclmap[lastccl - 1] + ccllen[lastccl - 1]; |
220 | |
|
221 | 0 | ccllen[lastccl] = 0; |
222 | 0 | cclng[lastccl] = 0; /* ccl's start out life un-negated */ |
223 | 0 | ccl_has_nl[lastccl] = false; |
224 | |
|
225 | 0 | return lastccl; |
226 | 0 | } |
227 | | |
228 | | |
229 | | /* cclnegate - negate the given ccl */ |
230 | | |
231 | | void cclnegate (int cclp) |
232 | 0 | { |
233 | 0 | cclng[cclp] = 1; |
234 | 0 | ccl_has_nl[cclp] = !ccl_has_nl[cclp]; |
235 | 0 | } |
236 | | |
237 | | |
238 | | /* list_character_set - list the members of a set of characters in CCL form |
239 | | * |
240 | | * Writes to the given file a character-class representation of those |
241 | | * characters present in the given CCL. A character is present if it |
242 | | * has a non-zero value in the cset array. |
243 | | */ |
244 | | |
245 | | void list_character_set (FILE *file, int cset[]) |
246 | 0 | { |
247 | 0 | int i; |
248 | |
|
249 | 0 | putc ('[', file); |
250 | |
|
251 | 0 | for (i = 0; i < ctrl.csize; ++i) { |
252 | 0 | if (cset[i]) { |
253 | 0 | int start_char = i; |
254 | |
|
255 | 0 | putc (' ', file); |
256 | |
|
257 | 0 | fputs (readable_form (i), file); |
258 | |
|
259 | 0 | while (++i < ctrl.csize && cset[i]) ; |
260 | |
|
261 | 0 | if (i - 1 > start_char) |
262 | | /* this was a run */ |
263 | 0 | fprintf (file, "-%s", |
264 | 0 | readable_form (i - 1)); |
265 | |
|
266 | 0 | putc (' ', file); |
267 | 0 | } |
268 | 0 | } |
269 | |
|
270 | 0 | putc (']', file); |
271 | 0 | } |
272 | | |
273 | | /** Determines if the range [c1-c2] is unambiguous in a case-insensitive |
274 | | * scanner. Specifically, if a lowercase or uppercase character, x, is in the |
275 | | * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also |
276 | | * be in the range. If not, then this range is ambiguous, and the function |
277 | | * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that |
278 | | * [a-z] will be labeled ambiguous because it does not include [A-Z]. |
279 | | * |
280 | | * @param c1 the lower end of the range |
281 | | * @param c2 the upper end of the range |
282 | | * @return true if [c1-c2] is not ambiguous for a caseless scanner. |
283 | | */ |
284 | | bool range_covers_case (int c1, int c2) |
285 | 0 | { |
286 | 0 | int i, o; |
287 | |
|
288 | 0 | for (i = c1; i <= c2; i++) { |
289 | 0 | if (has_case (i)) { |
290 | 0 | o = reverse_case (i); |
291 | 0 | if (o < c1 || c2 < o) |
292 | 0 | return false; |
293 | 0 | } |
294 | 0 | } |
295 | 0 | return true; |
296 | 0 | } |
297 | | |
298 | | /** Reverse the case of a character, if possible. |
299 | | * @return c if case-reversal does not apply. |
300 | | */ |
301 | | int reverse_case (int c) |
302 | 0 | { |
303 | 0 | return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c); |
304 | 0 | } |
305 | | |
306 | | /** Return true if c is uppercase or lowercase. */ |
307 | | bool has_case (int c) |
308 | 0 | { |
309 | 0 | return (isupper (c) || islower (c)) ? true : false; |
310 | 0 | } |