Coverage Report

Created: 2026-06-10 06:27

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/flex/src/ccl.c
Line
Count
Source
1
/* ccl - routines for character classes */
2
3
/* SPDX-License-Identifier: BSD-3-Clause-flex */
4
5
/*  Copyright (c) 1990 The Regents of the University of California. */
6
/*  All rights reserved. */
7
8
/*  This code is derived from software contributed to Berkeley by */
9
/*  Vern Paxson. */
10
11
/*  The United States Government has rights in this work pursuant */
12
/*  to contract no. DE-AC03-76SF00098 between the United States */
13
 /*  Department of Energy and the University of California. */
14
15
/*  This file is part of flex. */
16
17
/*  Redistribution and use in source and binary forms, with or without */
18
/*  modification, are permitted provided that the following conditions */
19
/*  are met: */
20
21
/*  1. Redistributions of source code must retain the above copyright */
22
/*     notice, this list of conditions and the following disclaimer. */
23
/*  2. Redistributions in binary form must reproduce the above copyright */
24
/*     notice, this list of conditions and the following disclaimer in the */
25
/*     documentation and/or other materials provided with the distribution. */
26
27
/*  Neither the name of the University nor the names of its contributors */
28
/*  may be used to endorse or promote products derived from this software */
29
/*  without specific prior written permission. */
30
31
/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
32
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
33
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
34
/*  PURPOSE. */
35
36
#include "flexdef.h"
37
38
/* return true if the chr is in the ccl. Takes negation into account. */
39
static bool
40
ccl_contains (const int cclp, const int ch)
41
0
{
42
0
  int     ind, len, i;
43
44
0
  len = ccllen[cclp];
45
0
  ind = cclmap[cclp];
46
47
0
  for (i = 0; i < len; ++i)
48
0
    if (ccltbl[ind + i] == ch)
49
0
      return !cclng[cclp];
50
51
0
    return cclng[cclp];
52
0
}
53
54
55
/* ccladd - add a single character to a ccl */
56
57
void ccladd (int cclp, int ch)
58
0
{
59
0
  int     ind, len, newpos, i;
60
61
0
  check_char (ch);
62
63
0
  len = ccllen[cclp];
64
0
  ind = cclmap[cclp];
65
66
  /* check to see if the character is already in the ccl */
67
68
0
  for (i = 0; i < len; ++i)
69
0
    if (ccltbl[ind + i] == ch)
70
0
      return;
71
72
  /* mark newlines */
73
0
  if (ch == nlch)
74
0
    ccl_has_nl[cclp] = true;
75
76
0
  newpos = ind + len;
77
78
  /* For a non-last cclp, expanding the set will overflow and overwrite a
79
   * char in the next cclp.
80
   * FIXME: Need another allocation scheme for ccl's. */
81
0
  if (cclp != lastccl) {
82
0
    flexfatal(_("internal error: trying to add a char to a non-last ccl.\n"));
83
0
  }
84
85
0
  if (newpos >= current_max_ccl_tbl_size) {
86
0
    current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
87
88
0
    ++num_reallocs;
89
90
0
    ccltbl = reallocate_Character_array (ccltbl,
91
0
                 current_max_ccl_tbl_size);
92
0
  }
93
94
0
  ccllen[cclp] = len + 1;
95
0
  ccltbl[newpos] = (unsigned char) ch;
96
0
}
97
98
/* dump_cclp - same thing as list_character_set, but for cclps.  */
99
100
static void    dump_cclp (FILE* file, int cclp)
101
0
{
102
0
  int i;
103
0
104
0
  putc ('[', file);
105
0
106
0
  for (i = 0; i < ctrl.csize; ++i) {
107
0
    if (ccl_contains(cclp, i)){
108
0
      int start_char = i;
109
0
110
0
      putc (' ', file);
111
0
112
0
      fputs (readable_form (i), file);
113
0
114
0
      while (++i < ctrl.csize && ccl_contains(cclp,i)) ;
115
0
116
0
      if (i - 1 > start_char)
117
0
        /* this was a run */
118
0
        fprintf (file, "-%s",
119
0
           readable_form (i - 1));
120
0
121
0
      putc (' ', file);
122
0
    }
123
0
  }
124
0
125
0
  putc (']', file);
126
0
}
127
128
129
130
/* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
131
int
132
ccl_set_diff (int a, int b)
133
0
{
134
0
    int  d, ch;
135
136
    /* create new class  */
137
0
    d = cclinit();
138
139
    /* In order to handle negation, we spin through all possible chars,
140
     * adding each char in a that is not in b.
141
     * (This could be O(n^2), but n is small and bounded.)
142
     */
143
0
  for ( ch = 0; ch < ctrl.csize; ++ch )
144
0
        if (ccl_contains (a, ch) && !ccl_contains(b, ch))
145
0
            ccladd (d, ch);
146
147
    /* debug */
148
0
    if (0){
149
0
        fprintf(stderr, "ccl_set_diff (");
150
0
            fprintf(stderr, "\n    ");
151
0
            dump_cclp (stderr, a);
152
0
            fprintf(stderr, "\n    ");
153
0
            dump_cclp (stderr, b);
154
0
            fprintf(stderr, "\n    ");
155
0
            dump_cclp (stderr, d);
156
0
        fprintf(stderr, "\n)\n");
157
0
    }
158
0
    return d;
159
0
}
160
161
/* ccl_set_union - create a new ccl as the set union of the two given ccls. */
162
int
163
ccl_set_union (int a, int b)
164
0
{
165
0
    int  d, i;
166
167
    /* create new class  */
168
0
    d = cclinit();
169
170
    /* Add all of a */
171
0
    for (i = 0; i < ccllen[a]; ++i)
172
0
    ccladd (d, ccltbl[cclmap[a] + i]);
173
174
    /* Add all of b */
175
0
    for (i = 0; i < ccllen[b]; ++i)
176
0
    ccladd (d, ccltbl[cclmap[b] + i]);
177
178
    /* debug */
179
0
    if (0){
180
0
        fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
181
0
            fprintf(stderr, "\n    ");
182
0
            dump_cclp (stderr, a);
183
0
            fprintf(stderr, "\n    ");
184
0
            dump_cclp (stderr, b);
185
0
            fprintf(stderr, "\n    ");
186
0
            dump_cclp (stderr, d);
187
0
        fprintf(stderr, "\n)\n");
188
0
    }
189
0
    return d;
190
0
}
191
192
193
/* cclinit - return an empty ccl */
194
195
int     cclinit (void)
196
0
{
197
0
  if (++lastccl >= current_maxccls) {
198
0
    current_maxccls += MAX_CCLS_INCREMENT;
199
200
0
    ++num_reallocs;
201
202
0
    cclmap =
203
0
      reallocate_integer_array (cclmap, current_maxccls);
204
0
    ccllen =
205
0
      reallocate_integer_array (ccllen, current_maxccls);
206
0
    cclng = reallocate_integer_array (cclng, current_maxccls);
207
0
    ccl_has_nl = reallocate_array(ccl_has_nl, current_maxccls, sizeof(char));
208
0
  }
209
210
0
  if (lastccl == 1)
211
    /* we're making the first ccl */
212
0
    cclmap[lastccl] = 0;
213
214
0
  else
215
    /* The new pointer is just past the end of the last ccl.
216
     * Since the cclmap points to the \first/ character of a
217
     * ccl, adding the length of the ccl to the cclmap pointer
218
     * will produce a cursor to the first free space.
219
     */
220
0
    cclmap[lastccl] =
221
0
      cclmap[lastccl - 1] + ccllen[lastccl - 1];
222
223
0
  ccllen[lastccl] = 0;
224
0
  cclng[lastccl] = 0; /* ccl's start out life un-negated */
225
0
  ccl_has_nl[lastccl] = false;
226
227
0
  return lastccl;
228
0
}
229
230
231
/* cclnegate - negate the given ccl */
232
233
void    cclnegate (int cclp)
234
0
{
235
0
  cclng[cclp] = 1;
236
0
  ccl_has_nl[cclp] = !ccl_has_nl[cclp];
237
0
}
238
239
240
/* list_character_set - list the members of a set of characters in CCL form
241
 *
242
 * Writes to the given file a character-class representation of those
243
 * characters present in the given CCL.  A character is present if it
244
 * has a non-zero value in the cset array.
245
 */
246
247
void    list_character_set (FILE *file, int cset[])
248
0
{
249
0
  int i;
250
251
0
  putc ('[', file);
252
253
0
  for (i = 0; i < ctrl.csize; ++i) {
254
0
    if (cset[i]) {
255
0
      int start_char = i;
256
257
0
      putc (' ', file);
258
259
0
      fputs (readable_form (i), file);
260
261
0
      while (++i < ctrl.csize && cset[i]) ;
262
263
0
      if (i - 1 > start_char)
264
        /* this was a run */
265
0
        fprintf (file, "-%s",
266
0
           readable_form (i - 1));
267
268
0
      putc (' ', file);
269
0
    }
270
0
  }
271
272
0
  putc (']', file);
273
0
}
274
275
/** Determines if the range [c1-c2] is unambiguous in a case-insensitive
276
 * scanner.  Specifically, if a lowercase or uppercase character, x, is in the
277
 * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
278
 * be in the range. If not, then this range is ambiguous, and the function
279
 * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware that
280
 * [a-z] will be labeled ambiguous because it does not include [A-Z].
281
 *
282
 * @param c1 the lower end of the range
283
 * @param c2 the upper end of the range
284
 * @return true if [c1-c2] is not ambiguous for a caseless scanner.
285
 */
286
bool range_covers_case (int c1, int c2)
287
0
{
288
0
  int     i, o;
289
290
0
  for (i = c1; i <= c2; i++) {
291
0
    if (has_case (i)) {
292
0
      o = reverse_case (i);
293
0
      if (o < c1 || c2 < o)
294
0
        return false;
295
0
    }
296
0
  }
297
0
  return true;
298
0
}
299
300
/** Reverse the case of a character, if possible.
301
 * @return c if case-reversal does not apply.
302
 */
303
int reverse_case (int c)
304
0
{
305
0
  return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
306
0
}
307
308
/** Return true if c is uppercase or lowercase. */
309
bool has_case (int c)
310
0
{
311
0
  return (isupper (c) || islower (c)) ? true : false;
312
0
}