Coverage Report

Created: 2026-03-31 07:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ruby/prism/char.c
Line
Count
Source
1
#include "prism/internal/char.h"
2
3
#include "prism/compiler/inline.h"
4
#include "prism/internal/line_offset_list.h"
5
6
0
/*
 * Flag bits stored in the lookup tables defined in this file.
 *
 * PRISM_CHAR_BIT_REGEXP_OPTION is the pm_byte_table bit tested by
 * pm_strspn_regexp_option().
 *
 * The PRISM_NUMBER_BIT_* values are the pm_number_table bits. For each radix
 * there is a *_DIGIT bit and a *_NUMBER bit: in pm_number_table the digit
 * characters carry both bits, while '_' carries only the *_NUMBER bits.
 */
#define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2)
7
8
0
#define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0)
9
0
#define PRISM_NUMBER_BIT_BINARY_NUMBER (1 << 1)
10
0
#define PRISM_NUMBER_BIT_OCTAL_DIGIT (1 << 2)
11
0
#define PRISM_NUMBER_BIT_OCTAL_NUMBER (1 << 3)
12
0
#define PRISM_NUMBER_BIT_DECIMAL_DIGIT (1 << 4)
13
0
#define PRISM_NUMBER_BIT_DECIMAL_NUMBER (1 << 5)
14
0
#define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
15
0
#define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
16
17
/**
 * Flag lookup table indexed by byte value (row = high nibble, column = low
 * nibble). The entries used below are:
 *
 *   - 4: PRISM_CHAR_BIT_REGEXP_OPTION, set for the ASCII letters A-Z and a-z.
 *   - 1: bit 0 only, set for '\n' (0x0A).
 *   - 3: bits 0 and 1, set for 0x09, 0x0B, 0x0C, 0x0D and 0x20.
 *
 * Every other byte maps to 0.
 *
 * NOTE(review): bits 0 and 1 are not named in this file; presumably they are
 * the whitespace flags (e.g. PRISM_CHAR_BIT_WHITESPACE) declared in
 * prism/internal/char.h -- confirm against that header.
 */
const uint8_t pm_byte_table[256] = {
18
//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
19
    0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x
20
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
21
    3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
22
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3x
23
    0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 4x
24
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 5x
25
    0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 6x
26
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 7x
27
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
28
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
29
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
30
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
31
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
32
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
33
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
34
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
35
};
36
37
/**
 * Flag lookup table for numeric literals, indexed by byte value (row = high
 * nibble, column = low nibble). Each entry is a combination of the
 * PRISM_NUMBER_BIT_* flags:
 *
 *   - '0'-'1'            0xff  every *_DIGIT and *_NUMBER flag
 *   - '2'-'7'            0xfc  octal, decimal and hexadecimal flags
 *   - '8'-'9'            0xf0  decimal and hexadecimal flags
 *   - 'A'-'F', 'a'-'f'   0xc0  hexadecimal flags only
 *   - '_'                0xaa  every *_NUMBER flag, but no *_DIGIT flag
 *
 * Every other byte maps to 0x00.
 */
static const uint8_t pm_number_table[256] = {
38
    // 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
39
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x
40
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1x
41
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 2x
42
    0xff, 0xff, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xf0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 3x
43
    0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 4x
44
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, // 5x
45
    0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 6x
46
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 7x
47
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 8x
48
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 9x
49
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ax
50
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bx
51
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Cx
52
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Dx
53
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ex
54
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Fx
55
};
56
57
/**
58
 * Returns the number of characters at the start of the string that match the
59
 * given kind. Disallows searching past the given maximum number of characters.
60
 */
61
static PRISM_INLINE size_t
62
0
pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
63
0
    if (length <= 0) return 0;
64
65
0
    size_t size = 0;
66
0
    size_t maximum = (size_t) length;
67
68
0
    while (size < maximum && (pm_byte_table[string[size]] & kind)) size++;
69
0
    return size;
70
0
}
71
72
/**
73
 * Returns the number of characters at the start of the string that are
74
 * whitespace. Disallows searching past the given maximum number of characters.
75
 */
76
size_t
77
0
pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
78
0
    return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_WHITESPACE);
79
0
}
80
81
/**
82
 * Returns the number of characters at the start of the string that are
83
 * whitespace while also tracking the location of each newline. Disallows
84
 * searching past the given maximum number of characters.
85
 */
86
size_t
87
0
pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_t *arena, pm_line_offset_list_t *line_offsets, uint32_t start_offset) {
88
0
    if (length <= 0) return 0;
89
90
0
    uint32_t size = 0;
91
0
    uint32_t maximum = (uint32_t) length;
92
93
0
    while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) {
94
0
        if (string[size] == '\n') {
95
0
            pm_line_offset_list_append(arena, line_offsets, start_offset + size + 1);
96
0
        }
97
98
0
        size++;
99
0
    }
100
101
0
    return size;
102
0
}
103
104
/**
105
 * Returns the number of characters at the start of the string that are regexp
106
 * options. Disallows searching past the given maximum number of characters.
107
 */
108
size_t
109
0
pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
110
0
    return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
111
0
}
112
113
114
/**
115
 * Scan through the string and return the number of characters at the start of
116
 * the string that match the given kind. Disallows searching past the given
117
 * maximum number of characters.
118
 */
119
static PRISM_INLINE size_t
120
0
pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
121
0
    if (length <= 0) return 0;
122
123
0
    size_t size = 0;
124
0
    size_t maximum = (size_t) length;
125
126
0
    while (size < maximum && (pm_number_table[string[size]] & kind)) size++;
127
0
    return size;
128
0
}
129
130
/**
131
 * Scan through the string and return the number of characters at the start of
132
 * the string that match the given kind. Disallows searching past the given
133
 * maximum number of characters.
134
 *
135
 * Additionally, report the location of the last invalid underscore character
136
 * found in the string through the out invalid parameter.
137
 */
138
static PRISM_INLINE size_t
139
0
pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
140
0
    if (length <= 0) return 0;
141
142
0
    size_t size = 0;
143
0
    size_t maximum = (size_t) length;
144
145
0
    bool underscore = false;
146
0
    while (size < maximum && (pm_number_table[string[size]] & kind)) {
147
0
        if (string[size] == '_') {
148
0
            if (underscore) *invalid = string + size;
149
0
            underscore = true;
150
0
        } else {
151
0
            underscore = false;
152
0
        }
153
154
0
        size++;
155
0
    }
156
157
0
    if (size > 0 && string[size - 1] == '_') *invalid = string + size - 1;
158
0
    return size;
159
0
}
160
161
/**
162
 * Returns the number of characters at the start of the string that are binary
163
 * digits or underscores. Disallows searching past the given maximum number of
164
 * characters.
165
 *
166
 * If multiple underscores are found in a row or if an underscore is
167
 * found at the end of the number, then the invalid pointer is set to the index
168
 * of the first invalid underscore.
169
 */
170
size_t
171
0
pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
172
0
    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_BINARY_NUMBER);
173
0
}
174
175
/**
176
 * Returns the number of characters at the start of the string that are octal
177
 * digits or underscores. Disallows searching past the given maximum number of
178
 * characters.
179
 *
180
 * If multiple underscores are found in a row or if an underscore is
181
 * found at the end of the number, then the invalid pointer is set to the index
182
 * of the first invalid underscore.
183
 */
184
size_t
185
0
pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
186
0
    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_OCTAL_NUMBER);
187
0
}
188
189
/**
190
 * Returns the number of characters at the start of the string that are decimal
191
 * digits. Disallows searching past the given maximum number of characters.
192
 */
193
size_t
194
0
pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
195
0
    return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
196
0
}
197
198
/**
199
 * Returns the number of characters at the start of the string that are decimal
200
 * digits or underscores. Disallows searching past the given maximum number of
201
 * characters.
202
 *
203
 * If multiple underscores are found in a row or if an underscore is
204
 * found at the end of the number, then the invalid pointer is set to the index
205
 * of the first invalid underscore
206
 */
207
size_t
208
0
pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
209
0
    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_DECIMAL_NUMBER);
210
0
}
211
212
/**
213
 * Returns the number of characters at the start of the string that are
214
 * hexadecimal digits. Disallows searching past the given maximum number of
215
 * characters.
216
 */
217
size_t
218
0
pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
219
0
    return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
220
0
}
221
222
/**
223
 * Returns the number of characters at the start of the string that are
224
 * hexadecimal digits or underscores. Disallows searching past the given maximum
225
 * number of characters.
226
 *
227
 * If multiple underscores are found in a row or if an underscore is
228
 * found at the end of the number, then the invalid pointer is set to the index
229
 * of the first invalid underscore.
230
 */
231
size_t
232
0
pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
233
0
    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER);
234
0
}
235
236
/**
237
 * Returns true if the given character matches the given kind.
238
 */
239
static PRISM_INLINE bool
240
0
pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
241
0
    return (pm_number_table[b] & kind) != 0;
242
0
}
243
244
/**
245
 * Returns true if the given character is a binary digit.
246
 */
247
bool
248
0
pm_char_is_binary_digit(const uint8_t b) {
249
0
    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_BINARY_DIGIT);
250
0
}
251
252
/**
253
 * Returns true if the given character is an octal digit.
254
 */
255
bool
256
0
pm_char_is_octal_digit(const uint8_t b) {
257
0
    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_OCTAL_DIGIT);
258
0
}
259
260
/**
261
 * Returns true if the given character is a decimal digit.
262
 */
263
bool
264
0
pm_char_is_decimal_digit(const uint8_t b) {
265
0
    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
266
0
}
267
268
/**
269
 * Returns true if the given character is a hexadecimal digit.
270
 */
271
bool
272
0
pm_char_is_hexadecimal_digit(const uint8_t b) {
273
0
    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
274
0
}