Line | Count | Source |
1 | | #include "prism/internal/char.h" |
2 | | |
3 | | #include "prism/compiler/inline.h" |
4 | | #include "prism/internal/line_offset_list.h" |
5 | | |
/**
 * Bit tested against pm_byte_table entries by pm_strspn_regexp_option.
 */
#define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2)

/**
 * Bits tested against pm_number_table entries. Each radix owns a pair of
 * bits: the *_DIGIT bit is set only on the digits of that radix, while the
 * *_NUMBER bit is set on those digits and additionally on the underscore
 * separator.
 */
#define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0)
#define PRISM_NUMBER_BIT_BINARY_NUMBER (1 << 1)
#define PRISM_NUMBER_BIT_OCTAL_DIGIT (1 << 2)
#define PRISM_NUMBER_BIT_OCTAL_NUMBER (1 << 3)
#define PRISM_NUMBER_BIT_DECIMAL_DIGIT (1 << 4)
#define PRISM_NUMBER_BIT_DECIMAL_NUMBER (1 << 5)
#define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
#define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
16 | | |
/**
 * Per-byte classification flags, indexed by the raw byte value.
 *
 * - 0x04 (PRISM_CHAR_BIT_REGEXP_OPTION) is set on the ASCII letters A-Z and
 *   a-z.
 * - 0x03 is stored for tab, vertical tab, form feed, carriage return and
 *   space; 0x01 is stored for line feed. These low two bits are tested
 *   through macros that are not defined in this file (for example
 *   PRISM_CHAR_BIT_WHITESPACE) -- presumably whitespace and inline
 *   whitespace; confirm against the header.
 *
 * Every other byte, including all bytes >= 0x80, carries no flags.
 */
const uint8_t pm_byte_table[256] = {
//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
    3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3x
    0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 4x
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 5x
    0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 6x
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 7x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
};
36 | | |
/**
 * Per-byte numeric classification flags, indexed by the raw byte value. Each
 * entry is a combination of the PRISM_NUMBER_BIT_* masks:
 *
 * - 0xff: '0' and '1' (digit and number bits of every radix)
 * - 0xfc: '2' through '7' (octal, decimal and hexadecimal bits)
 * - 0xf0: '8' and '9' (decimal and hexadecimal bits)
 * - 0xc0: 'A'-'F' and 'a'-'f' (hexadecimal bits only)
 * - 0xaa: '_' (the *_NUMBER bit of every radix, none of the *_DIGIT bits)
 *
 * Every other byte is 0x00.
 */
static const uint8_t pm_number_table[256] = {
    // 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 2x
    0xff, 0xff, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xf0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 3x
    0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 4x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, // 5x
    0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 6x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 7x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 8x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 9x
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ax
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bx
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Cx
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Dx
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ex
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Fx
};
56 | | |
57 | | /** |
58 | | * Returns the number of characters at the start of the string that match the |
59 | | * given kind. Disallows searching past the given maximum number of characters. |
60 | | */ |
61 | | static PRISM_INLINE size_t |
62 | 0 | pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) { |
63 | 0 | if (length <= 0) return 0; |
64 | | |
65 | 0 | size_t size = 0; |
66 | 0 | size_t maximum = (size_t) length; |
67 | |
|
68 | 0 | while (size < maximum && (pm_byte_table[string[size]] & kind)) size++; |
69 | 0 | return size; |
70 | 0 | } |
71 | | |
72 | | /** |
73 | | * Returns the number of characters at the start of the string that are |
74 | | * whitespace. Disallows searching past the given maximum number of characters. |
75 | | */ |
76 | | size_t |
77 | 0 | pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) { |
78 | 0 | return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_WHITESPACE); |
79 | 0 | } |
80 | | |
81 | | /** |
82 | | * Returns the number of characters at the start of the string that are |
83 | | * whitespace while also tracking the location of each newline. Disallows |
84 | | * searching past the given maximum number of characters. |
85 | | */ |
86 | | size_t |
87 | 0 | pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_t *arena, pm_line_offset_list_t *line_offsets, uint32_t start_offset) { |
88 | 0 | if (length <= 0) return 0; |
89 | | |
90 | 0 | uint32_t size = 0; |
91 | 0 | uint32_t maximum = (uint32_t) length; |
92 | |
|
93 | 0 | while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) { |
94 | 0 | if (string[size] == '\n') { |
95 | 0 | pm_line_offset_list_append(arena, line_offsets, start_offset + size + 1); |
96 | 0 | } |
97 | |
|
98 | 0 | size++; |
99 | 0 | } |
100 | |
|
101 | 0 | return size; |
102 | 0 | } |
103 | | |
104 | | /** |
105 | | * Returns the number of characters at the start of the string that are regexp |
106 | | * options. Disallows searching past the given maximum number of characters. |
107 | | */ |
108 | | size_t |
109 | 0 | pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) { |
110 | 0 | return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION); |
111 | 0 | } |
112 | | |
113 | | |
114 | | /** |
115 | | * Scan through the string and return the number of characters at the start of |
116 | | * the string that match the given kind. Disallows searching past the given |
117 | | * maximum number of characters. |
118 | | */ |
119 | | static PRISM_INLINE size_t |
120 | 0 | pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) { |
121 | 0 | if (length <= 0) return 0; |
122 | | |
123 | 0 | size_t size = 0; |
124 | 0 | size_t maximum = (size_t) length; |
125 | |
|
126 | 0 | while (size < maximum && (pm_number_table[string[size]] & kind)) size++; |
127 | 0 | return size; |
128 | 0 | } |
129 | | |
130 | | /** |
131 | | * Scan through the string and return the number of characters at the start of |
132 | | * the string that match the given kind. Disallows searching past the given |
133 | | * maximum number of characters. |
134 | | * |
135 | | * Additionally, report the location of the last invalid underscore character |
136 | | * found in the string through the out invalid parameter. |
137 | | */ |
138 | | static PRISM_INLINE size_t |
139 | 0 | pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) { |
140 | 0 | if (length <= 0) return 0; |
141 | | |
142 | 0 | size_t size = 0; |
143 | 0 | size_t maximum = (size_t) length; |
144 | |
|
145 | 0 | bool underscore = false; |
146 | 0 | while (size < maximum && (pm_number_table[string[size]] & kind)) { |
147 | 0 | if (string[size] == '_') { |
148 | 0 | if (underscore) *invalid = string + size; |
149 | 0 | underscore = true; |
150 | 0 | } else { |
151 | 0 | underscore = false; |
152 | 0 | } |
153 | |
|
154 | 0 | size++; |
155 | 0 | } |
156 | |
|
157 | 0 | if (size > 0 && string[size - 1] == '_') *invalid = string + size - 1; |
158 | 0 | return size; |
159 | 0 | } |
160 | | |
161 | | /** |
162 | | * Returns the number of characters at the start of the string that are binary |
163 | | * digits or underscores. Disallows searching past the given maximum number of |
164 | | * characters. |
165 | | * |
166 | | * If multiple underscores are found in a row or if an underscore is |
167 | | * found at the end of the number, then the invalid pointer is set to the index |
168 | | * of the first invalid underscore. |
169 | | */ |
170 | | size_t |
171 | 0 | pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { |
172 | 0 | return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_BINARY_NUMBER); |
173 | 0 | } |
174 | | |
175 | | /** |
176 | | * Returns the number of characters at the start of the string that are octal |
177 | | * digits or underscores. Disallows searching past the given maximum number of |
178 | | * characters. |
179 | | * |
180 | | * If multiple underscores are found in a row or if an underscore is |
181 | | * found at the end of the number, then the invalid pointer is set to the index |
182 | | * of the first invalid underscore. |
183 | | */ |
184 | | size_t |
185 | 0 | pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { |
186 | 0 | return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_OCTAL_NUMBER); |
187 | 0 | } |
188 | | |
189 | | /** |
190 | | * Returns the number of characters at the start of the string that are decimal |
191 | | * digits. Disallows searching past the given maximum number of characters. |
192 | | */ |
193 | | size_t |
194 | 0 | pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) { |
195 | 0 | return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_DECIMAL_DIGIT); |
196 | 0 | } |
197 | | |
198 | | /** |
199 | | * Returns the number of characters at the start of the string that are decimal |
200 | | * digits or underscores. Disallows searching past the given maximum number of |
201 | | * characters. |
202 | | * |
203 | | * If multiple underscores are found in a row or if an underscore is |
204 | | * found at the end of the number, then the invalid pointer is set to the index |
205 | | * of the first invalid underscore |
206 | | */ |
207 | | size_t |
208 | 0 | pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { |
209 | 0 | return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_DECIMAL_NUMBER); |
210 | 0 | } |
211 | | |
212 | | /** |
213 | | * Returns the number of characters at the start of the string that are |
214 | | * hexadecimal digits. Disallows searching past the given maximum number of |
215 | | * characters. |
216 | | */ |
217 | | size_t |
218 | 0 | pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) { |
219 | 0 | return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT); |
220 | 0 | } |
221 | | |
222 | | /** |
223 | | * Returns the number of characters at the start of the string that are |
224 | | * hexadecimal digits or underscores. Disallows searching past the given maximum |
225 | | * number of characters. |
226 | | * |
227 | | * If multiple underscores are found in a row or if an underscore is |
228 | | * found at the end of the number, then the invalid pointer is set to the index |
229 | | * of the first invalid underscore. |
230 | | */ |
231 | | size_t |
232 | 0 | pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { |
233 | 0 | return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER); |
234 | 0 | } |
235 | | |
236 | | /** |
237 | | * Returns true if the given character matches the given kind. |
238 | | */ |
239 | | static PRISM_INLINE bool |
240 | 0 | pm_char_is_number_kind(const uint8_t b, uint8_t kind) { |
241 | 0 | return (pm_number_table[b] & kind) != 0; |
242 | 0 | } |
243 | | |
244 | | /** |
245 | | * Returns true if the given character is a binary digit. |
246 | | */ |
247 | | bool |
248 | 0 | pm_char_is_binary_digit(const uint8_t b) { |
249 | 0 | return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_BINARY_DIGIT); |
250 | 0 | } |
251 | | |
252 | | /** |
253 | | * Returns true if the given character is an octal digit. |
254 | | */ |
255 | | bool |
256 | 0 | pm_char_is_octal_digit(const uint8_t b) { |
257 | 0 | return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_OCTAL_DIGIT); |
258 | 0 | } |
259 | | |
260 | | /** |
261 | | * Returns true if the given character is a decimal digit. |
262 | | */ |
263 | | bool |
264 | 0 | pm_char_is_decimal_digit(const uint8_t b) { |
265 | 0 | return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_DECIMAL_DIGIT); |
266 | 0 | } |
267 | | |
268 | | /** |
269 | | * Returns true if the given character is a hexadecimal digit. |
270 | | */ |
271 | | bool |
272 | 0 | pm_char_is_hexadecimal_digit(const uint8_t b) { |
273 | 0 | return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT); |
274 | 0 | } |