/src/pjsip/pjlib-util/include/pjlib-util/scanner.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com) |
3 | | * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org> |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | | */ |
19 | | #ifndef __PJ_SCANNER_H__ |
20 | | #define __PJ_SCANNER_H__ |
21 | | |
22 | | /** |
23 | | * @file scanner.h |
24 | | * @brief Text Scanning. |
25 | | */ |
26 | | |
27 | | #include <pjlib-util/types.h> |
28 | | |
29 | | PJ_BEGIN_DECL |
30 | | |
31 | | /** |
32 | | * @defgroup PJ_SCAN Fast Text Scanning |
33 | | * @ingroup PJLIB_TEXT |
34 | | * @brief Text scanning utility. |
35 | | * |
36 | | * This module provides fast text scanning functions. |
37 | | * |
38 | | * @{ |
39 | | */ |
40 | | #if defined(PJ_SCANNER_USE_BITWISE) && PJ_SCANNER_USE_BITWISE != 0 |
41 | | # include <pjlib-util/scanner_cis_bitwise.h> |
42 | | #else |
43 | | # include <pjlib-util/scanner_cis_uint.h> |
44 | | #endif |
45 | | |
46 | | /** |
47 | | * Initialize scanner input specification buffer. |
48 | | * |
49 | | * @param cs_buf The scanner character specification. |
50 | | */ |
51 | | PJ_DECL(void) pj_cis_buf_init(pj_cis_buf_t *cs_buf); |
52 | | |
53 | | /** |
54 | | * Create a new input specification. |
55 | | * |
56 | | * @param cs_buf Specification buffer. |
57 | | * @param cis Character input specification to be initialized. |
58 | | * |
59 | | * @return PJ_SUCCESS if new specification has been successfully |
60 | | * created, or PJ_ETOOMANY if there are already too many |
61 | | * specifications in the buffer. |
62 | | */ |
63 | | PJ_DECL(pj_status_t) pj_cis_init(pj_cis_buf_t *cs_buf, pj_cis_t *cis); |
64 | | |
65 | | /** |
66 | | * Create a new input specification based on an existing specification. |
67 | | * |
68 | | * @param new_cis The new specification to be initialized. |
69 | | * @param existing The existing specification, from which the input |
70 | | * bitmask will be copied to the new specification. |
71 | | * |
72 | | * @return PJ_SUCCESS if new specification has been successfully |
73 | | * created, or PJ_ETOOMANY if there are already too many |
74 | | * specifications in the buffer. |
75 | | */ |
76 | | PJ_DECL(pj_status_t) pj_cis_dup(pj_cis_t *new_cis, pj_cis_t *existing); |
77 | | |
78 | | /** |
79 | | * Add the characters in the specified range '[cstart, cend)' to the |
80 | | * specification (the last character itself ('cend') is not added). |
81 | | * |
82 | | * @param cis The scanner character specification. |
83 | | * @param cstart The first character in the range. |
84 | | * @param cend The next character after the last character in the range. |
85 | | */ |
86 | | PJ_DECL(void) pj_cis_add_range( pj_cis_t *cis, int cstart, int cend); |
87 | | |
88 | | /** |
89 | | * Add alphabetic characters to the specification. |
90 | | * |
91 | | * @param cis The scanner character specification. |
92 | | */ |
93 | | PJ_DECL(void) pj_cis_add_alpha( pj_cis_t *cis); |
94 | | |
95 | | /** |
96 | | * Add numeric characters to the specification. |
97 | | * |
98 | | * @param cis The scanner character specification. |
99 | | */ |
100 | | PJ_DECL(void) pj_cis_add_num( pj_cis_t *cis); |
101 | | |
102 | | /** |
103 | | * Add the characters in the string to the specification. |
104 | | * |
105 | | * @param cis The scanner character specification. |
106 | | * @param str The string. |
107 | | */ |
108 | | PJ_DECL(void) pj_cis_add_str( pj_cis_t *cis, const char *str); |
109 | | |
110 | | /** |
111 | | * Add specification from another specification. |
112 | | * |
113 | | * @param cis The specification is to be set. |
114 | | * @param rhs The specification to be copied. |
115 | | */ |
116 | | PJ_DECL(void) pj_cis_add_cis( pj_cis_t *cis, const pj_cis_t *rhs); |
117 | | |
118 | | /** |
119 | | * Delete characters in the specified range from the specification. |
120 | | * |
121 | | * @param cis The scanner character specification. |
122 | | * @param cstart The first character in the range. |
123 | | * @param cend The next character after the last character in the range. |
124 | | */ |
125 | | PJ_DECL(void) pj_cis_del_range( pj_cis_t *cis, int cstart, int cend); |
126 | | |
127 | | /** |
128 | | * Delete characters in the specified string from the specification. |
129 | | * |
130 | | * @param cis The scanner character specification. |
131 | | * @param str The string. |
132 | | */ |
133 | | PJ_DECL(void) pj_cis_del_str( pj_cis_t *cis, const char *str); |
134 | | |
135 | | /** |
136 | | * Invert specification. |
137 | | * |
138 | | * @param cis The scanner character specification. |
139 | | */ |
140 | | PJ_DECL(void) pj_cis_invert( pj_cis_t *cis ); |
141 | | |
142 | | /** |
143 | | * Check whether the specified character belongs to the specification. |
144 | | * This is an inline wrapper that returns the value of PJ_CIS_ISSET(cis, c). |
145 | | * @param cis The scanner character specification. |
146 | | * @param c The character to check for matching. |
147 | | * |
148 | | * @return Non-zero if the character matches (the value is not necessarily one). |
149 | | */ |
150 | | PJ_INLINE(int) pj_cis_match( const pj_cis_t *cis, pj_uint8_t c ) |
151 | 0 | { |
152 | 0 | return PJ_CIS_ISSET(cis, c); |
153 | 0 | } Unexecuted instantiation: fuzz-xml.c:pj_cis_match Unexecuted instantiation: xml.c:pj_cis_match Unexecuted instantiation: scanner.c:pj_cis_match |
154 | | |
155 | | |
156 | | /** |
157 | | * Flags for scanner, passed as the options argument of pj_scan_init(). |
158 | | */ |
159 | | enum |
160 | | { |
161 | | /** This flag specifies that the scanner should automatically skip |
162 | | whitespace. |
163 | | */ |
164 | | PJ_SCAN_AUTOSKIP_WS = 1, |
165 | | |
166 | | /** This flag specifies that the scanner should automatically skip |
167 | | SIP header continuation. This flag implies PJ_SCAN_AUTOSKIP_WS (its value, 3, includes the value 1). |
168 | | */ |
169 | | PJ_SCAN_AUTOSKIP_WS_HEADER = 3, |
170 | | |
171 | | /** This flag specifies that the scanner should automatically skip new lines. |
172 | | */ |
173 | | PJ_SCAN_AUTOSKIP_NEWLINE = 4 |
174 | | }; |
175 | | |
176 | | |
177 | | /* Forward decl. */ |
178 | | struct pj_scanner; |
179 | | |
180 | | |
181 | | /** |
182 | | * The callback function type to be called by the scanner when it encounters |
183 | | * a syntax error. |
184 | | * |
185 | | * @param scanner The scanner instance that calls the callback. |
186 | | */ |
187 | | typedef void (*pj_syn_err_func_ptr)(struct pj_scanner *scanner); |
188 | | |
189 | | |
190 | | /** |
191 | | * The text scanner structure, holding the input buffer bounds and the current scanning position. |
192 | | */ |
193 | | typedef struct pj_scanner |
194 | | { |
195 | | char *begin; /**< Start of input buffer. */ |
196 | | char *end; /**< End of input buffer; the scanner is at EOF when curptr >= end. */ |
197 | | char *curptr; /**< Current pointer. */ |
198 | | int line; /**< Current line. */ |
199 | | char *start_line; /**< Where current line starts; the column is curptr - start_line. */ |
200 | | int skip_ws; /**< Skip whitespace flag (presumably the PJ_SCAN_AUTOSKIP_* options; TODO confirm in scanner.c). */ |
201 | | pj_syn_err_func_ptr callback; /**< Syntax error callback. */ |
202 | | } pj_scanner; |
203 | | |
204 | | |
205 | | /** |
206 | | * This structure can be used by application to store the state of the parser, |
207 | | * so that the scanner state can be rolled back to this state when necessary. |
208 | | */ |
209 | | typedef struct pj_scan_state |
210 | | { |
211 | | char *curptr; /**< Current scanner's pointer. */ |
212 | | int line; /**< Current line. */ |
213 | | char *start_line; /**< Start of current line. */ |
214 | | } pj_scan_state; |
215 | | |
216 | | |
217 | | /** |
218 | | * Initialize the scanner. |
219 | | * Note that the input string buffer MUST be NULL terminated and have |
220 | | * length at least buflen+1 (buflen MUST NOT include the NULL terminator). |
221 | | * |
222 | | * @param scanner The scanner to be initialized. |
223 | | * @param bufstart The input buffer to scan, which must be NULL terminated. |
224 | | * @param buflen The length of the input buffer, which normally is |
225 | | * strlen(bufstart), hence not counting the NULL terminator. |
226 | | * @param options Zero, or combination of PJ_SCAN_AUTOSKIP_WS or |
227 | | * PJ_SCAN_AUTOSKIP_WS_HEADER |
228 | | * @param callback Callback to be called when the scanner encounters syntax |
229 | | * error condition. |
230 | | */ |
231 | | PJ_DECL(void) pj_scan_init( pj_scanner *scanner, char *bufstart, |
232 | | pj_size_t buflen, |
233 | | unsigned options, |
234 | | pj_syn_err_func_ptr callback ); |
235 | | |
236 | | |
237 | | /** |
238 | | * Call this function when application has finished using the scanner. |
239 | | * |
240 | | * @param scanner The scanner. |
241 | | */ |
242 | | PJ_DECL(void) pj_scan_fini( pj_scanner *scanner ); |
243 | | |
244 | | |
245 | | /** |
246 | | * Determine whether the EOF condition for the scanner has been met. |
247 | | * EOF means the current pointer is at or past the end of the input buffer. |
248 | | * @param scanner The scanner. |
249 | | * |
250 | | * @return Non-zero if scanner is at EOF (curptr >= end), zero otherwise. |
251 | | */ |
252 | | PJ_INLINE(int) pj_scan_is_eof( const pj_scanner *scanner) |
253 | 120k | { |
254 | 120k | return scanner->curptr >= scanner->end; |
255 | 120k | } Unexecuted instantiation: fuzz-xml.c:pj_scan_is_eof Line | Count | Source | 253 | 6.52k | { | 254 | 6.52k | return scanner->curptr >= scanner->end; | 255 | 6.52k | } |
Line | Count | Source | 253 | 114k | { | 254 | 114k | return scanner->curptr >= scanner->end; | 255 | 114k | } |
|
256 | | |
257 | | |
258 | | /** |
259 | | * Peek strings in current position according to parameter spec, and return |
260 | | * the strings in parameter out. The current scanner position will not be |
261 | | * moved. If the scanner is already in EOF state, syntax error callback will |
262 | | * be called. |
263 | | * |
264 | | * @param scanner The scanner. |
265 | | * @param spec The spec to match input string. |
266 | | * @param out String to store the result. |
267 | | * |
268 | | * @return the character right after the peek-ed position or zero if there's |
269 | | * no more characters. |
270 | | */ |
271 | | PJ_DECL(int) pj_scan_peek( pj_scanner *scanner, |
272 | | const pj_cis_t *spec, pj_str_t *out); |
273 | | |
274 | | |
275 | | /** |
276 | | * Peek len characters in current position, and return them in out parameter. |
277 | | * Note that whitespaces or newlines will be returned as is, regardless |
278 | | * of PJ_SCAN_AUTOSKIP_WS settings. If fewer than len characters are left, |
279 | | * the syntax error callback will be called. |
280 | | * |
281 | | * @param scanner The scanner. |
282 | | * @param len Length to peek. |
283 | | * @param out String to store the result. |
284 | | * |
285 | | * @return the character right after the peek-ed position or zero if there's |
286 | | * no more characters. |
287 | | */ |
288 | | PJ_DECL(int) pj_scan_peek_n( pj_scanner *scanner, |
289 | | pj_size_t len, pj_str_t *out); |
290 | | |
291 | | |
292 | | /** |
293 | | * Peek strings in current position until spec is matched, and return |
294 | | * the strings in parameter out. The current scanner position will not be |
295 | | * moved. If the scanner is already in EOF state, syntax error callback will |
296 | | * be called. |
297 | | * |
298 | | * @param scanner The scanner. |
299 | | * @param spec The peeking will stop when the input matches this spec. |
300 | | * @param out String to store the result. |
301 | | * |
302 | | * @return the character right after the peek-ed position. |
303 | | */ |
304 | | PJ_DECL(int) pj_scan_peek_until( pj_scanner *scanner, |
305 | | const pj_cis_t *spec, |
306 | | pj_str_t *out); |
307 | | |
308 | | |
309 | | /** |
310 | | * Get characters from the buffer according to the spec, and return them |
311 | | * in out parameter. The scanner will attempt to get as many characters as |
312 | | * possible as long as the spec matches. If the first character doesn't |
313 | | * match the spec, or scanner is already in EOF when this function is called, |
314 | | * an exception will be thrown. |
315 | | * |
316 | | * @param scanner The scanner. |
317 | | * @param spec The spec to match input string. |
318 | | * @param out String to store the result. |
319 | | */ |
320 | | PJ_DECL(void) pj_scan_get( pj_scanner *scanner, |
321 | | const pj_cis_t *spec, pj_str_t *out); |
322 | | |
323 | | |
324 | | /** |
325 | | * Just like #pj_scan_get(), but additionally performs unescaping when |
326 | | * escaped ('%') character is found. The input spec MUST NOT contain the |
327 | | * specification for the '%' character. |
328 | | * |
329 | | * @param scanner The scanner. |
330 | | * @param spec The spec to match input string. |
331 | | * @param out String to store the result. |
332 | | */ |
333 | | PJ_DECL(void) pj_scan_get_unescape( pj_scanner *scanner, |
334 | | const pj_cis_t *spec, pj_str_t *out); |
335 | | |
336 | | |
337 | | /** |
338 | | * Get characters between quotes. If current input doesn't match begin_quote, |
339 | | * syntax error will be thrown. Note that the resulting string will contain |
340 | | * the enclosing quote. |
341 | | * |
342 | | * @param scanner The scanner. |
343 | | * @param begin_quote The character to begin the quote. |
344 | | * @param end_quote The character to end the quote. |
345 | | * @param out String to store the result. |
346 | | */ |
347 | | PJ_DECL(void) pj_scan_get_quote( pj_scanner *scanner, |
348 | | int begin_quote, int end_quote, |
349 | | pj_str_t *out); |
350 | | |
351 | | /** |
352 | | * Get characters between quotes. If current input doesn't match begin_quote, |
353 | | * syntax error will be thrown. Note that the resulting string will contain |
354 | | * the enclosing quote. |
355 | | * |
356 | | * @param scanner The scanner. |
357 | | * @param begin_quotes The character array to begin the quotes. For example, |
358 | | * the two characters " and '. |
359 | | * @param end_quotes The character array to end the quotes. The position |
360 | | * found in the begin_quotes array will be used to match |
361 | | * the end quotes. So if the begin_quotes was the array |
362 | | * of "'< the end_quotes should be "'>. If begin_quotes |
363 | | * matched the ' then the end_quotes will look for ' to |
364 | | * match at the end. |
365 | | * @param qsize The size of the begin_quotes and end_quotes arrays. |
366 | | * @param out String to store the result. |
367 | | */ |
368 | | PJ_DECL(void) pj_scan_get_quotes(pj_scanner *scanner, |
369 | | const char *begin_quotes, |
370 | | const char *end_quotes, int qsize, |
371 | | pj_str_t *out); |
372 | | |
373 | | |
374 | | /** |
375 | | * Get N characters from the scanner. |
376 | | * |
377 | | * @param scanner The scanner. |
378 | | * @param N Number of characters to get. |
379 | | * @param out String to store the result. |
380 | | */ |
381 | | PJ_DECL(void) pj_scan_get_n( pj_scanner *scanner, |
382 | | unsigned N, pj_str_t *out); |
383 | | |
384 | | |
385 | | /** |
386 | | * Get one character from the scanner. |
387 | | * |
388 | | * @param scanner The scanner. |
389 | | * |
390 | | * @return The character. |
391 | | */ |
392 | | PJ_DECL(int) pj_scan_get_char( pj_scanner *scanner ); |
393 | | |
394 | | |
395 | | /** |
396 | | * Get characters from the scanner and move the scanner position until the |
397 | | * current character matches the spec. |
398 | | * |
399 | | * @param scanner The scanner. |
400 | | * @param spec Get until the input matches this spec. |
401 | | * @param out String to store the result. |
402 | | */ |
403 | | PJ_DECL(void) pj_scan_get_until( pj_scanner *scanner, |
404 | | const pj_cis_t *spec, pj_str_t *out); |
405 | | |
406 | | |
407 | | /** |
408 | | * Get characters from the scanner and move the scanner position until the |
409 | | * current character matches until_char. |
410 | | * |
411 | | * @param scanner The scanner. |
412 | | * @param until_char Get until the input matches this character. |
413 | | * @param out String to store the result. |
414 | | */ |
415 | | PJ_DECL(void) pj_scan_get_until_ch( pj_scanner *scanner, |
416 | | int until_char, pj_str_t *out); |
417 | | |
418 | | |
419 | | /** |
420 | | * Get characters from the scanner and move the scanner position until the |
421 | | * current character matches any of the characters in until_spec. |
422 | | * |
423 | | * @param scanner The scanner. |
424 | | * @param until_spec Get until the input matches any of these characters. |
425 | | * @param out String to store the result. |
426 | | */ |
427 | | PJ_DECL(void) pj_scan_get_until_chr( pj_scanner *scanner, |
428 | | const char *until_spec, pj_str_t *out); |
429 | | |
430 | | /** |
431 | | * Advance the scanner N characters, and skip whitespace |
432 | | * if necessary. |
433 | | * |
434 | | * @param scanner The scanner. |
435 | | * @param N Number of characters to skip. |
436 | | * @param skip Flag to specify whether whitespace should be skipped |
437 | | * after skipping the characters. |
438 | | */ |
439 | | PJ_DECL(void) pj_scan_advance_n( pj_scanner *scanner, |
440 | | unsigned N, pj_bool_t skip); |
441 | | |
442 | | |
443 | | /** |
444 | | * Compare string in current position with the specified string. |
445 | | * |
446 | | * @param scanner The scanner. |
447 | | * @param s The string to compare with. |
448 | | * @param len Length of the string to compare. |
449 | | * |
450 | | * @return zero, <0, or >0 (just like strcmp()). |
451 | | */ |
452 | | PJ_DECL(int) pj_scan_strcmp( pj_scanner *scanner, const char *s, int len); |
453 | | |
454 | | |
455 | | /** |
456 | | * Case-less string comparison of current position with the specified |
457 | | * string. |
458 | | * |
459 | | * @param scanner The scanner. |
460 | | * @param s The string to compare with. |
461 | | * @param len Length of the string to compare with. |
462 | | * |
463 | | * @return zero, <0, or >0 (just like strcmp()). |
464 | | */ |
465 | | PJ_DECL(int) pj_scan_stricmp( pj_scanner *scanner, const char *s, int len); |
466 | | |
467 | | /** |
468 | | * Perform case insensitive string comparison of string in current position, |
469 | | * knowing that the string to compare only consists of alphanumeric |
470 | | * characters. |
471 | | * |
472 | | * Note that unlike #pj_scan_stricmp, this function can only return zero or |
473 | | * -1. |
474 | | * |
475 | | * @param scanner The scanner. |
476 | | * @param s The string to compare with. |
477 | | * @param len Length of the string to compare with. |
478 | | * |
479 | | * @return zero if equal or -1. |
480 | | * |
481 | | * @see strnicmp_alnum, pj_stricmp_alnum |
482 | | */ |
483 | | PJ_DECL(int) pj_scan_stricmp_alnum( pj_scanner *scanner, const char *s, |
484 | | int len); |
485 | | |
486 | | |
487 | | /** |
488 | | * Get a newline from the scanner. A newline is defined as '\\n', or '\\r', or |
489 | | * "\\r\\n". If current input is not newline, syntax error will be thrown. |
490 | | * |
491 | | * @param scanner The scanner. |
492 | | */ |
493 | | PJ_DECL(void) pj_scan_get_newline( pj_scanner *scanner ); |
494 | | |
495 | | |
496 | | /** |
497 | | * Manually skip whitespaces according to flag that was specified when |
498 | | * the scanner was initialized. |
499 | | * |
500 | | * @param scanner The scanner. |
501 | | */ |
502 | | PJ_DECL(void) pj_scan_skip_whitespace( pj_scanner *scanner ); |
503 | | |
504 | | |
505 | | /** |
506 | | * Skip current line. |
507 | | * |
508 | | * @param scanner The scanner. |
509 | | */ |
510 | | PJ_DECL(void) pj_scan_skip_line( pj_scanner *scanner ); |
511 | | |
512 | | /** |
513 | | * Save the full scanner state. |
514 | | * |
515 | | * @param scanner The scanner. |
516 | | * @param state Variable to store scanner's state. |
517 | | */ |
518 | | PJ_DECL(void) pj_scan_save_state( const pj_scanner *scanner, |
519 | | pj_scan_state *state); |
520 | | |
521 | | |
522 | | /** |
523 | | * Restore the full scanner state. |
524 | | * Note that this would not restore the string if application has modified |
525 | | * it. This will only restore the scanner scanning position. |
526 | | * |
527 | | * @param scanner The scanner. |
528 | | * @param state State of the scanner. |
529 | | */ |
530 | | PJ_DECL(void) pj_scan_restore_state( pj_scanner *scanner, |
531 | | pj_scan_state *state); |
532 | | |
533 | | /** |
534 | | * Get current column position, i.e. the offset of the current pointer from the start of the current line. |
535 | | * |
536 | | * @param scanner The scanner. |
537 | | * |
538 | | * @return The column position (curptr - start_line, converted to int); zero at the start of a line. |
539 | | */ |
540 | | PJ_INLINE(int) pj_scan_get_col( const pj_scanner *scanner ) |
541 | 0 | { |
542 | 0 | return (int)(scanner->curptr - scanner->start_line); |
543 | 0 | } Unexecuted instantiation: fuzz-xml.c:pj_scan_get_col Unexecuted instantiation: xml.c:pj_scan_get_col Unexecuted instantiation: scanner.c:pj_scan_get_col |
544 | | |
545 | | /** |
546 | | * @} |
547 | | */ |
548 | | |
549 | | |
550 | | PJ_END_DECL |
551 | | |
552 | | #endif |
553 | | |