/src/mupdf/thirdparty/mujs/jslex.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "jsi.h" |
2 | | #include "utf.h" |
3 | | |
4 | | JS_NORETURN static void jsY_error(js_State *J, const char *fmt, ...) JS_PRINTFLIKE(2,3); |
5 | | |
6 | | static void jsY_error(js_State *J, const char *fmt, ...) |
7 | 0 | { |
8 | 0 | va_list ap; |
9 | 0 | char buf[512]; |
10 | 0 | char msgbuf[256]; |
11 | |
|
12 | 0 | va_start(ap, fmt); |
13 | 0 | vsnprintf(msgbuf, 256, fmt, ap); |
14 | 0 | va_end(ap); |
15 | |
|
16 | 0 | snprintf(buf, 256, "%s:%d: ", J->filename, J->lexline); |
17 | 0 | strcat(buf, msgbuf); |
18 | |
|
19 | 0 | js_newsyntaxerror(J, buf); |
20 | 0 | js_throw(J); |
21 | 0 | } |
22 | | |
/*
 * Printable names for tokens, indexed by token value.
 *
 *   0          end of file
 *   1..127     single-character tokens, shown quoted (control characters
 *              and DEL are shown as '\xNN')
 *   128..255   unused (null entries)
 *   256...     identifier/number/string/regexp, then the multi-character
 *              operators, then the keywords
 *
 * NOTE(review): the order from index 256 onwards presumably mirrors the TK_*
 * enumeration declared elsewhere -- keep the two in sync.
 */
static const char *tokenstring[] = {
	"(end-of-file)",
	"'\\x01'", "'\\x02'", "'\\x03'", "'\\x04'", "'\\x05'", "'\\x06'", "'\\x07'",
	"'\\x08'", "'\\x09'", "'\\x0A'", "'\\x0B'", "'\\x0C'", "'\\x0D'", "'\\x0E'", "'\\x0F'",
	"'\\x10'", "'\\x11'", "'\\x12'", "'\\x13'", "'\\x14'", "'\\x15'", "'\\x16'", "'\\x17'",
	"'\\x18'", "'\\x19'", "'\\x1A'", "'\\x1B'", "'\\x1C'", "'\\x1D'", "'\\x1E'", "'\\x1F'",
	"' '", "'!'", "'\"'", "'#'", "'$'", "'%'", "'&'", "'\\''",
	"'('", "')'", "'*'", "'+'", "','", "'-'", "'.'", "'/'",
	"'0'", "'1'", "'2'", "'3'", "'4'", "'5'", "'6'", "'7'",
	"'8'", "'9'", "':'", "';'", "'<'", "'='", "'>'", "'?'",
	"'@'", "'A'", "'B'", "'C'", "'D'", "'E'", "'F'", "'G'",
	"'H'", "'I'", "'J'", "'K'", "'L'", "'M'", "'N'", "'O'",
	"'P'", "'Q'", "'R'", "'S'", "'T'", "'U'", "'V'", "'W'",
	/* the backslash entry must be escaped twice: "'\'" would yield just '' */
	"'X'", "'Y'", "'Z'", "'['", "'\\\\'", "']'", "'^'", "'_'",
	"'`'", "'a'", "'b'", "'c'", "'d'", "'e'", "'f'", "'g'",
	"'h'", "'i'", "'j'", "'k'", "'l'", "'m'", "'n'", "'o'",
	"'p'", "'q'", "'r'", "'s'", "'t'", "'u'", "'v'", "'w'",
	"'x'", "'y'", "'z'", "'{'", "'|'", "'}'", "'~'", "'\\x7F'",

	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,

	"(identifier)", "(number)", "(string)", "(regexp)",

	"'<='", "'>='", "'=='", "'!='", "'==='", "'!=='",
	"'<<'", "'>>'", "'>>>'", "'&&'", "'||'",
	"'+='", "'-='", "'*='", "'/='", "'%='",
	"'<<='", "'>>='", "'>>>='", "'&='", "'|='", "'^='",
	"'++'", "'--'",

	"'break'", "'case'", "'catch'", "'continue'", "'debugger'",
	"'default'", "'delete'", "'do'", "'else'", "'false'", "'finally'", "'for'",
	"'function'", "'if'", "'in'", "'instanceof'", "'new'", "'null'", "'return'",
	"'switch'", "'this'", "'throw'", "'true'", "'try'", "'typeof'", "'var'",
	"'void'", "'while'", "'with'",
};
65 | | |
66 | | const char *jsY_tokenstring(int token) |
67 | 0 | { |
68 | 0 | if (token >= 0 && token < (int)nelem(tokenstring)) |
69 | 0 | if (tokenstring[token]) |
70 | 0 | return tokenstring[token]; |
71 | 0 | return "<unknown>"; |
72 | 0 | } |
73 | | |
/*
 * Reserved words recognised by the lexer.
 *
 * The list is searched with jsY_findword(), a binary search using strcmp(),
 * so it must stay sorted in strcmp() order. jsY_findkeyword() maps index i
 * to token TK_BREAK + i, so the order must also match the keyword tokens.
 */
static const char *keywords[] = {
	"break", "case", "catch", "continue",
	"debugger", "default", "delete", "do",
	"else",
	"false", "finally", "for", "function",
	"if", "in", "instanceof",
	"new", "null",
	"return",
	"switch",
	"this", "throw", "true", "try", "typeof",
	"var", "void",
	"while", "with",
};
80 | | |
/*
 * Binary-search a sorted word list.
 *
 *   s     word to look up (NUL-terminated)
 *   list  array of 'num' strings, sorted in strcmp() order
 *   num   number of entries in 'list'
 *
 * Returns the index of the entry equal to 's', or -1 if there is none
 * (including when 'num' is zero or negative).
 */
int jsY_findword(const char *s, const char **list, int num)
{
	int lo = 0;
	int hi = num - 1;

	while (lo <= hi) {
		/* lo + (hi - lo) / 2 cannot overflow, unlike (lo + hi) / 2 */
		int mid = lo + (hi - lo) / 2;
		int cmp = strcmp(s, list[mid]);

		if (cmp == 0)
			return mid;
		if (cmp < 0)
			hi = mid - 1;
		else
			lo = mid + 1;
	}
	return -1;
}
97 | | |
98 | | static int jsY_findkeyword(js_State *J, const char *s) |
99 | 0 | { |
100 | 0 | int i = jsY_findword(s, keywords, nelem(keywords)); |
101 | 0 | if (i >= 0) { |
102 | 0 | J->text = keywords[i]; |
103 | 0 | return TK_BREAK + i; /* first keyword + i */ |
104 | 0 | } |
105 | 0 | J->text = js_intern(J, s); |
106 | 0 | return TK_IDENTIFIER; |
107 | 0 | } |
108 | | |
/*
 * Return 1 if 'c' is a white space character (line terminators are handled
 * separately by jsY_isnewline), otherwise 0.
 */
int jsY_iswhite(int c)
{
	switch (c) {
	case 0x9:    /* character tabulation */
	case 0xB:    /* line tabulation */
	case 0xC:    /* form feed */
	case 0x20:   /* space */
	case 0xA0:   /* no-break space */
	case 0xFEFF: /* zero width no-break space / byte order mark */
		return 1;
	default:
		return 0;
	}
}
113 | | |
/*
 * Return 1 if 'c' is a line terminator, otherwise 0.
 */
int jsY_isnewline(int c)
{
	switch (c) {
	case 0xA:    /* line feed */
	case 0xD:    /* carriage return */
	case 0x2028: /* line separator */
	case 0x2029: /* paragraph separator */
		return 1;
	default:
		return 0;
	}
}
118 | | |
/*
 * ASCII-only character class tests, defined only when no macro of the same
 * name is already in effect. The argument is parenthesized so that compound
 * expressions expand correctly; it is still evaluated more than once, so it
 * must not have side effects. Note that ishex() covers only the letters
 * a-f / A-F, not the decimal digits.
 */
#ifndef isalpha
#define isalpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
#endif
#ifndef isdigit
#define isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
#ifndef ishex
#define ishex(c) (((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
#endif
128 | | |
/*
 * Return 1 if 'c' may begin an identifier: an ASCII letter, '$', '_', or
 * any rune accepted by isalpharune(); otherwise 0.
 */
static int jsY_isidentifierstart(int c)
{
	if (isalpha(c))
		return 1;
	if (c == '$' || c == '_')
		return 1;
	return isalpharune(c) ? 1 : 0;
}
133 | | |
/*
 * Return 1 if 'c' may continue an identifier: an ASCII digit or letter,
 * '$', '_', or any rune accepted by isalpharune(); otherwise 0.
 */
static int jsY_isidentifierpart(int c)
{
	if (isdigit(c) || isalpha(c))
		return 1;
	if (c == '$' || c == '_')
		return 1;
	return isalpharune(c) ? 1 : 0;
}
138 | | |
/*
 * Decimal digit test; a function wrapper around the isdigit() macro.
 */
static int jsY_isdec(int c)
{
	int result = isdigit(c);
	return result;
}
143 | | |
/*
 * Return non-zero if 'c' is a hexadecimal digit: a decimal digit, or one of
 * the letters matched by the ishex() macro.
 */
int jsY_ishex(int c)
{
	if (isdigit(c))
		return 1;
	return ishex(c);
}
148 | | |
/*
 * Convert a hexadecimal digit character to its value (0..15).
 * Characters that are not hexadecimal digits convert to 0.
 */
int jsY_tohex(int c)
{
	int value = 0;

	if ('0' <= c && c <= '9')
		value = c - '0';
	else if ('a' <= c && c <= 'f')
		value = 0xA + (c - 'a');
	else if ('A' <= c && c <= 'F')
		value = 0xA + (c - 'A');

	return value;
}
156 | | |
157 | | static void jsY_next(js_State *J) |
158 | 0 | { |
159 | 0 | Rune c; |
160 | 0 | if (*J->source == 0) { |
161 | 0 | J->lexchar = EOF; |
162 | 0 | return; |
163 | 0 | } |
164 | 0 | J->source += chartorune(&c, J->source); |
165 | | /* consume CR LF as one unit */ |
166 | 0 | if (c == '\r' && *J->source == '\n') |
167 | 0 | ++J->source; |
168 | 0 | if (jsY_isnewline(c)) { |
169 | 0 | J->line++; |
170 | 0 | c = '\n'; |
171 | 0 | } |
172 | 0 | J->lexchar = c; |
173 | 0 | } |
174 | | |
/*
 * jsY_accept: if the lookahead character equals x, consume it and yield 1;
 * otherwise leave the input untouched and yield 0.
 */
#define jsY_accept(J, x) ((J)->lexchar == (x) ? (jsY_next(J), 1) : 0)

/*
 * jsY_expect: consume x or raise a syntax error. Wrapped in do/while(0) so
 * it behaves as a single statement (safe inside unbraced if/else). x is
 * evaluated more than once and must not have side effects.
 */
#define jsY_expect(J, x) do { if (!jsY_accept(J, x)) jsY_error(J, "expected '%c'", (x)); } while (0)
178 | | |
179 | | static void jsY_unescape(js_State *J) |
180 | 0 | { |
181 | 0 | if (jsY_accept(J, '\\')) { |
182 | 0 | if (jsY_accept(J, 'u')) { |
183 | 0 | int x = 0; |
184 | 0 | if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); |
185 | 0 | if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); |
186 | 0 | if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); |
187 | 0 | if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar); |
188 | 0 | J->lexchar = x; |
189 | 0 | return; |
190 | 0 | } |
191 | 0 | error: |
192 | 0 | jsY_error(J, "unexpected escape sequence"); |
193 | 0 | } |
194 | 0 | } |
195 | | |
196 | | static void textinit(js_State *J) |
197 | 0 | { |
198 | 0 | if (!J->lexbuf.text) { |
199 | 0 | J->lexbuf.cap = 4096; |
200 | 0 | J->lexbuf.text = js_malloc(J, J->lexbuf.cap); |
201 | 0 | } |
202 | 0 | J->lexbuf.len = 0; |
203 | 0 | } |
204 | | |
205 | | static void textpush(js_State *J, Rune c) |
206 | 0 | { |
207 | 0 | int n; |
208 | 0 | if (c == EOF) |
209 | 0 | n = 1; |
210 | 0 | else |
211 | 0 | n = runelen(c); |
212 | 0 | if (J->lexbuf.len + n > J->lexbuf.cap) { |
213 | 0 | J->lexbuf.cap = J->lexbuf.cap * 2; |
214 | 0 | J->lexbuf.text = js_realloc(J, J->lexbuf.text, J->lexbuf.cap); |
215 | 0 | } |
216 | 0 | if (c == EOF) |
217 | 0 | J->lexbuf.text[J->lexbuf.len++] = 0; |
218 | 0 | else |
219 | 0 | J->lexbuf.len += runetochar(J->lexbuf.text + J->lexbuf.len, &c); |
220 | 0 | } |
221 | | |
222 | | static char *textend(js_State *J) |
223 | 0 | { |
224 | 0 | textpush(J, EOF); |
225 | 0 | return J->lexbuf.text; |
226 | 0 | } |
227 | | |
228 | | static void lexlinecomment(js_State *J) |
229 | 0 | { |
230 | 0 | while (J->lexchar != EOF && J->lexchar != '\n') |
231 | 0 | jsY_next(J); |
232 | 0 | } |
233 | | |
234 | | static int lexcomment(js_State *J) |
235 | 0 | { |
236 | | /* already consumed initial '/' '*' sequence */ |
237 | 0 | while (J->lexchar != EOF) { |
238 | 0 | if (jsY_accept(J, '*')) { |
239 | 0 | while (J->lexchar == '*') |
240 | 0 | jsY_next(J); |
241 | 0 | if (jsY_accept(J, '/')) |
242 | 0 | return 0; |
243 | 0 | } |
244 | 0 | else |
245 | 0 | jsY_next(J); |
246 | 0 | } |
247 | 0 | return -1; |
248 | 0 | } |
249 | | |
250 | | static double lexhex(js_State *J) |
251 | 0 | { |
252 | 0 | double n = 0; |
253 | 0 | if (!jsY_ishex(J->lexchar)) |
254 | 0 | jsY_error(J, "malformed hexadecimal number"); |
255 | 0 | while (jsY_ishex(J->lexchar)) { |
256 | 0 | n = n * 16 + jsY_tohex(J->lexchar); |
257 | 0 | jsY_next(J); |
258 | 0 | } |
259 | 0 | return n; |
260 | 0 | } |
261 | | |
262 | | #if 0 |
263 | | |
264 | | static double lexinteger(js_State *J) |
265 | | { |
266 | | double n = 0; |
267 | | if (!jsY_isdec(J->lexchar)) |
268 | | jsY_error(J, "malformed number"); |
269 | | while (jsY_isdec(J->lexchar)) { |
270 | | n = n * 10 + (J->lexchar - '0'); |
271 | | jsY_next(J); |
272 | | } |
273 | | return n; |
274 | | } |
275 | | |
276 | | static double lexfraction(js_State *J) |
277 | | { |
278 | | double n = 0; |
279 | | double d = 1; |
280 | | while (jsY_isdec(J->lexchar)) { |
281 | | n = n * 10 + (J->lexchar - '0'); |
282 | | d = d * 10; |
283 | | jsY_next(J); |
284 | | } |
285 | | return n / d; |
286 | | } |
287 | | |
288 | | static double lexexponent(js_State *J) |
289 | | { |
290 | | double sign; |
291 | | if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) { |
292 | | if (jsY_accept(J, '-')) sign = -1; |
293 | | else if (jsY_accept(J, '+')) sign = 1; |
294 | | else sign = 1; |
295 | | return sign * lexinteger(J); |
296 | | } |
297 | | return 0; |
298 | | } |
299 | | |
300 | | static int lexnumber(js_State *J) |
301 | | { |
302 | | double n; |
303 | | double e; |
304 | | |
305 | | if (jsY_accept(J, '0')) { |
306 | | if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) { |
307 | | J->number = lexhex(J); |
308 | | return TK_NUMBER; |
309 | | } |
310 | | if (jsY_isdec(J->lexchar)) |
311 | | jsY_error(J, "number with leading zero"); |
312 | | n = 0; |
313 | | if (jsY_accept(J, '.')) |
314 | | n += lexfraction(J); |
315 | | } else if (jsY_accept(J, '.')) { |
316 | | if (!jsY_isdec(J->lexchar)) |
317 | | return '.'; |
318 | | n = lexfraction(J); |
319 | | } else { |
320 | | n = lexinteger(J); |
321 | | if (jsY_accept(J, '.')) |
322 | | n += lexfraction(J); |
323 | | } |
324 | | |
325 | | e = lexexponent(J); |
326 | | if (e < 0) |
327 | | n /= pow(10, -e); |
328 | | else if (e > 0) |
329 | | n *= pow(10, e); |
330 | | |
331 | | if (jsY_isidentifierstart(J->lexchar)) |
332 | | jsY_error(J, "number with letter suffix"); |
333 | | |
334 | | J->number = n; |
335 | | return TK_NUMBER; |
336 | | } |
337 | | |
338 | | #else |
339 | | |
340 | | static int lexnumber(js_State *J) |
341 | 0 | { |
342 | 0 | const char *s = J->source - 1; |
343 | |
|
344 | 0 | if (jsY_accept(J, '0')) { |
345 | 0 | if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) { |
346 | 0 | J->number = lexhex(J); |
347 | 0 | return TK_NUMBER; |
348 | 0 | } |
349 | 0 | if (jsY_isdec(J->lexchar)) |
350 | 0 | jsY_error(J, "number with leading zero"); |
351 | 0 | if (jsY_accept(J, '.')) { |
352 | 0 | while (jsY_isdec(J->lexchar)) |
353 | 0 | jsY_next(J); |
354 | 0 | } |
355 | 0 | } else if (jsY_accept(J, '.')) { |
356 | 0 | if (!jsY_isdec(J->lexchar)) |
357 | 0 | return '.'; |
358 | 0 | while (jsY_isdec(J->lexchar)) |
359 | 0 | jsY_next(J); |
360 | 0 | } else { |
361 | 0 | while (jsY_isdec(J->lexchar)) |
362 | 0 | jsY_next(J); |
363 | 0 | if (jsY_accept(J, '.')) { |
364 | 0 | while (jsY_isdec(J->lexchar)) |
365 | 0 | jsY_next(J); |
366 | 0 | } |
367 | 0 | } |
368 | | |
369 | 0 | if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) { |
370 | 0 | if (J->lexchar == '-' || J->lexchar == '+') |
371 | 0 | jsY_next(J); |
372 | 0 | if (jsY_isdec(J->lexchar)) |
373 | 0 | while (jsY_isdec(J->lexchar)) |
374 | 0 | jsY_next(J); |
375 | 0 | else |
376 | 0 | jsY_error(J, "missing exponent"); |
377 | 0 | } |
378 | | |
379 | 0 | if (jsY_isidentifierstart(J->lexchar)) |
380 | 0 | jsY_error(J, "number with letter suffix"); |
381 | | |
382 | 0 | J->number = js_strtod(s, NULL); |
383 | 0 | return TK_NUMBER; |
384 | 0 | } |
385 | | |
386 | | #endif |
387 | | |
388 | | static int lexescape(js_State *J) |
389 | 0 | { |
390 | 0 | int x = 0; |
391 | | |
392 | | /* already consumed '\' */ |
393 | |
|
394 | 0 | if (jsY_accept(J, '\n')) |
395 | 0 | return 0; |
396 | | |
397 | 0 | switch (J->lexchar) { |
398 | 0 | case EOF: jsY_error(J, "unterminated escape sequence"); |
399 | 0 | case 'u': |
400 | 0 | jsY_next(J); |
401 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); } |
402 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); } |
403 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); } |
404 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); } |
405 | 0 | textpush(J, x); |
406 | 0 | break; |
407 | 0 | case 'x': |
408 | 0 | jsY_next(J); |
409 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); } |
410 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); } |
411 | 0 | textpush(J, x); |
412 | 0 | break; |
413 | 0 | case '0': textpush(J, 0); jsY_next(J); break; |
414 | 0 | case '\\': textpush(J, '\\'); jsY_next(J); break; |
415 | 0 | case '\'': textpush(J, '\''); jsY_next(J); break; |
416 | 0 | case '"': textpush(J, '"'); jsY_next(J); break; |
417 | 0 | case 'b': textpush(J, '\b'); jsY_next(J); break; |
418 | 0 | case 'f': textpush(J, '\f'); jsY_next(J); break; |
419 | 0 | case 'n': textpush(J, '\n'); jsY_next(J); break; |
420 | 0 | case 'r': textpush(J, '\r'); jsY_next(J); break; |
421 | 0 | case 't': textpush(J, '\t'); jsY_next(J); break; |
422 | 0 | case 'v': textpush(J, '\v'); jsY_next(J); break; |
423 | 0 | default: textpush(J, J->lexchar); jsY_next(J); break; |
424 | 0 | } |
425 | 0 | return 0; |
426 | 0 | } |
427 | | |
428 | | static int lexstring(js_State *J) |
429 | 0 | { |
430 | 0 | const char *s; |
431 | |
|
432 | 0 | int q = J->lexchar; |
433 | 0 | jsY_next(J); |
434 | |
|
435 | 0 | textinit(J); |
436 | |
|
437 | 0 | while (J->lexchar != q) { |
438 | 0 | if (J->lexchar == EOF || J->lexchar == '\n') |
439 | 0 | jsY_error(J, "string not terminated"); |
440 | 0 | if (jsY_accept(J, '\\')) { |
441 | 0 | if (lexescape(J)) |
442 | 0 | jsY_error(J, "malformed escape sequence"); |
443 | 0 | } else { |
444 | 0 | textpush(J, J->lexchar); |
445 | 0 | jsY_next(J); |
446 | 0 | } |
447 | 0 | } |
448 | 0 | jsY_expect(J, q); |
449 | | |
450 | 0 | s = textend(J); |
451 | |
|
452 | 0 | J->text = js_intern(J, s); |
453 | 0 | return TK_STRING; |
454 | 0 | } |
455 | | |
456 | | /* the ugliest language wart ever... */ |
457 | | static int isregexpcontext(int last) |
458 | 0 | { |
459 | 0 | switch (last) { |
460 | 0 | case ']': |
461 | 0 | case ')': |
462 | 0 | case '}': |
463 | 0 | case TK_IDENTIFIER: |
464 | 0 | case TK_NUMBER: |
465 | 0 | case TK_STRING: |
466 | 0 | case TK_FALSE: |
467 | 0 | case TK_NULL: |
468 | 0 | case TK_THIS: |
469 | 0 | case TK_TRUE: |
470 | 0 | return 0; |
471 | 0 | default: |
472 | 0 | return 1; |
473 | 0 | } |
474 | 0 | } |
475 | | |
476 | | static int lexregexp(js_State *J) |
477 | 0 | { |
478 | 0 | const char *s; |
479 | 0 | int g, m, i; |
480 | 0 | int inclass = 0; |
481 | | |
482 | | /* already consumed initial '/' */ |
483 | |
|
484 | 0 | textinit(J); |
485 | | |
486 | | /* regexp body */ |
487 | 0 | while (J->lexchar != '/' || inclass) { |
488 | 0 | if (J->lexchar == EOF || J->lexchar == '\n') { |
489 | 0 | jsY_error(J, "regular expression not terminated"); |
490 | 0 | } else if (jsY_accept(J, '\\')) { |
491 | 0 | if (jsY_accept(J, '/')) { |
492 | 0 | textpush(J, '/'); |
493 | 0 | } else { |
494 | 0 | textpush(J, '\\'); |
495 | 0 | if (J->lexchar == EOF || J->lexchar == '\n') |
496 | 0 | jsY_error(J, "regular expression not terminated"); |
497 | 0 | textpush(J, J->lexchar); |
498 | 0 | jsY_next(J); |
499 | 0 | } |
500 | 0 | } else { |
501 | 0 | if (J->lexchar == '[' && !inclass) |
502 | 0 | inclass = 1; |
503 | 0 | if (J->lexchar == ']' && inclass) |
504 | 0 | inclass = 0; |
505 | 0 | textpush(J, J->lexchar); |
506 | 0 | jsY_next(J); |
507 | 0 | } |
508 | 0 | } |
509 | 0 | jsY_expect(J, '/'); |
510 | | |
511 | 0 | s = textend(J); |
512 | | |
513 | | /* regexp flags */ |
514 | 0 | g = i = m = 0; |
515 | |
|
516 | 0 | while (jsY_isidentifierpart(J->lexchar)) { |
517 | 0 | if (jsY_accept(J, 'g')) ++g; |
518 | 0 | else if (jsY_accept(J, 'i')) ++i; |
519 | 0 | else if (jsY_accept(J, 'm')) ++m; |
520 | 0 | else jsY_error(J, "illegal flag in regular expression: %c", J->lexchar); |
521 | 0 | } |
522 | | |
523 | 0 | if (g > 1 || i > 1 || m > 1) |
524 | 0 | jsY_error(J, "duplicated flag in regular expression"); |
525 | | |
526 | 0 | J->text = js_intern(J, s); |
527 | 0 | J->number = 0; |
528 | 0 | if (g) J->number += JS_REGEXP_G; |
529 | 0 | if (i) J->number += JS_REGEXP_I; |
530 | 0 | if (m) J->number += JS_REGEXP_M; |
531 | 0 | return TK_REGEXP; |
532 | 0 | } |
533 | | |
534 | | /* simple "return [no Line Terminator here] ..." contexts */ |
535 | | static int isnlthcontext(int last) |
536 | 0 | { |
537 | 0 | switch (last) { |
538 | 0 | case TK_BREAK: |
539 | 0 | case TK_CONTINUE: |
540 | 0 | case TK_RETURN: |
541 | 0 | case TK_THROW: |
542 | 0 | return 1; |
543 | 0 | default: |
544 | 0 | return 0; |
545 | 0 | } |
546 | 0 | } |
547 | | |
548 | | static int jsY_lexx(js_State *J) |
549 | 0 | { |
550 | 0 | J->newline = 0; |
551 | |
|
552 | 0 | while (1) { |
553 | 0 | J->lexline = J->line; /* save location of beginning of token */ |
554 | |
|
555 | 0 | while (jsY_iswhite(J->lexchar)) |
556 | 0 | jsY_next(J); |
557 | |
|
558 | 0 | if (jsY_accept(J, '\n')) { |
559 | 0 | J->newline = 1; |
560 | 0 | if (isnlthcontext(J->lasttoken)) |
561 | 0 | return ';'; |
562 | 0 | continue; |
563 | 0 | } |
564 | | |
565 | 0 | if (jsY_accept(J, '/')) { |
566 | 0 | if (jsY_accept(J, '/')) { |
567 | 0 | lexlinecomment(J); |
568 | 0 | continue; |
569 | 0 | } else if (jsY_accept(J, '*')) { |
570 | 0 | if (lexcomment(J)) |
571 | 0 | jsY_error(J, "multi-line comment not terminated"); |
572 | 0 | continue; |
573 | 0 | } else if (isregexpcontext(J->lasttoken)) { |
574 | 0 | return lexregexp(J); |
575 | 0 | } else if (jsY_accept(J, '=')) { |
576 | 0 | return TK_DIV_ASS; |
577 | 0 | } else { |
578 | 0 | return '/'; |
579 | 0 | } |
580 | 0 | } |
581 | | |
582 | 0 | if (J->lexchar >= '0' && J->lexchar <= '9') { |
583 | 0 | return lexnumber(J); |
584 | 0 | } |
585 | | |
586 | 0 | switch (J->lexchar) { |
587 | 0 | case '(': jsY_next(J); return '('; |
588 | 0 | case ')': jsY_next(J); return ')'; |
589 | 0 | case ',': jsY_next(J); return ','; |
590 | 0 | case ':': jsY_next(J); return ':'; |
591 | 0 | case ';': jsY_next(J); return ';'; |
592 | 0 | case '?': jsY_next(J); return '?'; |
593 | 0 | case '[': jsY_next(J); return '['; |
594 | 0 | case ']': jsY_next(J); return ']'; |
595 | 0 | case '{': jsY_next(J); return '{'; |
596 | 0 | case '}': jsY_next(J); return '}'; |
597 | 0 | case '~': jsY_next(J); return '~'; |
598 | | |
599 | 0 | case '\'': |
600 | 0 | case '"': |
601 | 0 | return lexstring(J); |
602 | | |
603 | 0 | case '.': |
604 | 0 | return lexnumber(J); |
605 | | |
606 | 0 | case '<': |
607 | 0 | jsY_next(J); |
608 | 0 | if (jsY_accept(J, '<')) { |
609 | 0 | if (jsY_accept(J, '=')) |
610 | 0 | return TK_SHL_ASS; |
611 | 0 | return TK_SHL; |
612 | 0 | } |
613 | 0 | if (jsY_accept(J, '=')) |
614 | 0 | return TK_LE; |
615 | 0 | return '<'; |
616 | | |
617 | 0 | case '>': |
618 | 0 | jsY_next(J); |
619 | 0 | if (jsY_accept(J, '>')) { |
620 | 0 | if (jsY_accept(J, '>')) { |
621 | 0 | if (jsY_accept(J, '=')) |
622 | 0 | return TK_USHR_ASS; |
623 | 0 | return TK_USHR; |
624 | 0 | } |
625 | 0 | if (jsY_accept(J, '=')) |
626 | 0 | return TK_SHR_ASS; |
627 | 0 | return TK_SHR; |
628 | 0 | } |
629 | 0 | if (jsY_accept(J, '=')) |
630 | 0 | return TK_GE; |
631 | 0 | return '>'; |
632 | | |
633 | 0 | case '=': |
634 | 0 | jsY_next(J); |
635 | 0 | if (jsY_accept(J, '=')) { |
636 | 0 | if (jsY_accept(J, '=')) |
637 | 0 | return TK_STRICTEQ; |
638 | 0 | return TK_EQ; |
639 | 0 | } |
640 | 0 | return '='; |
641 | | |
642 | 0 | case '!': |
643 | 0 | jsY_next(J); |
644 | 0 | if (jsY_accept(J, '=')) { |
645 | 0 | if (jsY_accept(J, '=')) |
646 | 0 | return TK_STRICTNE; |
647 | 0 | return TK_NE; |
648 | 0 | } |
649 | 0 | return '!'; |
650 | | |
651 | 0 | case '+': |
652 | 0 | jsY_next(J); |
653 | 0 | if (jsY_accept(J, '+')) |
654 | 0 | return TK_INC; |
655 | 0 | if (jsY_accept(J, '=')) |
656 | 0 | return TK_ADD_ASS; |
657 | 0 | return '+'; |
658 | | |
659 | 0 | case '-': |
660 | 0 | jsY_next(J); |
661 | 0 | if (jsY_accept(J, '-')) |
662 | 0 | return TK_DEC; |
663 | 0 | if (jsY_accept(J, '=')) |
664 | 0 | return TK_SUB_ASS; |
665 | 0 | return '-'; |
666 | | |
667 | 0 | case '*': |
668 | 0 | jsY_next(J); |
669 | 0 | if (jsY_accept(J, '=')) |
670 | 0 | return TK_MUL_ASS; |
671 | 0 | return '*'; |
672 | | |
673 | 0 | case '%': |
674 | 0 | jsY_next(J); |
675 | 0 | if (jsY_accept(J, '=')) |
676 | 0 | return TK_MOD_ASS; |
677 | 0 | return '%'; |
678 | | |
679 | 0 | case '&': |
680 | 0 | jsY_next(J); |
681 | 0 | if (jsY_accept(J, '&')) |
682 | 0 | return TK_AND; |
683 | 0 | if (jsY_accept(J, '=')) |
684 | 0 | return TK_AND_ASS; |
685 | 0 | return '&'; |
686 | | |
687 | 0 | case '|': |
688 | 0 | jsY_next(J); |
689 | 0 | if (jsY_accept(J, '|')) |
690 | 0 | return TK_OR; |
691 | 0 | if (jsY_accept(J, '=')) |
692 | 0 | return TK_OR_ASS; |
693 | 0 | return '|'; |
694 | | |
695 | 0 | case '^': |
696 | 0 | jsY_next(J); |
697 | 0 | if (jsY_accept(J, '=')) |
698 | 0 | return TK_XOR_ASS; |
699 | 0 | return '^'; |
700 | | |
701 | 0 | case EOF: |
702 | 0 | return 0; /* EOF */ |
703 | 0 | } |
704 | | |
705 | | /* Handle \uXXXX escapes in identifiers */ |
706 | 0 | jsY_unescape(J); |
707 | 0 | if (jsY_isidentifierstart(J->lexchar)) { |
708 | 0 | textinit(J); |
709 | 0 | textpush(J, J->lexchar); |
710 | |
|
711 | 0 | jsY_next(J); |
712 | 0 | jsY_unescape(J); |
713 | 0 | while (jsY_isidentifierpart(J->lexchar)) { |
714 | 0 | textpush(J, J->lexchar); |
715 | 0 | jsY_next(J); |
716 | 0 | jsY_unescape(J); |
717 | 0 | } |
718 | |
|
719 | 0 | textend(J); |
720 | |
|
721 | 0 | return jsY_findkeyword(J, J->lexbuf.text); |
722 | 0 | } |
723 | | |
724 | 0 | if (J->lexchar >= 0x20 && J->lexchar <= 0x7E) |
725 | 0 | jsY_error(J, "unexpected character: '%c'", J->lexchar); |
726 | 0 | jsY_error(J, "unexpected character: \\u%04X", J->lexchar); |
727 | 0 | } |
728 | 0 | } |
729 | | |
730 | | void jsY_initlex(js_State *J, const char *filename, const char *source) |
731 | 0 | { |
732 | 0 | J->filename = filename; |
733 | 0 | J->source = source; |
734 | 0 | J->line = 1; |
735 | 0 | J->lasttoken = 0; |
736 | 0 | jsY_next(J); /* load first lookahead character */ |
737 | 0 | } |
738 | | |
739 | | int jsY_lex(js_State *J) |
740 | 0 | { |
741 | 0 | return J->lasttoken = jsY_lexx(J); |
742 | 0 | } |
743 | | |
744 | | static int lexjsonnumber(js_State *J) |
745 | 0 | { |
746 | 0 | const char *s = J->source - 1; |
747 | |
|
748 | 0 | if (J->lexchar == '-') |
749 | 0 | jsY_next(J); |
750 | |
|
751 | 0 | if (J->lexchar == '0') |
752 | 0 | jsY_next(J); |
753 | 0 | else if (J->lexchar >= '1' && J->lexchar <= '9') |
754 | 0 | while (isdigit(J->lexchar)) |
755 | 0 | jsY_next(J); |
756 | 0 | else |
757 | 0 | jsY_error(J, "unexpected non-digit"); |
758 | | |
759 | 0 | if (jsY_accept(J, '.')) { |
760 | 0 | if (isdigit(J->lexchar)) |
761 | 0 | while (isdigit(J->lexchar)) |
762 | 0 | jsY_next(J); |
763 | 0 | else |
764 | 0 | jsY_error(J, "missing digits after decimal point"); |
765 | 0 | } |
766 | | |
767 | 0 | if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) { |
768 | 0 | if (J->lexchar == '-' || J->lexchar == '+') |
769 | 0 | jsY_next(J); |
770 | 0 | if (isdigit(J->lexchar)) |
771 | 0 | while (isdigit(J->lexchar)) |
772 | 0 | jsY_next(J); |
773 | 0 | else |
774 | 0 | jsY_error(J, "missing digits after exponent indicator"); |
775 | 0 | } |
776 | | |
777 | 0 | J->number = js_strtod(s, NULL); |
778 | 0 | return TK_NUMBER; |
779 | 0 | } |
780 | | |
781 | | static int lexjsonescape(js_State *J) |
782 | 0 | { |
783 | 0 | int x = 0; |
784 | | |
785 | | /* already consumed '\' */ |
786 | |
|
787 | 0 | switch (J->lexchar) { |
788 | 0 | default: jsY_error(J, "invalid escape sequence"); |
789 | 0 | case 'u': |
790 | 0 | jsY_next(J); |
791 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); } |
792 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); } |
793 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); } |
794 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); } |
795 | 0 | textpush(J, x); |
796 | 0 | break; |
797 | 0 | case '"': textpush(J, '"'); jsY_next(J); break; |
798 | 0 | case '\\': textpush(J, '\\'); jsY_next(J); break; |
799 | 0 | case '/': textpush(J, '/'); jsY_next(J); break; |
800 | 0 | case 'b': textpush(J, '\b'); jsY_next(J); break; |
801 | 0 | case 'f': textpush(J, '\f'); jsY_next(J); break; |
802 | 0 | case 'n': textpush(J, '\n'); jsY_next(J); break; |
803 | 0 | case 'r': textpush(J, '\r'); jsY_next(J); break; |
804 | 0 | case 't': textpush(J, '\t'); jsY_next(J); break; |
805 | 0 | } |
806 | 0 | return 0; |
807 | 0 | } |
808 | | |
809 | | static int lexjsonstring(js_State *J) |
810 | 0 | { |
811 | 0 | const char *s; |
812 | |
|
813 | 0 | textinit(J); |
814 | |
|
815 | 0 | while (J->lexchar != '"') { |
816 | 0 | if (J->lexchar == EOF) |
817 | 0 | jsY_error(J, "unterminated string"); |
818 | 0 | else if (J->lexchar < 32) |
819 | 0 | jsY_error(J, "invalid control character in string"); |
820 | 0 | else if (jsY_accept(J, '\\')) |
821 | 0 | lexjsonescape(J); |
822 | 0 | else { |
823 | 0 | textpush(J, J->lexchar); |
824 | 0 | jsY_next(J); |
825 | 0 | } |
826 | 0 | } |
827 | 0 | jsY_expect(J, '"'); |
828 | | |
829 | 0 | s = textend(J); |
830 | |
|
831 | 0 | J->text = js_intern(J, s); |
832 | 0 | return TK_STRING; |
833 | 0 | } |
834 | | |
835 | | int jsY_lexjson(js_State *J) |
836 | 0 | { |
837 | 0 | while (1) { |
838 | 0 | J->lexline = J->line; /* save location of beginning of token */ |
839 | |
|
840 | 0 | while (jsY_iswhite(J->lexchar) || J->lexchar == '\n') |
841 | 0 | jsY_next(J); |
842 | |
|
843 | 0 | if ((J->lexchar >= '0' && J->lexchar <= '9') || J->lexchar == '-') |
844 | 0 | return lexjsonnumber(J); |
845 | | |
846 | 0 | switch (J->lexchar) { |
847 | 0 | case ',': jsY_next(J); return ','; |
848 | 0 | case ':': jsY_next(J); return ':'; |
849 | 0 | case '[': jsY_next(J); return '['; |
850 | 0 | case ']': jsY_next(J); return ']'; |
851 | 0 | case '{': jsY_next(J); return '{'; |
852 | 0 | case '}': jsY_next(J); return '}'; |
853 | | |
854 | 0 | case '"': |
855 | 0 | jsY_next(J); |
856 | 0 | return lexjsonstring(J); |
857 | | |
858 | 0 | case 'f': |
859 | 0 | jsY_next(J); jsY_expect(J, 'a'); jsY_expect(J, 'l'); jsY_expect(J, 's'); jsY_expect(J, 'e'); |
860 | 0 | return TK_FALSE; |
861 | | |
862 | 0 | case 'n': |
863 | 0 | jsY_next(J); jsY_expect(J, 'u'); jsY_expect(J, 'l'); jsY_expect(J, 'l'); |
864 | 0 | return TK_NULL; |
865 | | |
866 | 0 | case 't': |
867 | 0 | jsY_next(J); jsY_expect(J, 'r'); jsY_expect(J, 'u'); jsY_expect(J, 'e'); |
868 | 0 | return TK_TRUE; |
869 | | |
870 | 0 | case EOF: |
871 | 0 | return 0; /* EOF */ |
872 | 0 | } |
873 | | |
874 | 0 | if (J->lexchar >= 0x20 && J->lexchar <= 0x7E) |
875 | 0 | jsY_error(J, "unexpected character: '%c'", J->lexchar); |
876 | 0 | jsY_error(J, "unexpected character: \\u%04X", J->lexchar); |
877 | 0 | } |
878 | 0 | } |