/src/mupdf/thirdparty/mujs/jslex.c
Line | Count | Source |
1 | | #include "jsi.h" |
2 | | #include "utf.h" |
3 | | |
4 | | JS_NORETURN static void jsY_error(js_State *J, const char *fmt, ...) JS_PRINTFLIKE(2,3); |
5 | | |
6 | | static void jsY_error(js_State *J, const char *fmt, ...) |
7 | 0 | { |
8 | 0 | va_list ap; |
9 | 0 | char buf[512]; |
10 | 0 | char msgbuf[256]; |
11 | |
|
12 | 0 | va_start(ap, fmt); |
13 | 0 | vsnprintf(msgbuf, 256, fmt, ap); |
14 | 0 | va_end(ap); |
15 | |
|
16 | 0 | snprintf(buf, 256, "%s:%d: ", J->filename, J->lexline); |
17 | 0 | strcat(buf, msgbuf); |
18 | |
|
19 | 0 | js_newsyntaxerror(J, buf); |
20 | 0 | js_throw(J); |
21 | 0 | } |
22 | | |
/*
 * Printable token names, indexed by token value:
 *   0          end of input
 *   1..127     single ASCII characters (shown quoted)
 *   128..255   unused (null entries)
 *   256..      multi-character tokens: literal classes, compound
 *              operators, then keywords
 * NOTE(review): the entries from 256 upwards presumably follow the order
 * of the TK_* token constants -- confirm against their declaration.
 */
static const char *tokenstring[] = {
	"(end-of-file)",
	"'\\x01'", "'\\x02'", "'\\x03'", "'\\x04'", "'\\x05'", "'\\x06'", "'\\x07'",
	"'\\x08'", "'\\x09'", "'\\x0A'", "'\\x0B'", "'\\x0C'", "'\\x0D'", "'\\x0E'", "'\\x0F'",
	"'\\x10'", "'\\x11'", "'\\x12'", "'\\x13'", "'\\x14'", "'\\x15'", "'\\x16'", "'\\x17'",
	"'\\x18'", "'\\x19'", "'\\x1A'", "'\\x1B'", "'\\x1C'", "'\\x1D'", "'\\x1E'", "'\\x1F'",
	"' '", "'!'", "'\"'", "'#'", "'$'", "'%'", "'&'", "'\\''",
	"'('", "')'", "'*'", "'+'", "','", "'-'", "'.'", "'/'",
	"'0'", "'1'", "'2'", "'3'", "'4'", "'5'", "'6'", "'7'",
	"'8'", "'9'", "':'", "';'", "'<'", "'='", "'>'", "'?'",
	"'@'", "'A'", "'B'", "'C'", "'D'", "'E'", "'F'", "'G'",
	"'H'", "'I'", "'J'", "'K'", "'L'", "'M'", "'N'", "'O'",
	"'P'", "'Q'", "'R'", "'S'", "'T'", "'U'", "'V'", "'W'",
	/* the backslash entry needs a doubled backslash; a single one would
	 * escape the closing quote and yield the two-character string '' */
	"'X'", "'Y'", "'Z'", "'['", "'\\'", "']'", "'^'", "'_'",
	"'`'", "'a'", "'b'", "'c'", "'d'", "'e'", "'f'", "'g'",
	"'h'", "'i'", "'j'", "'k'", "'l'", "'m'", "'n'", "'o'",
	"'p'", "'q'", "'r'", "'s'", "'t'", "'u'", "'v'", "'w'",
	"'x'", "'y'", "'z'", "'{'", "'|'", "'}'", "'~'", "'\\x7F'",

	/* 128..255: no token uses these values */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,

	"(identifier)", "(number)", "(string)", "(regexp)",

	"'<='", "'>='", "'=='", "'!='", "'==='", "'!=='",
	"'<<'", "'>>'", "'>>>'", "'&&'", "'||'",
	"'+='", "'-='", "'*='", "'/='", "'%='",
	"'<<='", "'>>='", "'>>>='", "'&='", "'|='", "'^='",
	"'++'", "'--'",

	"'break'", "'case'", "'catch'", "'continue'", "'debugger'",
	"'default'", "'delete'", "'do'", "'else'", "'false'", "'finally'", "'for'",
	"'function'", "'if'", "'in'", "'instanceof'", "'new'", "'null'", "'return'",
	"'switch'", "'this'", "'throw'", "'true'", "'try'", "'typeof'", "'var'",
	"'void'", "'while'", "'with'",
};
65 | | |
66 | | const char *jsY_tokenstring(int token) |
67 | 0 | { |
68 | 0 | if (token >= 0 && token < (int)nelem(tokenstring)) |
69 | 0 | if (tokenstring[token]) |
70 | 0 | return tokenstring[token]; |
71 | 0 | return "<unknown>"; |
72 | 0 | } |
73 | | |
/*
 * Reserved words. jsY_findkeyword looks names up here with the binary
 * search in jsY_findword, so the list must stay sorted by strcmp order,
 * and it turns index i into the token TK_BREAK + i, so the order must
 * also match the keyword token constants.
 */
static const char *keywords[] = {
	"break", "case", "catch", "continue",
	"debugger", "default", "delete", "do",
	"else",
	"false", "finally", "for", "function",
	"if", "in", "instanceof",
	"new", "null",
	"return",
	"switch",
	"this", "throw", "true", "try", "typeof",
	"var", "void",
	"while", "with",
};
80 | | |
/*
 * Binary search for the string s in list[0..num-1], which must be sorted
 * in strcmp order. Returns the index of the matching entry, or -1 when s
 * is not present (including when num is 0).
 */
int jsY_findword(const char *s, const char **list, int num)
{
	int lo = 0;
	int hi = num - 1;

	while (lo <= hi) {
		int mid = (lo + hi) >> 1;
		int order = strcmp(s, list[mid]);

		if (order == 0)
			return mid;
		if (order < 0)
			hi = mid - 1;
		else
			lo = mid + 1;
	}
	return -1;
}
97 | | |
98 | | static int jsY_findkeyword(js_State *J, const char *s) |
99 | 0 | { |
100 | 0 | int i = jsY_findword(s, keywords, nelem(keywords)); |
101 | 0 | if (i >= 0) { |
102 | 0 | J->text = keywords[i]; |
103 | 0 | return TK_BREAK + i; /* first keyword + i */ |
104 | 0 | } |
105 | 0 | J->text = s; |
106 | 0 | return TK_IDENTIFIER; |
107 | 0 | } |
108 | | |
/*
 * Return 1 if the code point c is one of the white space characters
 * recognised by the lexer (line terminators are handled separately by
 * jsY_isnewline), otherwise 0.
 */
int jsY_iswhite(int c)
{
	switch (c) {
	case 0x9:    /* horizontal tab */
	case 0xB:    /* vertical tab */
	case 0xC:    /* form feed */
	case 0x20:   /* space */
	case 0xA0:   /* no-break space */
	case 0xFEFF: /* byte order mark */
		return 1;
	default:
		return 0;
	}
}
113 | | |
/*
 * Return 1 if the code point c is a line terminator, otherwise 0.
 */
int jsY_isnewline(int c)
{
	switch (c) {
	case 0xA:    /* line feed */
	case 0xD:    /* carriage return */
	case 0x2028: /* line separator */
	case 0x2029: /* paragraph separator */
		return 1;
	default:
		return 0;
	}
}
118 | | |
/*
 * ASCII-only character class tests. Each one is defined only when no
 * macro of that name already exists. The argument is fully parenthesized
 * so that expressions such as `x | 0x20` are classified as a whole; it is
 * still evaluated more than once, so it must not have side effects.
 */
#ifndef isalpha
#define isalpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
#endif
#ifndef isdigit
#define isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
/* letters a-f / A-F only; decimal digits are NOT included */
#ifndef ishex
#define ishex(c) (((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
#endif
128 | | |
/*
 * Return 1 if the code point c may begin an identifier: an ASCII letter,
 * '$', '_', or a character that isalpharune accepts. Otherwise return 0.
 *
 * The ASCII letter test is spelled out rather than going through
 * isalpha: that name is only the local ASCII macro when no header has
 * defined it already, and the C library version is not defined for
 * values outside the unsigned char range, which c (a full code point)
 * can be.
 */
static int jsY_isidentifierstart(int c)
{
	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
		return 1;
	if (c == '$' || c == '_')
		return 1;
	return isalpharune(c) != 0;
}
133 | | |
/*
 * Return 1 if the code point c may appear inside an identifier after the
 * first character: an ASCII digit or letter, '$', '_', or a character
 * that isalpharune accepts. Otherwise return 0.
 *
 * The ASCII tests are spelled out rather than going through isdigit and
 * isalpha: those names are only the local ASCII macros when no header
 * has defined them already, and the C library versions are not defined
 * for values outside the unsigned char range, which c (a full code
 * point) can be.
 */
static int jsY_isidentifierpart(int c)
{
	if (c >= '0' && c <= '9')
		return 1;
	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
		return 1;
	if (c == '$' || c == '_')
		return 1;
	return isalpharune(c) != 0;
}
138 | | |
/*
 * Return 1 if c is an ASCII decimal digit, otherwise 0.
 *
 * Written as an explicit range test instead of isdigit so the result
 * does not depend on whether a header already supplied an isdigit macro;
 * callers pass the lexer lookahead, which can be any code point or EOF.
 */
static int jsY_isdec(int c)
{
	return c >= '0' && c <= '9';
}
143 | | |
/*
 * Return 1 if c is an ASCII hexadecimal digit (0-9, a-f, A-F),
 * otherwise 0.
 *
 * Written as explicit range tests instead of the isdigit/ishex macros so
 * the result does not depend on whether a header already supplied an
 * isdigit macro; callers pass the lexer lookahead, which can be any code
 * point or EOF.
 */
int jsY_ishex(int c)
{
	if (c >= '0' && c <= '9')
		return 1;
	if (c >= 'a' && c <= 'f')
		return 1;
	return c >= 'A' && c <= 'F';
}
148 | | |
/*
 * Convert an ASCII hexadecimal digit to its value (0..15). Any other
 * input yields 0.
 */
int jsY_tohex(int c)
{
	int value = 0;

	if (c >= '0' && c <= '9')
		value = c - '0';
	else if (c >= 'a' && c <= 'f')
		value = 0xA + (c - 'a');
	else if (c >= 'A' && c <= 'F')
		value = 0xA + (c - 'A');

	return value;
}
156 | | |
157 | | static void jsY_next(js_State *J) |
158 | 0 | { |
159 | 0 | Rune c; |
160 | 0 | if (*J->source == 0) { |
161 | 0 | J->lexchar = EOF; |
162 | 0 | return; |
163 | 0 | } |
164 | 0 | J->source += chartorune(&c, J->source); |
165 | | /* consume CR LF as one unit */ |
166 | 0 | if (c == '\r' && *J->source == '\n') |
167 | 0 | ++J->source; |
168 | 0 | if (jsY_isnewline(c)) { |
169 | 0 | J->line++; |
170 | 0 | c = '\n'; |
171 | 0 | } |
172 | 0 | J->lexchar = c; |
173 | 0 | } |
174 | | |
/*
 * If the lookahead character equals x, consume it (advance with jsY_next)
 * and yield 1; otherwise leave the input untouched and yield 0.
 * Both arguments are parenthesized so that any expression may be passed.
 */
#define jsY_accept(J, x) ((J)->lexchar == (x) ? (jsY_next(J), 1) : 0)

/*
 * Require the lookahead character to be x: consume it, or report
 * "expected '<x>'" through jsY_error. Wrapped in do/while(0) so that the
 * macro acts as a single statement and cannot capture a following `else`.
 * The argument x is evaluated more than once.
 */
#define jsY_expect(J, x) \
	do { \
		if (!jsY_accept(J, (x))) \
			jsY_error(J, "expected '%c'", (x)); \
	} while (0)
178 | | |
179 | | static void jsY_unescape(js_State *J) |
180 | 0 | { |
181 | 0 | if (jsY_accept(J, '\\')) { |
182 | 0 | if (jsY_accept(J, 'u')) { |
183 | 0 | int x = 0; |
184 | 0 | if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); |
185 | 0 | if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); |
186 | 0 | if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); |
187 | 0 | if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar); |
188 | 0 | J->lexchar = x; |
189 | 0 | return; |
190 | 0 | } |
191 | 0 | error: |
192 | 0 | jsY_error(J, "unexpected escape sequence"); |
193 | 0 | } |
194 | 0 | } |
195 | | |
196 | | static void textinit(js_State *J) |
197 | 0 | { |
198 | 0 | if (!J->lexbuf.text) { |
199 | 0 | J->lexbuf.cap = 4096; |
200 | 0 | J->lexbuf.text = js_malloc(J, J->lexbuf.cap); |
201 | 0 | } |
202 | 0 | J->lexbuf.len = 0; |
203 | 0 | } |
204 | | |
205 | | static void textpush(js_State *J, Rune c) |
206 | 0 | { |
207 | 0 | int n, newcap; |
208 | 0 | if (c == EOF) |
209 | 0 | n = 1; |
210 | 0 | else |
211 | 0 | n = runelen(c); |
212 | 0 | if (J->lexbuf.len + n > J->lexbuf.cap) { |
213 | 0 | newcap = J->lexbuf.cap * 2; |
214 | 0 | J->lexbuf.text = js_realloc(J, J->lexbuf.text, J->lexbuf.cap); |
215 | 0 | J->lexbuf.cap = newcap; |
216 | 0 | } |
217 | 0 | if (c == EOF) |
218 | 0 | J->lexbuf.text[J->lexbuf.len++] = 0; |
219 | 0 | else |
220 | 0 | J->lexbuf.len += runetochar(J->lexbuf.text + J->lexbuf.len, &c); |
221 | 0 | } |
222 | | |
223 | | static char *textend(js_State *J) |
224 | 0 | { |
225 | 0 | textpush(J, EOF); |
226 | 0 | return J->lexbuf.text; |
227 | 0 | } |
228 | | |
229 | | static void lexlinecomment(js_State *J) |
230 | 0 | { |
231 | 0 | while (J->lexchar != EOF && J->lexchar != '\n') |
232 | 0 | jsY_next(J); |
233 | 0 | } |
234 | | |
235 | | static int lexcomment(js_State *J) |
236 | 0 | { |
237 | | /* already consumed initial '/' '*' sequence */ |
238 | 0 | while (J->lexchar != EOF) { |
239 | 0 | if (jsY_accept(J, '*')) { |
240 | 0 | while (J->lexchar == '*') |
241 | 0 | jsY_next(J); |
242 | 0 | if (jsY_accept(J, '/')) |
243 | 0 | return 0; |
244 | 0 | } |
245 | 0 | else |
246 | 0 | jsY_next(J); |
247 | 0 | } |
248 | 0 | return -1; |
249 | 0 | } |
250 | | |
251 | | static double lexhex(js_State *J) |
252 | 0 | { |
253 | 0 | double n = 0; |
254 | 0 | if (!jsY_ishex(J->lexchar)) |
255 | 0 | jsY_error(J, "malformed hexadecimal number"); |
256 | 0 | while (jsY_ishex(J->lexchar)) { |
257 | 0 | n = n * 16 + jsY_tohex(J->lexchar); |
258 | 0 | jsY_next(J); |
259 | 0 | } |
260 | 0 | return n; |
261 | 0 | } |
262 | | |
263 | | #if 0 |
264 | | |
265 | | static double lexinteger(js_State *J) |
266 | | { |
267 | | double n = 0; |
268 | | if (!jsY_isdec(J->lexchar)) |
269 | | jsY_error(J, "malformed number"); |
270 | | while (jsY_isdec(J->lexchar)) { |
271 | | n = n * 10 + (J->lexchar - '0'); |
272 | | jsY_next(J); |
273 | | } |
274 | | return n; |
275 | | } |
276 | | |
277 | | static double lexfraction(js_State *J) |
278 | | { |
279 | | double n = 0; |
280 | | double d = 1; |
281 | | while (jsY_isdec(J->lexchar)) { |
282 | | n = n * 10 + (J->lexchar - '0'); |
283 | | d = d * 10; |
284 | | jsY_next(J); |
285 | | } |
286 | | return n / d; |
287 | | } |
288 | | |
289 | | static double lexexponent(js_State *J) |
290 | | { |
291 | | double sign; |
292 | | if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) { |
293 | | if (jsY_accept(J, '-')) sign = -1; |
294 | | else if (jsY_accept(J, '+')) sign = 1; |
295 | | else sign = 1; |
296 | | return sign * lexinteger(J); |
297 | | } |
298 | | return 0; |
299 | | } |
300 | | |
301 | | static int lexnumber(js_State *J) |
302 | | { |
303 | | double n; |
304 | | double e; |
305 | | |
306 | | if (jsY_accept(J, '0')) { |
307 | | if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) { |
308 | | J->number = lexhex(J); |
309 | | return TK_NUMBER; |
310 | | } |
311 | | if (jsY_isdec(J->lexchar)) |
312 | | jsY_error(J, "number with leading zero"); |
313 | | n = 0; |
314 | | if (jsY_accept(J, '.')) |
315 | | n += lexfraction(J); |
316 | | } else if (jsY_accept(J, '.')) { |
317 | | if (!jsY_isdec(J->lexchar)) |
318 | | return '.'; |
319 | | n = lexfraction(J); |
320 | | } else { |
321 | | n = lexinteger(J); |
322 | | if (jsY_accept(J, '.')) |
323 | | n += lexfraction(J); |
324 | | } |
325 | | |
326 | | e = lexexponent(J); |
327 | | if (e < 0) |
328 | | n /= pow(10, -e); |
329 | | else if (e > 0) |
330 | | n *= pow(10, e); |
331 | | |
332 | | if (jsY_isidentifierstart(J->lexchar)) |
333 | | jsY_error(J, "number with letter suffix"); |
334 | | |
335 | | J->number = n; |
336 | | return TK_NUMBER; |
337 | | } |
338 | | |
339 | | #else |
340 | | |
341 | | static int lexnumber(js_State *J) |
342 | 0 | { |
343 | 0 | const char *s = J->source - 1; |
344 | |
|
345 | 0 | if (jsY_accept(J, '0')) { |
346 | 0 | if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) { |
347 | 0 | J->number = lexhex(J); |
348 | 0 | return TK_NUMBER; |
349 | 0 | } |
350 | 0 | if (jsY_isdec(J->lexchar)) |
351 | 0 | jsY_error(J, "number with leading zero"); |
352 | 0 | if (jsY_accept(J, '.')) { |
353 | 0 | while (jsY_isdec(J->lexchar)) |
354 | 0 | jsY_next(J); |
355 | 0 | } |
356 | 0 | } else if (jsY_accept(J, '.')) { |
357 | 0 | if (!jsY_isdec(J->lexchar)) |
358 | 0 | return '.'; |
359 | 0 | while (jsY_isdec(J->lexchar)) |
360 | 0 | jsY_next(J); |
361 | 0 | } else { |
362 | 0 | while (jsY_isdec(J->lexchar)) |
363 | 0 | jsY_next(J); |
364 | 0 | if (jsY_accept(J, '.')) { |
365 | 0 | while (jsY_isdec(J->lexchar)) |
366 | 0 | jsY_next(J); |
367 | 0 | } |
368 | 0 | } |
369 | | |
370 | 0 | if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) { |
371 | 0 | if (J->lexchar == '-' || J->lexchar == '+') |
372 | 0 | jsY_next(J); |
373 | 0 | if (jsY_isdec(J->lexchar)) |
374 | 0 | while (jsY_isdec(J->lexchar)) |
375 | 0 | jsY_next(J); |
376 | 0 | else |
377 | 0 | jsY_error(J, "missing exponent"); |
378 | 0 | } |
379 | | |
380 | 0 | if (jsY_isidentifierstart(J->lexchar)) |
381 | 0 | jsY_error(J, "number with letter suffix"); |
382 | | |
383 | 0 | J->number = js_strtod(s, NULL); |
384 | 0 | return TK_NUMBER; |
385 | 0 | } |
386 | | |
387 | | #endif |
388 | | |
389 | | static int lexescape(js_State *J) |
390 | 0 | { |
391 | 0 | int x = 0; |
392 | | |
393 | | /* already consumed '\' */ |
394 | |
|
395 | 0 | if (jsY_accept(J, '\n')) |
396 | 0 | return 0; |
397 | | |
398 | 0 | switch (J->lexchar) { |
399 | 0 | case EOF: jsY_error(J, "unterminated escape sequence"); |
400 | 0 | case 'u': |
401 | 0 | jsY_next(J); |
402 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); } |
403 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); } |
404 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); } |
405 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); } |
406 | 0 | textpush(J, x); |
407 | 0 | break; |
408 | 0 | case 'x': |
409 | 0 | jsY_next(J); |
410 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); } |
411 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); } |
412 | 0 | textpush(J, x); |
413 | 0 | break; |
414 | 0 | case '0': textpush(J, 0); jsY_next(J); break; |
415 | 0 | case '\\': textpush(J, '\\'); jsY_next(J); break; |
416 | 0 | case '\'': textpush(J, '\''); jsY_next(J); break; |
417 | 0 | case '"': textpush(J, '"'); jsY_next(J); break; |
418 | 0 | case 'b': textpush(J, '\b'); jsY_next(J); break; |
419 | 0 | case 'f': textpush(J, '\f'); jsY_next(J); break; |
420 | 0 | case 'n': textpush(J, '\n'); jsY_next(J); break; |
421 | 0 | case 'r': textpush(J, '\r'); jsY_next(J); break; |
422 | 0 | case 't': textpush(J, '\t'); jsY_next(J); break; |
423 | 0 | case 'v': textpush(J, '\v'); jsY_next(J); break; |
424 | 0 | default: textpush(J, J->lexchar); jsY_next(J); break; |
425 | 0 | } |
426 | 0 | return 0; |
427 | 0 | } |
428 | | |
429 | | static int lexstring(js_State *J) |
430 | 0 | { |
431 | 0 | const char *s; |
432 | |
|
433 | 0 | int q = J->lexchar; |
434 | 0 | jsY_next(J); |
435 | |
|
436 | 0 | textinit(J); |
437 | |
|
438 | 0 | while (J->lexchar != q) { |
439 | 0 | if (J->lexchar == EOF || J->lexchar == '\n') |
440 | 0 | jsY_error(J, "string not terminated"); |
441 | 0 | if (jsY_accept(J, '\\')) { |
442 | 0 | if (lexescape(J)) |
443 | 0 | jsY_error(J, "malformed escape sequence"); |
444 | 0 | } else { |
445 | 0 | textpush(J, J->lexchar); |
446 | 0 | jsY_next(J); |
447 | 0 | } |
448 | 0 | } |
449 | 0 | jsY_expect(J, q); |
450 | | |
451 | 0 | s = textend(J); |
452 | |
|
453 | 0 | J->text = s; |
454 | 0 | return TK_STRING; |
455 | 0 | } |
456 | | |
457 | | /* the ugliest language wart ever... */ |
458 | | static int isregexpcontext(int last) |
459 | 0 | { |
460 | 0 | switch (last) { |
461 | 0 | case ']': |
462 | 0 | case ')': |
463 | 0 | case '}': |
464 | 0 | case TK_IDENTIFIER: |
465 | 0 | case TK_NUMBER: |
466 | 0 | case TK_STRING: |
467 | 0 | case TK_FALSE: |
468 | 0 | case TK_NULL: |
469 | 0 | case TK_THIS: |
470 | 0 | case TK_TRUE: |
471 | 0 | return 0; |
472 | 0 | default: |
473 | 0 | return 1; |
474 | 0 | } |
475 | 0 | } |
476 | | |
477 | | static int lexregexp(js_State *J) |
478 | 0 | { |
479 | 0 | const char *s; |
480 | 0 | int g, m, i; |
481 | 0 | int inclass = 0; |
482 | | |
483 | | /* already consumed initial '/' */ |
484 | |
|
485 | 0 | textinit(J); |
486 | | |
487 | | /* regexp body */ |
488 | 0 | while (J->lexchar != '/' || inclass) { |
489 | 0 | if (J->lexchar == EOF || J->lexchar == '\n') { |
490 | 0 | jsY_error(J, "regular expression not terminated"); |
491 | 0 | } else if (jsY_accept(J, '\\')) { |
492 | 0 | if (jsY_accept(J, '/')) { |
493 | 0 | textpush(J, '/'); |
494 | 0 | } else { |
495 | 0 | textpush(J, '\\'); |
496 | 0 | if (J->lexchar == EOF || J->lexchar == '\n') |
497 | 0 | jsY_error(J, "regular expression not terminated"); |
498 | 0 | textpush(J, J->lexchar); |
499 | 0 | jsY_next(J); |
500 | 0 | } |
501 | 0 | } else { |
502 | 0 | if (J->lexchar == '[' && !inclass) |
503 | 0 | inclass = 1; |
504 | 0 | if (J->lexchar == ']' && inclass) |
505 | 0 | inclass = 0; |
506 | 0 | textpush(J, J->lexchar); |
507 | 0 | jsY_next(J); |
508 | 0 | } |
509 | 0 | } |
510 | 0 | jsY_expect(J, '/'); |
511 | | |
512 | 0 | s = textend(J); |
513 | | |
514 | | /* regexp flags */ |
515 | 0 | g = i = m = 0; |
516 | |
|
517 | 0 | while (jsY_isidentifierpart(J->lexchar)) { |
518 | 0 | if (jsY_accept(J, 'g')) ++g; |
519 | 0 | else if (jsY_accept(J, 'i')) ++i; |
520 | 0 | else if (jsY_accept(J, 'm')) ++m; |
521 | 0 | else jsY_error(J, "illegal flag in regular expression: %c", J->lexchar); |
522 | 0 | } |
523 | | |
524 | 0 | if (g > 1 || i > 1 || m > 1) |
525 | 0 | jsY_error(J, "duplicated flag in regular expression"); |
526 | | |
527 | 0 | J->text = s; |
528 | 0 | J->number = 0; |
529 | 0 | if (g) J->number += JS_REGEXP_G; |
530 | 0 | if (i) J->number += JS_REGEXP_I; |
531 | 0 | if (m) J->number += JS_REGEXP_M; |
532 | 0 | return TK_REGEXP; |
533 | 0 | } |
534 | | |
535 | | /* simple "return [no Line Terminator here] ..." contexts */ |
536 | | static int isnlthcontext(int last) |
537 | 0 | { |
538 | 0 | switch (last) { |
539 | 0 | case TK_BREAK: |
540 | 0 | case TK_CONTINUE: |
541 | 0 | case TK_RETURN: |
542 | 0 | case TK_THROW: |
543 | 0 | return 1; |
544 | 0 | default: |
545 | 0 | return 0; |
546 | 0 | } |
547 | 0 | } |
548 | | |
549 | | static int jsY_lexx(js_State *J) |
550 | 0 | { |
551 | 0 | J->newline = 0; |
552 | |
|
553 | 0 | while (1) { |
554 | 0 | J->lexline = J->line; /* save location of beginning of token */ |
555 | |
|
556 | 0 | while (jsY_iswhite(J->lexchar)) |
557 | 0 | jsY_next(J); |
558 | |
|
559 | 0 | if (jsY_accept(J, '\n')) { |
560 | 0 | J->newline = 1; |
561 | 0 | if (isnlthcontext(J->lasttoken)) |
562 | 0 | return ';'; |
563 | 0 | continue; |
564 | 0 | } |
565 | | |
566 | 0 | if (jsY_accept(J, '/')) { |
567 | 0 | if (jsY_accept(J, '/')) { |
568 | 0 | lexlinecomment(J); |
569 | 0 | continue; |
570 | 0 | } else if (jsY_accept(J, '*')) { |
571 | 0 | if (lexcomment(J)) |
572 | 0 | jsY_error(J, "multi-line comment not terminated"); |
573 | 0 | continue; |
574 | 0 | } else if (isregexpcontext(J->lasttoken)) { |
575 | 0 | return lexregexp(J); |
576 | 0 | } else if (jsY_accept(J, '=')) { |
577 | 0 | return TK_DIV_ASS; |
578 | 0 | } else { |
579 | 0 | return '/'; |
580 | 0 | } |
581 | 0 | } |
582 | | |
583 | 0 | if (J->lexchar >= '0' && J->lexchar <= '9') { |
584 | 0 | return lexnumber(J); |
585 | 0 | } |
586 | | |
587 | 0 | switch (J->lexchar) { |
588 | 0 | case '(': jsY_next(J); return '('; |
589 | 0 | case ')': jsY_next(J); return ')'; |
590 | 0 | case ',': jsY_next(J); return ','; |
591 | 0 | case ':': jsY_next(J); return ':'; |
592 | 0 | case ';': jsY_next(J); return ';'; |
593 | 0 | case '?': jsY_next(J); return '?'; |
594 | 0 | case '[': jsY_next(J); return '['; |
595 | 0 | case ']': jsY_next(J); return ']'; |
596 | 0 | case '{': jsY_next(J); return '{'; |
597 | 0 | case '}': jsY_next(J); return '}'; |
598 | 0 | case '~': jsY_next(J); return '~'; |
599 | | |
600 | 0 | case '\'': |
601 | 0 | case '"': |
602 | 0 | return lexstring(J); |
603 | | |
604 | 0 | case '.': |
605 | 0 | return lexnumber(J); |
606 | | |
607 | 0 | case '<': |
608 | 0 | jsY_next(J); |
609 | 0 | if (jsY_accept(J, '<')) { |
610 | 0 | if (jsY_accept(J, '=')) |
611 | 0 | return TK_SHL_ASS; |
612 | 0 | return TK_SHL; |
613 | 0 | } |
614 | 0 | if (jsY_accept(J, '=')) |
615 | 0 | return TK_LE; |
616 | 0 | return '<'; |
617 | | |
618 | 0 | case '>': |
619 | 0 | jsY_next(J); |
620 | 0 | if (jsY_accept(J, '>')) { |
621 | 0 | if (jsY_accept(J, '>')) { |
622 | 0 | if (jsY_accept(J, '=')) |
623 | 0 | return TK_USHR_ASS; |
624 | 0 | return TK_USHR; |
625 | 0 | } |
626 | 0 | if (jsY_accept(J, '=')) |
627 | 0 | return TK_SHR_ASS; |
628 | 0 | return TK_SHR; |
629 | 0 | } |
630 | 0 | if (jsY_accept(J, '=')) |
631 | 0 | return TK_GE; |
632 | 0 | return '>'; |
633 | | |
634 | 0 | case '=': |
635 | 0 | jsY_next(J); |
636 | 0 | if (jsY_accept(J, '=')) { |
637 | 0 | if (jsY_accept(J, '=')) |
638 | 0 | return TK_STRICTEQ; |
639 | 0 | return TK_EQ; |
640 | 0 | } |
641 | 0 | return '='; |
642 | | |
643 | 0 | case '!': |
644 | 0 | jsY_next(J); |
645 | 0 | if (jsY_accept(J, '=')) { |
646 | 0 | if (jsY_accept(J, '=')) |
647 | 0 | return TK_STRICTNE; |
648 | 0 | return TK_NE; |
649 | 0 | } |
650 | 0 | return '!'; |
651 | | |
652 | 0 | case '+': |
653 | 0 | jsY_next(J); |
654 | 0 | if (jsY_accept(J, '+')) |
655 | 0 | return TK_INC; |
656 | 0 | if (jsY_accept(J, '=')) |
657 | 0 | return TK_ADD_ASS; |
658 | 0 | return '+'; |
659 | | |
660 | 0 | case '-': |
661 | 0 | jsY_next(J); |
662 | 0 | if (jsY_accept(J, '-')) |
663 | 0 | return TK_DEC; |
664 | 0 | if (jsY_accept(J, '=')) |
665 | 0 | return TK_SUB_ASS; |
666 | 0 | return '-'; |
667 | | |
668 | 0 | case '*': |
669 | 0 | jsY_next(J); |
670 | 0 | if (jsY_accept(J, '=')) |
671 | 0 | return TK_MUL_ASS; |
672 | 0 | return '*'; |
673 | | |
674 | 0 | case '%': |
675 | 0 | jsY_next(J); |
676 | 0 | if (jsY_accept(J, '=')) |
677 | 0 | return TK_MOD_ASS; |
678 | 0 | return '%'; |
679 | | |
680 | 0 | case '&': |
681 | 0 | jsY_next(J); |
682 | 0 | if (jsY_accept(J, '&')) |
683 | 0 | return TK_AND; |
684 | 0 | if (jsY_accept(J, '=')) |
685 | 0 | return TK_AND_ASS; |
686 | 0 | return '&'; |
687 | | |
688 | 0 | case '|': |
689 | 0 | jsY_next(J); |
690 | 0 | if (jsY_accept(J, '|')) |
691 | 0 | return TK_OR; |
692 | 0 | if (jsY_accept(J, '=')) |
693 | 0 | return TK_OR_ASS; |
694 | 0 | return '|'; |
695 | | |
696 | 0 | case '^': |
697 | 0 | jsY_next(J); |
698 | 0 | if (jsY_accept(J, '=')) |
699 | 0 | return TK_XOR_ASS; |
700 | 0 | return '^'; |
701 | | |
702 | 0 | case EOF: |
703 | 0 | return 0; /* EOF */ |
704 | 0 | } |
705 | | |
706 | | /* Handle \uXXXX escapes in identifiers */ |
707 | 0 | jsY_unescape(J); |
708 | 0 | if (jsY_isidentifierstart(J->lexchar)) { |
709 | 0 | textinit(J); |
710 | 0 | textpush(J, J->lexchar); |
711 | |
|
712 | 0 | jsY_next(J); |
713 | 0 | jsY_unescape(J); |
714 | 0 | while (jsY_isidentifierpart(J->lexchar)) { |
715 | 0 | textpush(J, J->lexchar); |
716 | 0 | jsY_next(J); |
717 | 0 | jsY_unescape(J); |
718 | 0 | } |
719 | |
|
720 | 0 | textend(J); |
721 | |
|
722 | 0 | return jsY_findkeyword(J, J->lexbuf.text); |
723 | 0 | } |
724 | | |
725 | 0 | if (J->lexchar >= 0x20 && J->lexchar <= 0x7E) |
726 | 0 | jsY_error(J, "unexpected character: '%c'", J->lexchar); |
727 | 0 | jsY_error(J, "unexpected character: \\u%04X", J->lexchar); |
728 | 0 | } |
729 | 0 | } |
730 | | |
731 | | void jsY_initlex(js_State *J, const char *filename, const char *source) |
732 | 0 | { |
733 | 0 | J->filename = filename; |
734 | 0 | J->source = source; |
735 | 0 | J->line = 1; |
736 | 0 | J->lasttoken = 0; |
737 | 0 | jsY_next(J); /* load first lookahead character */ |
738 | 0 | } |
739 | | |
740 | | int jsY_lex(js_State *J) |
741 | 0 | { |
742 | 0 | return J->lasttoken = jsY_lexx(J); |
743 | 0 | } |
744 | | |
745 | | static int lexjsonnumber(js_State *J) |
746 | 0 | { |
747 | 0 | const char *s = J->source - 1; |
748 | |
|
749 | 0 | if (J->lexchar == '-') |
750 | 0 | jsY_next(J); |
751 | |
|
752 | 0 | if (J->lexchar == '0') |
753 | 0 | jsY_next(J); |
754 | 0 | else if (J->lexchar >= '1' && J->lexchar <= '9') |
755 | 0 | while (isdigit(J->lexchar)) |
756 | 0 | jsY_next(J); |
757 | 0 | else |
758 | 0 | jsY_error(J, "unexpected non-digit"); |
759 | | |
760 | 0 | if (jsY_accept(J, '.')) { |
761 | 0 | if (isdigit(J->lexchar)) |
762 | 0 | while (isdigit(J->lexchar)) |
763 | 0 | jsY_next(J); |
764 | 0 | else |
765 | 0 | jsY_error(J, "missing digits after decimal point"); |
766 | 0 | } |
767 | | |
768 | 0 | if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) { |
769 | 0 | if (J->lexchar == '-' || J->lexchar == '+') |
770 | 0 | jsY_next(J); |
771 | 0 | if (isdigit(J->lexchar)) |
772 | 0 | while (isdigit(J->lexchar)) |
773 | 0 | jsY_next(J); |
774 | 0 | else |
775 | 0 | jsY_error(J, "missing digits after exponent indicator"); |
776 | 0 | } |
777 | | |
778 | 0 | J->number = js_strtod(s, NULL); |
779 | 0 | return TK_NUMBER; |
780 | 0 | } |
781 | | |
782 | | static int lexjsonescape(js_State *J) |
783 | 0 | { |
784 | 0 | int x = 0; |
785 | | |
786 | | /* already consumed '\' */ |
787 | |
|
788 | 0 | switch (J->lexchar) { |
789 | 0 | default: jsY_error(J, "invalid escape sequence"); |
790 | 0 | case 'u': |
791 | 0 | jsY_next(J); |
792 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); } |
793 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); } |
794 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); } |
795 | 0 | if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); } |
796 | 0 | textpush(J, x); |
797 | 0 | break; |
798 | 0 | case '"': textpush(J, '"'); jsY_next(J); break; |
799 | 0 | case '\\': textpush(J, '\\'); jsY_next(J); break; |
800 | 0 | case '/': textpush(J, '/'); jsY_next(J); break; |
801 | 0 | case 'b': textpush(J, '\b'); jsY_next(J); break; |
802 | 0 | case 'f': textpush(J, '\f'); jsY_next(J); break; |
803 | 0 | case 'n': textpush(J, '\n'); jsY_next(J); break; |
804 | 0 | case 'r': textpush(J, '\r'); jsY_next(J); break; |
805 | 0 | case 't': textpush(J, '\t'); jsY_next(J); break; |
806 | 0 | } |
807 | 0 | return 0; |
808 | 0 | } |
809 | | |
810 | | static int lexjsonstring(js_State *J) |
811 | 0 | { |
812 | 0 | const char *s; |
813 | |
|
814 | 0 | textinit(J); |
815 | |
|
816 | 0 | while (J->lexchar != '"') { |
817 | 0 | if (J->lexchar == EOF) |
818 | 0 | jsY_error(J, "unterminated string"); |
819 | 0 | else if (J->lexchar < 32) |
820 | 0 | jsY_error(J, "invalid control character in string"); |
821 | 0 | else if (jsY_accept(J, '\\')) |
822 | 0 | lexjsonescape(J); |
823 | 0 | else { |
824 | 0 | textpush(J, J->lexchar); |
825 | 0 | jsY_next(J); |
826 | 0 | } |
827 | 0 | } |
828 | 0 | jsY_expect(J, '"'); |
829 | | |
830 | 0 | s = textend(J); |
831 | |
|
832 | 0 | J->text = s; |
833 | 0 | return TK_STRING; |
834 | 0 | } |
835 | | |
836 | | int jsY_lexjson(js_State *J) |
837 | 0 | { |
838 | 0 | while (1) { |
839 | 0 | J->lexline = J->line; /* save location of beginning of token */ |
840 | |
|
841 | 0 | while (jsY_iswhite(J->lexchar) || J->lexchar == '\n') |
842 | 0 | jsY_next(J); |
843 | |
|
844 | 0 | if ((J->lexchar >= '0' && J->lexchar <= '9') || J->lexchar == '-') |
845 | 0 | return lexjsonnumber(J); |
846 | | |
847 | 0 | switch (J->lexchar) { |
848 | 0 | case ',': jsY_next(J); return ','; |
849 | 0 | case ':': jsY_next(J); return ':'; |
850 | 0 | case '[': jsY_next(J); return '['; |
851 | 0 | case ']': jsY_next(J); return ']'; |
852 | 0 | case '{': jsY_next(J); return '{'; |
853 | 0 | case '}': jsY_next(J); return '}'; |
854 | | |
855 | 0 | case '"': |
856 | 0 | jsY_next(J); |
857 | 0 | return lexjsonstring(J); |
858 | | |
859 | 0 | case 'f': |
860 | 0 | jsY_next(J); jsY_expect(J, 'a'); jsY_expect(J, 'l'); jsY_expect(J, 's'); jsY_expect(J, 'e'); |
861 | 0 | return TK_FALSE; |
862 | | |
863 | 0 | case 'n': |
864 | 0 | jsY_next(J); jsY_expect(J, 'u'); jsY_expect(J, 'l'); jsY_expect(J, 'l'); |
865 | 0 | return TK_NULL; |
866 | | |
867 | 0 | case 't': |
868 | 0 | jsY_next(J); jsY_expect(J, 'r'); jsY_expect(J, 'u'); jsY_expect(J, 'e'); |
869 | 0 | return TK_TRUE; |
870 | | |
871 | 0 | case EOF: |
872 | 0 | return 0; /* EOF */ |
873 | 0 | } |
874 | | |
875 | 0 | if (J->lexchar >= 0x20 && J->lexchar <= 0x7E) |
876 | 0 | jsY_error(J, "unexpected character: '%c'", J->lexchar); |
877 | 0 | jsY_error(J, "unexpected character: \\u%04X", J->lexchar); |
878 | 0 | } |
879 | 0 | } |