/src/keystone/llvm/lib/MC/MCParser/AsmLexer.cpp
Line | Count | Source |
1 | | //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This class implements the lexer for assembly files. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | // |
14 | | #include "llvm/MC/MCParser/AsmLexer.h" |
15 | | #include "llvm/MC/MCAsmInfo.h" |
16 | | #include "llvm/Support/MemoryBuffer.h" |
17 | | #include "llvm/Support/SMLoc.h" |
18 | | #include <cctype> |
19 | | #include <cerrno> |
20 | | #include <cstdio> |
21 | | #include <cstdlib> |
22 | | using namespace llvm_ks; |
23 | | |
24 | 148k | AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { |
25 | 148k | CurPtr = nullptr; |
26 | 148k | isAtStartOfLine = true; |
27 | 148k | AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); |
28 | 148k | defaultRadix = MAI.getRadix(); |
29 | 148k | } |
30 | | |
31 | 148k | AsmLexer::~AsmLexer() { |
32 | 148k | } |
33 | | |
34 | 1.00M | void AsmLexer::setBuffer(StringRef Buf, const char *ptr) { |
35 | 1.00M | CurBuf = Buf; |
36 | | |
37 | 1.00M | if (ptr) |
38 | 423k | CurPtr = ptr; |
39 | 582k | else |
40 | 582k | CurPtr = CurBuf.begin(); |
41 | | |
42 | 1.00M | TokStart = nullptr; |
43 | 1.00M | } |
44 | | |
45 | | /// ReturnError - Set the error to the specified string at the specified |
46 | | /// location. This is defined to always return AsmToken::Error. |
47 | | AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) |
48 | 15.0M | { |
49 | | //SetError(SMLoc::getFromPointer(Loc), Msg); |
50 | | |
51 | 15.0M | return AsmToken(AsmToken::Error, StringRef(Loc, 0)); |
52 | 15.0M | } |
53 | | |
54 | 199M | int AsmLexer::getNextChar() { |
55 | 199M | char CurChar = *CurPtr++; |
56 | 199M | switch (CurChar) { |
57 | 198M | default: |
58 | 198M | return (unsigned char)CurChar; |
59 | 250k | case 0: |
60 | | // A nul character in the stream is either the end of the current buffer or |
61 | | // a random nul in the file. Disambiguate that here. |
62 | 250k | if (CurPtr - 1 != CurBuf.end()) |
63 | 0 | return 0; // Just whitespace. |
64 | | |
65 | | // Otherwise, return end of file. |
66 | 250k | --CurPtr; // Another call to lex will return EOF again. |
67 | 250k | return EOF; |
68 | 199M | } |
69 | 199M | } |
70 | | |
71 | | /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? |
72 | | /// |
73 | | /// The leading integral digit sequence and dot should have already been |
74 | | /// consumed, some or all of the fractional digit sequence *can* have been |
75 | | /// consumed. |
76 | 552k | AsmToken AsmLexer::LexFloatLiteral() { |
77 | | // Skip the fractional digit sequence. |
78 | 552k | while (isdigit(*CurPtr)) |
79 | 0 | ++CurPtr; |
80 | | |
81 | | // Check for exponent; we intentionally accept a slighlty wider set of |
82 | | // literals here and rely on the upstream client to reject invalid ones (e.g., |
83 | | // "1e+"). |
84 | 552k | if (*CurPtr == 'e' || *CurPtr == 'E') { |
85 | 117k | ++CurPtr; |
86 | 117k | if (*CurPtr == '-' || *CurPtr == '+') |
87 | 42.7k | ++CurPtr; |
88 | 420k | while (isdigit(*CurPtr)) |
89 | 302k | ++CurPtr; |
90 | 117k | } |
91 | | |
92 | 552k | return AsmToken(AsmToken::Real, |
93 | 552k | StringRef(TokStart, CurPtr - TokStart)); |
94 | 552k | } |
95 | | |
96 | | /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+ |
97 | | /// while making sure there are enough actual digits around for the constant to |
98 | | /// be valid. |
99 | | /// |
100 | | /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed |
101 | | /// before we get here. |
102 | | AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) |
103 | 213k | { |
104 | 213k | assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') && |
105 | 213k | "unexpected parse state in floating hex"); |
106 | 213k | bool NoFracDigits = true; |
107 | | |
108 | | // Skip the fractional part if there is one |
109 | 213k | if (*CurPtr == '.') { |
110 | 94.6k | ++CurPtr; |
111 | | |
112 | 94.6k | const char *FracStart = CurPtr; |
113 | 448k | while (isxdigit(*CurPtr)) |
114 | 353k | ++CurPtr; |
115 | | |
116 | 94.6k | NoFracDigits = CurPtr == FracStart; |
117 | 94.6k | } |
118 | | |
119 | 213k | if (NoIntDigits && NoFracDigits) |
120 | 17.2k | return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " |
121 | 17.2k | "expected at least one significand digit"); |
122 | | |
123 | | // Make sure we do have some kind of proper exponent part |
124 | 196k | if (*CurPtr != 'p' && *CurPtr != 'P') |
125 | 17.6k | return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " |
126 | 17.6k | "expected exponent part 'p'"); |
127 | 178k | ++CurPtr; |
128 | | |
129 | 178k | if (*CurPtr == '+' || *CurPtr == '-') |
130 | 59.8k | ++CurPtr; |
131 | | |
132 | | // N.b. exponent digits are *not* hex |
133 | 178k | const char *ExpStart = CurPtr; |
134 | 559k | while (isdigit(*CurPtr)) |
135 | 381k | ++CurPtr; |
136 | | |
137 | 178k | if (CurPtr == ExpStart) |
138 | 27.8k | return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " |
139 | 27.8k | "expected at least one exponent digit"); |
140 | | |
141 | 150k | return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); |
142 | 178k | } |
143 | | |
/// Character class for the tail of an identifier as lexed by LexIdentifier:
/// [a-zA-Z0-9_$.@?]
///
/// \param c        the input byte to classify.
/// \param AllowAt  whether '@' counts as an identifier character.
/// \returns true if \p c may appear inside an identifier.
static bool IsIdentifierChar(char c, bool AllowAt) {
  // isalnum has undefined behaviour for negative arguments other than EOF, so
  // bytes >= 0x80 must be converted to unsigned char before classification.
  return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
         c == '.' || (c == '@' && AllowAt) || c == '?';
}
149 | 35.7M | AsmToken AsmLexer::LexIdentifier() { |
150 | | // Check for floating point literals. |
151 | 35.7M | if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { |
152 | | // Disambiguate a .1243foo identifier from a floating literal. |
153 | 5.55M | while (isdigit(*CurPtr)) |
154 | 4.56M | ++CurPtr; |
155 | 989k | if (*CurPtr == 'e' || *CurPtr == 'E' || |
156 | 871k | !IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) |
157 | 552k | return LexFloatLiteral(); |
158 | 989k | } |
159 | | |
160 | 158M | while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) |
161 | 123M | ++CurPtr; |
162 | | |
163 | | // Handle . as a special case. |
164 | 35.1M | if (CurPtr == TokStart+1 && TokStart[0] == '.') |
165 | 2.65M | return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); |
166 | | |
167 | 32.5M | return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); |
168 | 35.1M | } |
169 | | |
170 | | /// LexSlash: Slash: / |
171 | | /// C-Style Comment: /* ... */ |
172 | | AsmToken AsmLexer::LexSlash() |
173 | 266k | { |
174 | 266k | switch (*CurPtr) { |
175 | 22.1k | case '*': break; // C style comment. |
176 | 47.6k | case '/': return ++CurPtr, LexLineComment(); |
177 | 196k | default: return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1)); |
178 | 266k | } |
179 | | |
180 | | // C Style comment. |
181 | 22.1k | ++CurPtr; // skip the star. |
182 | 70.3k | while (1) { |
183 | 70.3k | int CurChar = getNextChar(); |
184 | 70.3k | switch (CurChar) { |
185 | 311 | case EOF: |
186 | 311 | return ReturnError(TokStart, "unterminated comment"); |
187 | 31.5k | case '*': |
188 | | // End of the comment? |
189 | 31.5k | if (CurPtr[0] != '/') break; |
190 | | |
191 | 21.8k | ++CurPtr; // End the */. |
192 | 21.8k | return LexToken(); |
193 | 70.3k | } |
194 | 70.3k | } |
195 | 22.1k | } |
196 | | |
197 | | /// LexLineComment: Comment: #[^\n]* |
198 | | /// : //[^\n]* |
199 | 451k | AsmToken AsmLexer::LexLineComment() { |
200 | | // FIXME: This is broken if we happen to a comment at the end of a file, which |
201 | | // was .included, and which doesn't end with a newline. |
202 | 451k | int CurChar = getNextChar(); |
203 | 9.72M | while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) |
204 | 9.27M | CurChar = getNextChar(); |
205 | | |
206 | 451k | if (CurChar == EOF) |
207 | 3.11k | return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); |
208 | 448k | return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0)); |
209 | 451k | } |
210 | | |
/// Advance \p CurPtr past an integer type suffix: an optional 'U' followed by
/// at most two 'L's (so ULL, UL, U, L and LL are all skipped).  Only upper-case
/// letters are recognised.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  const char *Cursor = CurPtr;

  if (*Cursor == 'U')
    ++Cursor;

  // Up to two 'L's; stop at the first character that is not one.
  for (int Seen = 0; Seen < 2 && *Cursor == 'L'; ++Seen)
    ++Cursor;

  CurPtr = Cursor;
}
220 | | |
// Look ahead to search for first non-hex digit, if it's [hH], then we treat the
// integer as a hexadecimal, possibly with leading zeroes.
//
// CurPtr is updated as follows:
//   - the run is followed by [hH]: CurPtr is left on that 'h'/'H';
//   - otherwise, if the run contained no hex letter: CurPtr is left just past
//     the run of decimal digits;
//   - otherwise: CurPtr is left on the first hex letter of the run.
//
// Returns 16 in the first case and DefaultRadix in the other two.
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
  const char *FirstHex = nullptr;
  const char *LookAhead = CurPtr;
  while (1) {
    // The <cctype> classifiers have undefined behaviour for negative arguments
    // other than EOF, so bytes >= 0x80 are converted to unsigned char first.
    const unsigned char Ch = static_cast<unsigned char>(*LookAhead);
    if (isdigit(Ch)) {
      ++LookAhead;
    } else if (isxdigit(Ch)) {
      if (!FirstHex)
        FirstHex = LookAhead;
      ++LookAhead;
    } else {
      break;
    }
  }
  bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
  CurPtr = (isHex || !FirstHex) ? LookAhead : FirstHex;
  if (isHex)
    return 16;
  return DefaultRadix;
}
243 | | |
244 | | static AsmToken intToken(StringRef Ref, APInt &Value) |
245 | 6.49M | { |
246 | 6.49M | if (Value.isIntN(64)) |
247 | 6.39M | return AsmToken(AsmToken::Integer, Ref, Value); |
248 | 105k | return AsmToken(AsmToken::BigNum, Ref, Value); |
249 | 6.49M | } |
250 | | |
251 | | /// LexDigit: First character is [0-9]. |
252 | | /// Local Label: [0-9][:] |
253 | | /// Forward/Backward Label: [0-9][fb] |
254 | | /// Binary integer: 0b[01]+ |
255 | | /// Octal integer: 0[0-7]+ |
256 | | /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] |
257 | | /// Decimal integer: [1-9][0-9]* |
258 | | AsmToken AsmLexer::LexDigit() |
259 | 7.02M | { |
260 | | // Decimal integer: [1-9][0-9]* |
261 | 7.02M | if (CurPtr[-1] != '0' || CurPtr[0] == '.') { |
262 | 4.82M | unsigned Radix = doLookAhead(CurPtr, 10); |
263 | | |
264 | 4.82M | if (defaultRadix == 16) |
265 | 4.82M | Radix = 16; |
266 | | |
267 | 4.82M | bool isHex = Radix == 16; |
268 | | // Check for floating point literals. |
269 | 4.82M | if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) { |
270 | 0 | ++CurPtr; |
271 | 0 | return LexFloatLiteral(); |
272 | 0 | } |
273 | | |
274 | 4.82M | StringRef Result(TokStart, CurPtr - TokStart); |
275 | | |
276 | 4.82M | APInt Value(128, 0, true); |
277 | 4.82M | if (Result.getAsInteger(Radix, Value)) |
278 | 0 | return ReturnError(TokStart, !isHex ? "invalid decimal number" : |
279 | 0 | "invalid hexdecimal number"); |
280 | | |
281 | | // Consume the [bB][hH]. |
282 | 4.82M | if (defaultRadix != 16) { |
283 | 0 | if (Radix == 2 || Radix == 16) |
284 | 0 | ++CurPtr; |
285 | 0 | } |
286 | | |
287 | | // The darwin/x86 (and x86-64) assembler accepts and ignores type |
288 | | // suffices on integer literals. |
289 | 4.82M | SkipIgnoredIntegerSuffix(CurPtr); |
290 | | |
291 | 4.82M | return intToken(Result, Value); |
292 | 4.82M | } |
293 | | |
294 | 2.20M | if (*CurPtr == 'b') { |
295 | 251k | ++CurPtr; |
296 | | // See if we actually have "0b" as part of something like "jmp 0b\n" |
297 | 251k | if (!isdigit(CurPtr[0])) { |
298 | 152k | --CurPtr; |
299 | 152k | StringRef Result(TokStart, CurPtr - TokStart); |
300 | 152k | return AsmToken(AsmToken::Integer, Result, 0); |
301 | 152k | } |
302 | 98.1k | const char *NumStart = CurPtr; |
303 | 451k | while (CurPtr[0] == '0' || CurPtr[0] == '1') |
304 | 352k | ++CurPtr; |
305 | | |
306 | | // Requires at least one binary digit. |
307 | 98.1k | if (CurPtr == NumStart) |
308 | 5.37k | return ReturnError(TokStart, "invalid binary number"); |
309 | | |
310 | 92.8k | StringRef Result(TokStart, CurPtr - TokStart); |
311 | | |
312 | 92.8k | APInt Value(128, 0, true); |
313 | 92.8k | if (Result.substr(2).getAsInteger(2, Value)) |
314 | 0 | return ReturnError(TokStart, "invalid binary number"); |
315 | | |
316 | | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
317 | | // suffixes on integer literals. |
318 | 92.8k | SkipIgnoredIntegerSuffix(CurPtr); |
319 | | |
320 | 92.8k | return intToken(Result, Value); |
321 | 92.8k | } |
322 | | |
323 | 1.94M | if (*CurPtr == 'x' || *CurPtr == 'X') { |
324 | 361k | ++CurPtr; |
325 | 361k | const char *NumStart = CurPtr; |
326 | 2.42M | while (isxdigit(CurPtr[0])) |
327 | 2.06M | ++CurPtr; |
328 | | |
329 | | // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be |
330 | | // diagnosed by LexHexFloatLiteral). |
331 | 361k | if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P') |
332 | 213k | return LexHexFloatLiteral(NumStart == CurPtr); |
333 | | |
334 | | // Otherwise requires at least one hex digit. |
335 | 147k | if (CurPtr == NumStart) |
336 | 17.1k | return ReturnError(CurPtr-2, "invalid hexadecimal number"); |
337 | | |
338 | 130k | APInt Result(128, 0); |
339 | 130k | if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) |
340 | 0 | return ReturnError(TokStart, "invalid hexadecimal number"); |
341 | | |
342 | | // Consume the optional [hH]. |
343 | 130k | if (*CurPtr == 'h' || *CurPtr == 'H') |
344 | 7.23k | ++CurPtr; |
345 | | |
346 | | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
347 | | // suffixes on integer literals. |
348 | 130k | SkipIgnoredIntegerSuffix(CurPtr); |
349 | | |
350 | 130k | return intToken(StringRef(TokStart, CurPtr - TokStart), Result); |
351 | 130k | } |
352 | | |
353 | | // Either octal or hexadecimal. |
354 | 1.58M | APInt Value(128, 0, true); |
355 | 1.58M | unsigned Radix = doLookAhead(CurPtr, 8); |
356 | 1.58M | bool isHex = Radix == 16; |
357 | 1.58M | StringRef Result(TokStart, CurPtr - TokStart); |
358 | 1.58M | if (Result.getAsInteger(Radix, Value)) |
359 | 138k | return ReturnError(TokStart, !isHex ? "invalid octal number" : |
360 | 138k | "invalid hexdecimal number"); |
361 | | |
362 | | // Consume the [hH]. |
363 | 1.44M | if (Radix == 16) |
364 | 8.85k | ++CurPtr; |
365 | | |
366 | | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
367 | | // suffixes on integer literals. |
368 | 1.44M | SkipIgnoredIntegerSuffix(CurPtr); |
369 | | |
370 | 1.44M | return intToken(Result, Value); |
371 | 1.58M | } |
372 | | |
373 | | /// LexSingleQuote: Integer: 'b' |
374 | | AsmToken AsmLexer::LexSingleQuote() |
375 | 246k | { |
376 | 246k | int CurChar = getNextChar(); |
377 | | |
378 | 246k | if (CurChar == '\\') |
379 | 39.9k | CurChar = getNextChar(); |
380 | | |
381 | 246k | if (CurChar == EOF) |
382 | 298 | return ReturnError(TokStart, "unterminated single quote"); |
383 | | |
384 | 246k | CurChar = getNextChar(); |
385 | | |
386 | 246k | if (CurChar != '\'') |
387 | 172k | return ReturnError(TokStart, "single quote way too long"); |
388 | | |
389 | | // The idea here being that 'c' is basically just an integral |
390 | | // constant. |
391 | 73.7k | StringRef Res = StringRef(TokStart,CurPtr - TokStart); |
392 | 73.7k | long long Value; |
393 | | |
394 | 73.7k | if (Res.startswith("\'\\")) { |
395 | 24.9k | char theChar = Res[2]; |
396 | 24.9k | switch (theChar) { |
397 | 8.26k | default: Value = theChar; break; |
398 | 3.37k | case '\'': Value = '\''; break; |
399 | 4.64k | case 't': Value = '\t'; break; |
400 | 3.94k | case 'n': Value = '\n'; break; |
401 | 4.76k | case 'b': Value = '\b'; break; |
402 | 24.9k | } |
403 | 24.9k | } else |
404 | 48.8k | Value = TokStart[1]; |
405 | | |
406 | 73.7k | return AsmToken(AsmToken::Integer, Res, Value); |
407 | 73.7k | } |
408 | | |
409 | | |
410 | | /// LexQuote: String: "..." |
411 | | AsmToken AsmLexer::LexQuote() |
412 | 1.06M | { |
413 | 1.06M | int CurChar = getNextChar(); |
414 | | // TODO: does gas allow multiline string constants? |
415 | 6.94M | while (CurChar != '"') { |
416 | 5.88M | if (CurChar == '\\') { |
417 | | // Allow \", etc. |
418 | 171k | CurChar = getNextChar(); |
419 | 171k | } |
420 | | |
421 | 5.88M | if (CurChar == EOF) |
422 | 1.70k | return ReturnError(TokStart, "unterminated string constant"); |
423 | | |
424 | 5.88M | CurChar = getNextChar(); |
425 | 5.88M | } |
426 | | |
427 | 1.05M | return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); |
428 | 1.06M | } |
429 | | |
430 | 24.0k | StringRef AsmLexer::LexUntilEndOfStatement() { |
431 | 24.0k | TokStart = CurPtr; |
432 | | |
433 | 1.36M | while (!isAtStartOfComment(CurPtr) && // Start of line comment. |
434 | 1.36M | !isAtStatementSeparator(CurPtr) && // End of statement marker. |
435 | 1.36M | *CurPtr != '\n' && *CurPtr != '\r' && |
436 | 1.34M | (*CurPtr != 0 || CurPtr != CurBuf.end())) { |
437 | 1.34M | ++CurPtr; |
438 | 1.34M | } |
439 | 24.0k | return StringRef(TokStart, CurPtr-TokStart); |
440 | 24.0k | } |
441 | | |
442 | 1.66M | StringRef AsmLexer::LexUntilEndOfLine() { |
443 | 1.66M | TokStart = CurPtr; |
444 | | |
445 | 9.42M | while (*CurPtr != '\n' && *CurPtr != '\r' && |
446 | 7.76M | (*CurPtr != 0 || CurPtr != CurBuf.end())) { |
447 | 7.75M | ++CurPtr; |
448 | 7.75M | } |
449 | 1.66M | return StringRef(TokStart, CurPtr-TokStart); |
450 | 1.66M | } |
451 | | |
452 | | size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, |
453 | | bool ShouldSkipSpace) |
454 | 211k | { |
455 | 211k | const char *SavedTokStart = TokStart; |
456 | 211k | const char *SavedCurPtr = CurPtr; |
457 | 211k | bool SavedAtStartOfLine = isAtStartOfLine; |
458 | 211k | bool SavedSkipSpace = SkipSpace; |
459 | | |
460 | 211k | std::string SavedErr = getErr(); |
461 | 211k | SMLoc SavedErrLoc = getErrLoc(); |
462 | | |
463 | 211k | SkipSpace = ShouldSkipSpace; |
464 | | |
465 | 211k | size_t ReadCount; |
466 | 426k | for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) { |
467 | 214k | AsmToken Token = LexToken(); |
468 | | |
469 | 214k | Buf[ReadCount] = Token; |
470 | | |
471 | 214k | if (Token.is(AsmToken::Eof)) |
472 | 157 | break; |
473 | 214k | } |
474 | | |
475 | 211k | SetError(SavedErrLoc, SavedErr); |
476 | | |
477 | 211k | SkipSpace = SavedSkipSpace; |
478 | 211k | isAtStartOfLine = SavedAtStartOfLine; |
479 | 211k | CurPtr = SavedCurPtr; |
480 | 211k | TokStart = SavedTokStart; |
481 | | |
482 | 211k | return ReadCount; |
483 | 211k | } |
484 | | |
485 | 183M | bool AsmLexer::isAtStartOfComment(const char *Ptr) { |
486 | 183M | const char *CommentString = MAI.getCommentString(); |
487 | | |
488 | 183M | if (CommentString[1] == '\0') |
489 | 169M | return CommentString[0] == Ptr[0]; |
490 | | |
491 | | // FIXME: special case for the bogus "##" comment string in X86MCAsmInfoDarwin |
492 | 13.6M | if (CommentString[1] == '#') |
493 | 0 | return CommentString[0] == Ptr[0]; |
494 | | |
495 | 13.6M | return strncmp(Ptr, CommentString, strlen(CommentString)) == 0; |
496 | 13.6M | } |
497 | | |
498 | 181M | bool AsmLexer::isAtStatementSeparator(const char *Ptr) { |
499 | 181M | return strncmp(Ptr, MAI.getSeparatorString(), |
500 | 181M | strlen(MAI.getSeparatorString())) == 0; |
501 | 181M | } |
502 | | |
503 | | AsmToken AsmLexer::LexToken() |
504 | 181M | { |
505 | 181M | TokStart = CurPtr; |
506 | | // This always consumes at least one character. |
507 | 181M | int CurChar = getNextChar(); |
508 | | |
509 | 181M | if (isAtStartOfComment(TokStart)) { |
510 | | // If this comment starts with a '#', then return the Hash token and let |
511 | | // the assembler parser see if it can be parsed as a cpp line filename |
512 | | // comment. We do this only if we are at the start of a line. |
513 | 1.46M | if (CurChar == '#' && isAtStartOfLine) |
514 | 1.05M | return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); |
515 | 403k | isAtStartOfLine = true; |
516 | 403k | return LexLineComment(); |
517 | 1.46M | } |
518 | 180M | if (isAtStatementSeparator(TokStart)) { |
519 | 59.7M | CurPtr += strlen(MAI.getSeparatorString()) - 1; |
520 | 59.7M | return AsmToken(AsmToken::EndOfStatement, |
521 | 59.7M | StringRef(TokStart, strlen(MAI.getSeparatorString()))); |
522 | 59.7M | } |
523 | | |
524 | | // If we're missing a newline at EOF, make sure we still get an |
525 | | // EndOfStatement token before the Eof token. |
526 | 120M | if (CurChar == EOF && !isAtStartOfLine) { |
527 | 123k | isAtStartOfLine = true; |
528 | 123k | return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); |
529 | 123k | } |
530 | | |
531 | 120M | isAtStartOfLine = false; |
532 | 120M | switch (CurChar) { |
533 | 50.4M | default: |
534 | | // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* |
535 | 50.4M | if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') |
536 | 35.7M | return LexIdentifier(); |
537 | | |
538 | | // Unknown character, emit an error. |
539 | 14.6M | return ReturnError(TokStart, "invalid character in input"); |
540 | 121k | case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); |
541 | 0 | case 0: |
542 | 6.60M | case ' ': |
543 | 8.39M | case '\t': |
544 | 8.39M | if (SkipSpace) { |
545 | | // Ignore whitespace. |
546 | 8.39M | return LexToken(); |
547 | 8.39M | } else { |
548 | 194 | int len = 1; |
549 | 375 | while (*CurPtr==' ' || *CurPtr=='\t') { |
550 | 181 | CurPtr++; |
551 | 181 | len++; |
552 | 181 | } |
553 | 194 | return AsmToken(AsmToken::Space, StringRef(TokStart, len)); |
554 | 194 | } |
555 | 7.98M | case '\n': // FALL THROUGH. |
556 | 9.68M | case '\r': |
557 | 9.68M | isAtStartOfLine = true; |
558 | 9.68M | return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); |
559 | 124k | case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); |
560 | 1.92M | case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); |
561 | 4.36M | case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); |
562 | 375k | case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); |
563 | 1.51M | case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); |
564 | 784k | case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); |
565 | 193k | case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); |
566 | 69.6k | case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); |
567 | 220k | case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); |
568 | 74.9k | case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); |
569 | 526k | case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); |
570 | 17.8M | case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); |
571 | 6.54M | case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); |
572 | 399k | case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); |
573 | 444k | case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1)); |
574 | 2.21M | case '=': |
575 | 2.21M | if (*CurPtr == '=') |
576 | 30.0k | return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); |
577 | 2.18M | return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); |
578 | 431k | case '|': |
579 | 431k | if (*CurPtr == '|') |
580 | 196k | return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); |
581 | 235k | return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); |
582 | 249k | case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); |
583 | 335k | case '&': |
584 | 335k | if (*CurPtr == '&') |
585 | 24.3k | return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); |
586 | 311k | return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); |
587 | 166k | case '!': |
588 | 166k | if (*CurPtr == '=') |
589 | 15.4k | return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); |
590 | 150k | return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); |
591 | 197k | case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); |
592 | 266k | case '/': return LexSlash(); |
593 | 1.00M | case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); |
594 | 246k | case '\'': return LexSingleQuote(); |
595 | 1.06M | case '"': return LexQuote(); |
596 | 4.62M | case '0': case '1': case '2': case '3': case '4': |
597 | 7.02M | case '5': case '6': case '7': case '8': case '9': |
598 | 7.02M | return LexDigit(); |
599 | 2.96M | case '<': |
600 | 2.96M | switch (*CurPtr) { |
601 | 31.8k | case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, |
602 | 31.8k | StringRef(TokStart, 2)); |
603 | 15.4k | case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, |
604 | 15.4k | StringRef(TokStart, 2)); |
605 | 9.31k | case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, |
606 | 9.31k | StringRef(TokStart, 2)); |
607 | 2.90M | default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); |
608 | 2.96M | } |
609 | 244k | case '>': |
610 | 244k | switch (*CurPtr) { |
611 | 62.5k | case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, |
612 | 62.5k | StringRef(TokStart, 2)); |
613 | 15.6k | case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, |
614 | 15.6k | StringRef(TokStart, 2)); |
615 | 166k | default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); |
616 | 244k | } |
617 | | |
618 | | // TODO: Quoted identifiers (objc methods etc) |
619 | | // local labels: [0-9][:] |
620 | | // Forward/backward labels: [0-9][fb] |
621 | | // Integers, fp constants, character constants. |
622 | 120M | } |
623 | 120M | } |