Coverage Report

Created: 2026-03-31 08:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/keystone/llvm/lib/MC/MCParser/AsmLexer.cpp
Line
Count
Source
1
//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This class implements the lexer for assembly files.
11
//
12
//===----------------------------------------------------------------------===//
13
//
14
#include "llvm/MC/MCParser/AsmLexer.h"
15
#include "llvm/MC/MCAsmInfo.h"
16
#include "llvm/Support/MemoryBuffer.h"
17
#include "llvm/Support/SMLoc.h"
18
#include <cctype>
19
#include <cerrno>
20
#include <cstdio>
21
#include <cstdlib>
22
using namespace llvm_ks;
23
24
148k
/// Build a lexer whose dialect settings are taken from \p MAI.
AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
  // No buffer is attached yet; setBuffer() provides one.
  CurPtr = nullptr;
  isAtStartOfLine = true;

  // '@' is only allowed inside identifiers when the comment string does not
  // itself begin with '@'.
  const StringRef CommentPrefix(MAI.getCommentString());
  AllowAtInIdentifier = !CommentPrefix.startswith("@");

  defaultRadix = MAI.getRadix();
}
30
31
148k
/// Nothing to release: the destructor body is empty.
AsmLexer::~AsmLexer() = default;
33
34
1.00M
/// Attach \p Buf as the current buffer.  Lexing resumes at \p ptr when it is
/// non-null, otherwise at the beginning of the buffer.
void AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
  CurBuf = Buf;
  CurPtr = ptr ? ptr : CurBuf.begin();

  // No token has been started in the new buffer yet.
  TokStart = nullptr;
}
44
45
/// ReturnError - Set the error to the specified string at the specified
46
/// location.  This is defined to always return AsmToken::Error.
47
AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg)
48
15.0M
{
49
  //SetError(SMLoc::getFromPointer(Loc), Msg);
50
51
15.0M
  return AsmToken(AsmToken::Error, StringRef(Loc, 0));
52
15.0M
}
53
54
199M
int AsmLexer::getNextChar() {
55
199M
  char CurChar = *CurPtr++;
56
199M
  switch (CurChar) {
57
198M
  default:
58
198M
    return (unsigned char)CurChar;
59
250k
  case 0:
60
    // A nul character in the stream is either the end of the current buffer or
61
    // a random nul in the file.  Disambiguate that here.
62
250k
    if (CurPtr - 1 != CurBuf.end())
63
0
      return 0;  // Just whitespace.
64
65
    // Otherwise, return end of file.
66
250k
    --CurPtr;  // Another call to lex will return EOF again.
67
250k
    return EOF;
68
199M
  }
69
199M
}
70
71
/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
72
///
73
/// The leading integral digit sequence and dot should have already been
74
/// consumed, some or all of the fractional digit sequence *can* have been
75
/// consumed.
76
552k
AsmToken AsmLexer::LexFloatLiteral() {
77
  // Skip the fractional digit sequence.
78
552k
  while (isdigit(*CurPtr))
79
0
    ++CurPtr;
80
81
  // Check for exponent; we intentionally accept a slighlty wider set of
82
  // literals here and rely on the upstream client to reject invalid ones (e.g.,
83
  // "1e+").
84
552k
  if (*CurPtr == 'e' || *CurPtr == 'E') {
85
117k
    ++CurPtr;
86
117k
    if (*CurPtr == '-' || *CurPtr == '+')
87
42.7k
      ++CurPtr;
88
420k
    while (isdigit(*CurPtr))
89
302k
      ++CurPtr;
90
117k
  }
91
92
552k
  return AsmToken(AsmToken::Real,
93
552k
                  StringRef(TokStart, CurPtr - TokStart));
94
552k
}
95
96
/// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
97
/// while making sure there are enough actual digits around for the constant to
98
/// be valid.
99
///
100
/// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
101
/// before we get here.
102
AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits)
103
213k
{
104
213k
  assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
105
213k
         "unexpected parse state in floating hex");
106
213k
  bool NoFracDigits = true;
107
108
  // Skip the fractional part if there is one
109
213k
  if (*CurPtr == '.') {
110
94.6k
    ++CurPtr;
111
112
94.6k
    const char *FracStart = CurPtr;
113
448k
    while (isxdigit(*CurPtr))
114
353k
      ++CurPtr;
115
116
94.6k
    NoFracDigits = CurPtr == FracStart;
117
94.6k
  }
118
119
213k
  if (NoIntDigits && NoFracDigits)
120
17.2k
    return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
121
17.2k
                                 "expected at least one significand digit");
122
123
  // Make sure we do have some kind of proper exponent part
124
196k
  if (*CurPtr != 'p' && *CurPtr != 'P')
125
17.6k
    return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
126
17.6k
                                 "expected exponent part 'p'");
127
178k
  ++CurPtr;
128
129
178k
  if (*CurPtr == '+' || *CurPtr == '-')
130
59.8k
    ++CurPtr;
131
132
  // N.b. exponent digits are *not* hex
133
178k
  const char *ExpStart = CurPtr;
134
559k
  while (isdigit(*CurPtr))
135
381k
    ++CurPtr;
136
137
178k
  if (CurPtr == ExpStart)
138
27.8k
    return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
139
27.8k
                                 "expected at least one exponent digit");
140
141
150k
  return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
142
178k
}
143
144
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
///
/// IsIdentifierChar reports whether \p c may appear after the first character
/// of an identifier: an alphanumeric, '_', '$', '.', '?', or '@' when
/// \p AllowAt is set.
static bool IsIdentifierChar(char c, bool AllowAt) {
  // isalnum() requires a value representable as unsigned char (or EOF); plain
  // char may be negative for bytes >= 0x80, so convert before classifying.
  return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
         c == '.' || (c == '@' && AllowAt) || c == '?';
}
149
35.7M
/// Lex the token whose first character has already been consumed as an
/// identifier start.  The result is a Real token for inputs such as ".125",
/// a Dot token for a lone '.', and an Identifier token otherwise.
AsmToken AsmLexer::LexIdentifier() {
  // isdigit() requires a value representable as unsigned char (or EOF); plain
  // char may be negative for bytes >= 0x80, so convert before classifying.

  // Check for floating point literals.
  if (CurPtr[-1] == '.' && isdigit(static_cast<unsigned char>(*CurPtr))) {
    // Disambiguate a .1243foo identifier from a floating literal.
    while (isdigit(static_cast<unsigned char>(*CurPtr)))
      ++CurPtr;
    if (*CurPtr == 'e' || *CurPtr == 'E' ||
        !IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
      return LexFloatLiteral();
  }

  while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
    ++CurPtr;

  // Handle . as a special case.
  if (CurPtr == TokStart+1 && TokStart[0] == '.')
    return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));

  return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
}
169
170
/// LexSlash: Slash: /
171
///           C-Style Comment: /* ... */
172
AsmToken AsmLexer::LexSlash()
173
266k
{
174
266k
  switch (*CurPtr) {
175
22.1k
  case '*': break; // C style comment.
176
47.6k
  case '/': return ++CurPtr, LexLineComment();
177
196k
  default:  return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1));
178
266k
  }
179
180
  // C Style comment.
181
22.1k
  ++CurPtr;  // skip the star.
182
70.3k
  while (1) {
183
70.3k
    int CurChar = getNextChar();
184
70.3k
    switch (CurChar) {
185
311
    case EOF:
186
311
      return ReturnError(TokStart, "unterminated comment");
187
31.5k
    case '*':
188
      // End of the comment?
189
31.5k
      if (CurPtr[0] != '/') break;
190
191
21.8k
      ++CurPtr;   // End the */.
192
21.8k
      return LexToken();
193
70.3k
    }
194
70.3k
  }
195
22.1k
}
196
197
/// LexLineComment: Comment: #[^\n]*
198
///                        : //[^\n]*
199
451k
AsmToken AsmLexer::LexLineComment() {
200
  // FIXME: This is broken if we happen to a comment at the end of a file, which
201
  // was .included, and which doesn't end with a newline.
202
451k
  int CurChar = getNextChar();
203
9.72M
  while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
204
9.27M
    CurChar = getNextChar();
205
206
451k
  if (CurChar == EOF)
207
3.11k
    return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
208
448k
  return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0));
209
451k
}
210
211
6.49M
/// Advance \p CurPtr past an optional 'U' followed by at most two 'L'
/// characters, i.e. the ULL, UL, U, L and LL suffixes.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  const char *Cursor = CurPtr;

  if (*Cursor == 'U')
    ++Cursor;

  // At most two 'L' characters are skipped.
  for (int Remaining = 2; Remaining > 0 && *Cursor == 'L'; --Remaining)
    ++Cursor;

  CurPtr = Cursor;
}
220
221
// Look ahead to search for first non-hex digit, if it's [hH], then we treat the
// integer as a hexadecimal, possibly with leading zeroes.
//
// On return CurPtr points at the [hH] suffix when one was found.  Otherwise it
// points at the first non-decimal hex digit if there was one, or at the first
// character that is not a hex digit.
//
// Returns 16 when the [hH] suffix is present, DefaultRadix otherwise.
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
  const char *FirstHex = nullptr;
  const char *LookAhead = CurPtr;

  for (;; ++LookAhead) {
    // The <cctype> classifiers require a value representable as unsigned char
    // (or EOF); plain char may be negative for bytes >= 0x80.
    const unsigned char C = static_cast<unsigned char>(*LookAhead);
    if (isdigit(C))
      continue;
    if (!isxdigit(C))
      break;
    // Remember where the first [a-fA-F] digit is.
    if (!FirstHex)
      FirstHex = LookAhead;
  }

  const bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
  CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
  return isHex ? 16 : DefaultRadix;
}
243
244
/// Wrap \p Value in an Integer token when Value.isIntN(64) holds, and in a
/// BigNum token otherwise.  \p Ref is the token text.
static AsmToken intToken(StringRef Ref, APInt &Value)
{
  return Value.isIntN(64) ? AsmToken(AsmToken::Integer, Ref, Value)
                          : AsmToken(AsmToken::BigNum, Ref, Value);
}
250
251
/// LexDigit: First character is [0-9].
252
///   Local Label: [0-9][:]
253
///   Forward/Backward Label: [0-9][fb]
254
///   Binary integer: 0b[01]+
255
///   Octal integer: 0[0-7]+
256
///   Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
257
///   Decimal integer: [1-9][0-9]*
258
AsmToken AsmLexer::LexDigit()
259
7.02M
{
260
  // Decimal integer: [1-9][0-9]*
261
7.02M
  if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
262
4.82M
    unsigned Radix = doLookAhead(CurPtr, 10);
263
264
4.82M
    if (defaultRadix == 16)
265
4.82M
      Radix = 16;
266
267
4.82M
    bool isHex = Radix == 16;
268
    // Check for floating point literals.
269
4.82M
    if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
270
0
      ++CurPtr;
271
0
      return LexFloatLiteral();
272
0
    }
273
274
4.82M
    StringRef Result(TokStart, CurPtr - TokStart);
275
276
4.82M
    APInt Value(128, 0, true);
277
4.82M
    if (Result.getAsInteger(Radix, Value))
278
0
      return ReturnError(TokStart, !isHex ? "invalid decimal number" :
279
0
                           "invalid hexdecimal number");
280
281
    // Consume the [bB][hH].
282
4.82M
    if (defaultRadix != 16) {
283
0
      if (Radix == 2 || Radix == 16)
284
0
        ++CurPtr;
285
0
    }
286
287
    // The darwin/x86 (and x86-64) assembler accepts and ignores type
288
    // suffices on integer literals.
289
4.82M
    SkipIgnoredIntegerSuffix(CurPtr);
290
291
4.82M
    return intToken(Result, Value);
292
4.82M
  }
293
294
2.20M
  if (*CurPtr == 'b') {
295
251k
    ++CurPtr;
296
    // See if we actually have "0b" as part of something like "jmp 0b\n"
297
251k
    if (!isdigit(CurPtr[0])) {
298
152k
      --CurPtr;
299
152k
      StringRef Result(TokStart, CurPtr - TokStart);
300
152k
      return AsmToken(AsmToken::Integer, Result, 0);
301
152k
    }
302
98.1k
    const char *NumStart = CurPtr;
303
451k
    while (CurPtr[0] == '0' || CurPtr[0] == '1')
304
352k
      ++CurPtr;
305
306
    // Requires at least one binary digit.
307
98.1k
    if (CurPtr == NumStart)
308
5.37k
      return ReturnError(TokStart, "invalid binary number");
309
310
92.8k
    StringRef Result(TokStart, CurPtr - TokStart);
311
312
92.8k
    APInt Value(128, 0, true);
313
92.8k
    if (Result.substr(2).getAsInteger(2, Value))
314
0
      return ReturnError(TokStart, "invalid binary number");
315
316
    // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
317
    // suffixes on integer literals.
318
92.8k
    SkipIgnoredIntegerSuffix(CurPtr);
319
320
92.8k
    return intToken(Result, Value);
321
92.8k
  }
322
323
1.94M
  if (*CurPtr == 'x' || *CurPtr == 'X') {
324
361k
    ++CurPtr;
325
361k
    const char *NumStart = CurPtr;
326
2.42M
    while (isxdigit(CurPtr[0]))
327
2.06M
      ++CurPtr;
328
329
    // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
330
    // diagnosed by LexHexFloatLiteral).
331
361k
    if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
332
213k
      return LexHexFloatLiteral(NumStart == CurPtr);
333
334
    // Otherwise requires at least one hex digit.
335
147k
    if (CurPtr == NumStart)
336
17.1k
      return ReturnError(CurPtr-2, "invalid hexadecimal number");
337
338
130k
    APInt Result(128, 0);
339
130k
    if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
340
0
      return ReturnError(TokStart, "invalid hexadecimal number");
341
342
    // Consume the optional [hH].
343
130k
    if (*CurPtr == 'h' || *CurPtr == 'H')
344
7.23k
      ++CurPtr;
345
346
    // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
347
    // suffixes on integer literals.
348
130k
    SkipIgnoredIntegerSuffix(CurPtr);
349
350
130k
    return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
351
130k
  }
352
353
  // Either octal or hexadecimal.
354
1.58M
  APInt Value(128, 0, true);
355
1.58M
  unsigned Radix = doLookAhead(CurPtr, 8);
356
1.58M
  bool isHex = Radix == 16;
357
1.58M
  StringRef Result(TokStart, CurPtr - TokStart);
358
1.58M
  if (Result.getAsInteger(Radix, Value))
359
138k
    return ReturnError(TokStart, !isHex ? "invalid octal number" :
360
138k
                       "invalid hexdecimal number");
361
362
  // Consume the [hH].
363
1.44M
  if (Radix == 16)
364
8.85k
    ++CurPtr;
365
366
  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
367
  // suffixes on integer literals.
368
1.44M
  SkipIgnoredIntegerSuffix(CurPtr);
369
370
1.44M
  return intToken(Result, Value);
371
1.58M
}
372
373
/// LexSingleQuote: Integer: 'b'
374
AsmToken AsmLexer::LexSingleQuote()
375
246k
{
376
246k
  int CurChar = getNextChar();
377
378
246k
  if (CurChar == '\\')
379
39.9k
    CurChar = getNextChar();
380
381
246k
  if (CurChar == EOF)
382
298
    return ReturnError(TokStart, "unterminated single quote");
383
384
246k
  CurChar = getNextChar();
385
386
246k
  if (CurChar != '\'')
387
172k
    return ReturnError(TokStart, "single quote way too long");
388
389
  // The idea here being that 'c' is basically just an integral
390
  // constant.
391
73.7k
  StringRef Res = StringRef(TokStart,CurPtr - TokStart);
392
73.7k
  long long Value;
393
394
73.7k
  if (Res.startswith("\'\\")) {
395
24.9k
    char theChar = Res[2];
396
24.9k
    switch (theChar) {
397
8.26k
      default: Value = theChar; break;
398
3.37k
      case '\'': Value = '\''; break;
399
4.64k
      case 't': Value = '\t'; break;
400
3.94k
      case 'n': Value = '\n'; break;
401
4.76k
      case 'b': Value = '\b'; break;
402
24.9k
    }
403
24.9k
  } else
404
48.8k
    Value = TokStart[1];
405
406
73.7k
  return AsmToken(AsmToken::Integer, Res, Value);
407
73.7k
}
408
409
410
/// LexQuote: String: "..."
411
AsmToken AsmLexer::LexQuote()
412
1.06M
{
413
1.06M
  int CurChar = getNextChar();
414
  // TODO: does gas allow multiline string constants?
415
6.94M
  while (CurChar != '"') {
416
5.88M
    if (CurChar == '\\') {
417
      // Allow \", etc.
418
171k
      CurChar = getNextChar();
419
171k
    }
420
421
5.88M
    if (CurChar == EOF)
422
1.70k
      return ReturnError(TokStart, "unterminated string constant");
423
424
5.88M
    CurChar = getNextChar();
425
5.88M
  }
426
427
1.05M
  return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
428
1.06M
}
429
430
24.0k
/// Start a new token at the current position and advance until a comment
/// start, a statement separator, a line break, or the end of the buffer.
/// Returns the text that was skipped over.
StringRef AsmLexer::LexUntilEndOfStatement() {
  TokStart = CurPtr;

  for (;;) {
    if (isAtStartOfComment(CurPtr))      // Start of line comment.
      break;
    if (isAtStatementSeparator(CurPtr))  // End of statement marker.
      break;

    const char Ch = *CurPtr;
    if (Ch == '\n' || Ch == '\r')
      break;
    // A nul only stops the scan when it is the one at the end of the buffer.
    if (Ch == 0 && CurPtr == CurBuf.end())
      break;

    ++CurPtr;
  }

  return StringRef(TokStart, CurPtr-TokStart);
}
441
442
1.66M
/// Start a new token at the current position and advance until a line break
/// or the end of the buffer.  Returns the text that was skipped over.
StringRef AsmLexer::LexUntilEndOfLine() {
  TokStart = CurPtr;

  for (;;) {
    const char Ch = *CurPtr;
    if (Ch == '\n' || Ch == '\r')
      break;
    // A nul only stops the scan when it is the one at the end of the buffer.
    if (Ch == 0 && CurPtr == CurBuf.end())
      break;

    ++CurPtr;
  }

  return StringRef(TokStart, CurPtr-TokStart);
}
451
452
size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
453
                            bool ShouldSkipSpace)
454
211k
{
455
211k
  const char *SavedTokStart = TokStart;
456
211k
  const char *SavedCurPtr = CurPtr;
457
211k
  bool SavedAtStartOfLine = isAtStartOfLine;
458
211k
  bool SavedSkipSpace = SkipSpace;
459
460
211k
  std::string SavedErr = getErr();
461
211k
  SMLoc SavedErrLoc = getErrLoc();
462
463
211k
  SkipSpace = ShouldSkipSpace;
464
465
211k
  size_t ReadCount;
466
426k
  for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
467
214k
    AsmToken Token = LexToken();
468
469
214k
    Buf[ReadCount] = Token;
470
471
214k
    if (Token.is(AsmToken::Eof))
472
157
      break;
473
214k
  }
474
475
211k
  SetError(SavedErrLoc, SavedErr);
476
477
211k
  SkipSpace = SavedSkipSpace;
478
211k
  isAtStartOfLine = SavedAtStartOfLine;
479
211k
  CurPtr = SavedCurPtr;
480
211k
  TokStart = SavedTokStart;
481
482
211k
  return ReadCount;
483
211k
}
484
485
183M
bool AsmLexer::isAtStartOfComment(const char *Ptr) {
486
183M
  const char *CommentString = MAI.getCommentString();
487
488
183M
  if (CommentString[1] == '\0')
489
169M
    return CommentString[0] == Ptr[0];
490
491
  // FIXME: special case for the bogus "##" comment string in X86MCAsmInfoDarwin
492
13.6M
  if (CommentString[1] == '#')
493
0
    return CommentString[0] == Ptr[0];
494
495
13.6M
  return strncmp(Ptr, CommentString, strlen(CommentString)) == 0;
496
13.6M
}
497
498
181M
bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
499
181M
  return strncmp(Ptr, MAI.getSeparatorString(),
500
181M
                 strlen(MAI.getSeparatorString())) == 0;
501
181M
}
502
503
/// Lex and return the next token, starting at the current position.
AsmToken AsmLexer::LexToken()
{
  TokStart = CurPtr;
  // This always consumes at least one character.
  const int Ch = getNextChar();

  // Builders for tokens covering the first one or two characters.  The
  // two-character form also consumes the second character.
  auto OneChar = [this](AsmToken::TokenKind Kind) {
    return AsmToken(Kind, StringRef(TokStart, 1));
  };
  auto TwoChar = [this](AsmToken::TokenKind Kind) {
    ++CurPtr;
    return AsmToken(Kind, StringRef(TokStart, 2));
  };

  if (isAtStartOfComment(TokStart)) {
    // If this comment starts with a '#', then return the Hash token and let
    // the assembler parser see if it can be parsed as a cpp line filename
    // comment. We do this only if we are at the start of a line.
    if (Ch == '#' && isAtStartOfLine)
      return OneChar(AsmToken::Hash);
    isAtStartOfLine = true;
    return LexLineComment();
  }

  if (isAtStatementSeparator(TokStart)) {
    // The first separator character is already consumed; skip the rest.
    const size_t SeparatorLen = strlen(MAI.getSeparatorString());
    CurPtr += SeparatorLen - 1;
    return AsmToken(AsmToken::EndOfStatement,
                    StringRef(TokStart, SeparatorLen));
  }

  // If we're missing a newline at EOF, make sure we still get an
  // EndOfStatement token before the Eof token.
  if (Ch == EOF && !isAtStartOfLine) {
    isAtStartOfLine = true;
    return OneChar(AsmToken::EndOfStatement);
  }

  isAtStartOfLine = false;
  switch (Ch) {
  default:
    // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
    if (isalpha(Ch) || Ch == '_' || Ch == '.')
      return LexIdentifier();

    // Unknown character, emit an error.
    return ReturnError(TokStart, "invalid character in input");

  case EOF:
    return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));

  case 0:
  case ' ':
  case '\t': {
    // Ignore whitespace.
    if (SkipSpace)
      return LexToken();

    // Otherwise gather the whole run of spaces and tabs into one token.
    int RunLength = 1;
    for (; *CurPtr == ' ' || *CurPtr == '\t'; ++CurPtr)
      ++RunLength;
    return AsmToken(AsmToken::Space, StringRef(TokStart, RunLength));
  }

  case '\n':
  case '\r':
    isAtStartOfLine = true;
    return OneChar(AsmToken::EndOfStatement);

  // Single-character punctuation.
  case ':':  return OneChar(AsmToken::Colon);
  case '+':  return OneChar(AsmToken::Plus);
  case '-':  return OneChar(AsmToken::Minus);
  case '~':  return OneChar(AsmToken::Tilde);
  case '(':  return OneChar(AsmToken::LParen);
  case ')':  return OneChar(AsmToken::RParen);
  case '[':  return OneChar(AsmToken::LBrac);
  case ']':  return OneChar(AsmToken::RBrac);
  case '{':  return OneChar(AsmToken::LCurly);
  case '}':  return OneChar(AsmToken::RCurly);
  case '*':  return OneChar(AsmToken::Star);
  case ',':  return OneChar(AsmToken::Comma);
  case '$':  return OneChar(AsmToken::Dollar);
  case '@':  return OneChar(AsmToken::At);
  case '\\': return OneChar(AsmToken::BackSlash);
  case '^':  return OneChar(AsmToken::Caret);
  case '%':  return OneChar(AsmToken::Percent);
  case '#':  return OneChar(AsmToken::Hash);

  // Operators that may be extended by a second character.
  case '=':
    return *CurPtr == '=' ? TwoChar(AsmToken::EqualEqual)
                          : OneChar(AsmToken::Equal);
  case '|':
    return *CurPtr == '|' ? TwoChar(AsmToken::PipePipe)
                          : OneChar(AsmToken::Pipe);
  case '&':
    return *CurPtr == '&' ? TwoChar(AsmToken::AmpAmp)
                          : OneChar(AsmToken::Amp);
  case '!':
    return *CurPtr == '=' ? TwoChar(AsmToken::ExclaimEqual)
                          : OneChar(AsmToken::Exclaim);
  case '<':
    if (*CurPtr == '<')
      return TwoChar(AsmToken::LessLess);
    if (*CurPtr == '=')
      return TwoChar(AsmToken::LessEqual);
    if (*CurPtr == '>')
      return TwoChar(AsmToken::LessGreater);
    return OneChar(AsmToken::Less);
  case '>':
    if (*CurPtr == '>')
      return TwoChar(AsmToken::GreaterGreater);
    if (*CurPtr == '=')
      return TwoChar(AsmToken::GreaterEqual);
    return OneChar(AsmToken::Greater);

  // Tokens that need their own sub-lexer.
  case '/':  return LexSlash();
  case '\'': return LexSingleQuote();
  case '"':  return LexQuote();
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
    return LexDigit();

  // TODO: Quoted identifiers (objc methods etc)
  // local labels: [0-9][:]
  // Forward/backward labels: [0-9][fb]
  // Integers, fp constants, character constants.
  }
}