Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/AsmParser/LLLexer.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Implement the Lexer for .ll files.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "llvm/AsmParser/LLLexer.h"
14
#include "llvm/ADT/APInt.h"
15
#include "llvm/ADT/STLExtras.h"
16
#include "llvm/ADT/StringExtras.h"
17
#include "llvm/ADT/Twine.h"
18
#include "llvm/IR/DerivedTypes.h"
19
#include "llvm/IR/Instruction.h"
20
#include "llvm/Support/ErrorHandling.h"
21
#include "llvm/Support/SourceMgr.h"
22
#include <cassert>
23
#include <cctype>
24
#include <cstdio>
25
26
using namespace llvm;
27
28
29.4k
/// Store an error diagnostic for \p ErrorLoc with text \p Msg into ErrorInfo.
/// The diagnostic is built by the SourceMgr; nothing is printed here.
///
/// \returns true, unconditionally.
bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
  constexpr bool HadError = true;
  ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
  return HadError;
}
32
33
0
/// Emit a warning diagnostic for \p WarningLoc with text \p Msg through the
/// SourceMgr. Unlike Error(), this prints immediately and does not touch
/// ErrorInfo.
void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
  SM.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
}
36
37
//===----------------------------------------------------------------------===//
38
// Helper functions.
39
//===----------------------------------------------------------------------===//
40
41
// atoull - Convert an ascii string of decimal digits into the unsigned long
42
// long representation... this does not have to do input error checking,
43
// because we know that the input will be matched by a suitable regex...
44
//
45
147k
uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
46
147k
  uint64_t Result = 0;
47
573k
  for (; Buffer != End; Buffer++) {
48
425k
    uint64_t OldRes = Result;
49
425k
    Result *= 10;
50
425k
    Result += *Buffer-'0';
51
425k
    if (Result < OldRes) {  // Uh, oh, overflow detected!!!
52
209
      Error("constant bigger than 64 bits detected!");
53
209
      return 0;
54
209
    }
55
425k
  }
56
147k
  return Result;
57
147k
}
58
59
226
uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
60
226
  uint64_t Result = 0;
61
3.02k
  for (; Buffer != End; ++Buffer) {
62
2.81k
    uint64_t OldRes = Result;
63
2.81k
    Result *= 16;
64
2.81k
    Result += hexDigitValue(*Buffer);
65
66
2.81k
    if (Result < OldRes) {   // Uh, oh, overflow detected!!!
67
18
      Error("constant bigger than 64 bits detected!");
68
18
      return 0;
69
18
    }
70
2.81k
  }
71
208
  return Result;
72
226
}
73
74
void LLLexer::HexToIntPair(const char *Buffer, const char *End,
75
156
                           uint64_t Pair[2]) {
76
156
  Pair[0] = 0;
77
156
  if (End - Buffer >= 16) {
78
1.36k
    for (int i = 0; i < 16; i++, Buffer++) {
79
1.28k
      assert(Buffer != End);
80
0
      Pair[0] *= 16;
81
1.28k
      Pair[0] += hexDigitValue(*Buffer);
82
1.28k
    }
83
80
  }
84
156
  Pair[1] = 0;
85
1.07k
  for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
86
923
    Pair[1] *= 16;
87
923
    Pair[1] += hexDigitValue(*Buffer);
88
923
  }
89
156
  if (Buffer != End)
90
8
    Error("constant bigger than 128 bits detected!");
91
156
}
92
93
/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
94
/// { low64, high16 } as usual for an APInt.
95
void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
96
326
                           uint64_t Pair[2]) {
97
326
  Pair[1] = 0;
98
1.57k
  for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
99
1.25k
    assert(Buffer != End);
100
0
    Pair[1] *= 16;
101
1.25k
    Pair[1] += hexDigitValue(*Buffer);
102
1.25k
  }
103
326
  Pair[0] = 0;
104
3.97k
  for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
105
3.64k
    Pair[0] *= 16;
106
3.64k
    Pair[0] += hexDigitValue(*Buffer);
107
3.64k
  }
108
326
  if (Buffer != End)
109
69
    Error("constant bigger than 128 bits detected!");
110
326
}
111
112
// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
113
// appropriate character.
114
499k
static void UnEscapeLexed(std::string &Str) {
115
499k
  if (Str.empty()) return;
116
117
313k
  char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
118
313k
  char *BOut = Buffer;
119
25.3M
  for (char *BIn = Buffer; BIn != EndBuffer; ) {
120
25.0M
    if (BIn[0] == '\\') {
121
124k
      if (BIn < EndBuffer-1 && BIn[1] == '\\') {
122
52.6k
        *BOut++ = '\\'; // Two \ becomes one
123
52.6k
        BIn += 2;
124
71.8k
      } else if (BIn < EndBuffer-2 &&
125
71.8k
                 isxdigit(static_cast<unsigned char>(BIn[1])) &&
126
71.8k
                 isxdigit(static_cast<unsigned char>(BIn[2]))) {
127
1.32k
        *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]);
128
1.32k
        BIn += 3;                           // Skip over handled chars
129
1.32k
        ++BOut;
130
70.5k
      } else {
131
70.5k
        *BOut++ = *BIn++;
132
70.5k
      }
133
24.9M
    } else {
134
24.9M
      *BOut++ = *BIn++;
135
24.9M
    }
136
25.0M
  }
137
313k
  Str.resize(BOut-Buffer);
138
313k
}
139
140
/// isLabelChar - Return true for [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
  switch (C) {
  case '-':
  case '$':
  case '.':
  case '_':
    return true;
  default:
    // Cast first: passing a negative char to isalnum is undefined.
    return isalnum(static_cast<unsigned char>(C)) != 0;
  }
}
145
146
/// isLabelTail - Return true if this pointer points to a valid end of a label.
147
24.8k
static const char *isLabelTail(const char *CurPtr) {
148
1.37M
  while (true) {
149
1.37M
    if (CurPtr[0] == ':') return CurPtr+1;
150
1.37M
    if (!isLabelChar(CurPtr[0])) return nullptr;
151
1.34M
    ++CurPtr;
152
1.34M
  }
153
24.8k
}
154
155
//===----------------------------------------------------------------------===//
156
// Lexer definition.
157
//===----------------------------------------------------------------------===//
158
159
/// Construct a lexer over \p StartBuf. Diagnostics are built through \p SM and
/// stored into \p Err; \p C is the context used when creating types. Lexing
/// starts at the first character of the buffer.
LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
                 LLVMContext &C)
    : CurBuf(StartBuf), ErrorInfo(Err), SM(SM), Context(C) {
  // CurBuf is a copy of StartBuf, so both begin at the same address.
  CurPtr = StartBuf.begin();
}
164
165
29.6M
int LLLexer::getNextChar() {
166
29.6M
  char CurChar = *CurPtr++;
167
29.6M
  switch (CurChar) {
168
29.6M
  default: return (unsigned char)CurChar;
169
5.01k
  case 0:
170
    // A nul character in the stream is either the end of the current buffer or
171
    // a random nul in the file.  Disambiguate that here.
172
5.01k
    if (CurPtr-1 != CurBuf.end())
173
0
      return 0;  // Just whitespace.
174
175
    // Otherwise, return end of file.
176
5.01k
    --CurPtr;  // Another call to lex will return EOF again.
177
5.01k
    return EOF;
178
29.6M
  }
179
29.6M
}
180
181
4.26M
/// Lex and return the next token. Whitespace (including embedded nul
/// characters) and ';' line comments are skipped. TokStart is left pointing at
/// the first character of the returned token.
lltok::Kind LLLexer::LexToken() {
  for (;;) {
    TokStart = CurPtr;

    const int Ch = getNextChar();
    switch (Ch) {
    case EOF:
      return lltok::Eof;

    // Ignore whitespace.
    case 0:
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      continue;

    // Comments run to the end of the line.
    case ';':
      SkipLineComment();
      continue;

    // Tokens introduced by a sigil; each has its own sub-lexer.
    case '+':
      return LexPositive();
    case '@':
      return LexAt();
    case '$':
      return LexDollar();
    case '%':
      return LexPercent();
    case '"':
      return LexQuote();
    case '!':
      return LexExclaim();
    case '^':
      return LexCaret();
    case '#':
      return LexHash();

    // A '.' starts either a label or the "..." token.
    case '.': {
      if (const char *AfterColon = isLabelTail(CurPtr)) {
        CurPtr = AfterColon;
        StrVal.assign(TokStart, CurPtr - 1);
        return lltok::LabelStr;
      }
      if (CurPtr[0] != '.' || CurPtr[1] != '.')
        return lltok::Error;
      CurPtr += 2;
      return lltok::dotdotdot;
    }

    // Numbers, and anything starting with a minus sign.
    case '-':
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
      return LexDigitOrNegative();

    // Single-character punctuation.
    case ':':
      return lltok::colon;
    case '=':
      return lltok::equal;
    case '[':
      return lltok::lsquare;
    case ']':
      return lltok::rsquare;
    case '{':
      return lltok::lbrace;
    case '}':
      return lltok::rbrace;
    case '<':
      return lltok::less;
    case '>':
      return lltok::greater;
    case '(':
      return lltok::lparen;
    case ')':
      return lltok::rparen;
    case ',':
      return lltok::comma;
    case '*':
      return lltok::star;
    case '|':
      return lltok::bar;

    default:
      // Handle letters: [a-zA-Z_]
      if (isalpha(static_cast<unsigned char>(Ch)) || Ch == '_')
        return LexIdentifier();
      return lltok::Error;
    }
  }
}
245
246
1.04k
void LLLexer::SkipLineComment() {
247
10.3k
  while (true) {
248
10.3k
    if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
249
1.04k
      return;
250
10.3k
  }
251
1.04k
}
252
253
/// Lex all tokens that start with an @ character.
254
///   GlobalVar   @\"[^\"]*\"
255
///   GlobalVar   @[-a-zA-Z$._][-a-zA-Z$._0-9]*
256
///   GlobalVarID @[0-9]+
257
36.6k
lltok::Kind LLLexer::LexAt() {
258
36.6k
  return LexVar(lltok::GlobalVar, lltok::GlobalID);
259
36.6k
}
260
261
173
/// Lex all tokens that start with a $ character.
///   Label                 a run of label characters (the '$' included)
///                         terminated by ':'
///   DollarStringConstant  $\"[^\"]*\"
///   ComdatVarName         $[-a-zA-Z$._][-a-zA-Z$._0-9]*
lltok::Kind LLLexer::LexDollar() {
  // The label check starts at the '$' itself, which is a label character.
  if (const char *AfterColon = isLabelTail(TokStart)) {
    CurPtr = AfterColon;
    StrVal.assign(TokStart, CurPtr - 1);
    return lltok::LabelStr;
  }

  // Handle DollarStringConstant: $\"[^\"]*\"
  if (*CurPtr == '"') {
    ++CurPtr;

    // Consume up to and including the closing quote.
    int Ch;
    do {
      Ch = getNextChar();
      if (Ch == EOF) {
        Error("end of file in COMDAT variable name");
        return lltok::Error;
      }
    } while (Ch != '"');

    // Skip the leading $" and drop the trailing quote.
    StrVal.assign(TokStart + 2, CurPtr - 1);
    UnEscapeLexed(StrVal);
    if (StringRef(StrVal).contains(0)) {
      Error("Null bytes are not allowed in names");
      return lltok::Error;
    }
    return lltok::ComdatVar;
  }

  // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*
  return ReadVarName() ? lltok::ComdatVar : lltok::Error;
}
297
298
/// ReadString - Read a string until the closing quote.
299
374k
lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
300
374k
  const char *Start = CurPtr;
301
19.2M
  while (true) {
302
19.2M
    int CurChar = getNextChar();
303
304
19.2M
    if (CurChar == EOF) {
305
576
      Error("end of file in string constant");
306
576
      return lltok::Error;
307
576
    }
308
19.2M
    if (CurChar == '"') {
309
373k
      StrVal.assign(Start, CurPtr-1);
310
373k
      UnEscapeLexed(StrVal);
311
373k
      return kind;
312
373k
    }
313
19.2M
  }
314
374k
}
315
316
/// ReadVarName - Read the rest of a token containing a variable name.
317
88.7k
bool LLLexer::ReadVarName() {
318
88.7k
  const char *NameStart = CurPtr;
319
88.7k
  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
320
88.7k
      CurPtr[0] == '-' || CurPtr[0] == '$' ||
321
88.7k
      CurPtr[0] == '.' || CurPtr[0] == '_') {
322
65.9k
    ++CurPtr;
323
3.31M
    while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
324
3.31M
           CurPtr[0] == '-' || CurPtr[0] == '$' ||
325
3.31M
           CurPtr[0] == '.' || CurPtr[0] == '_')
326
3.24M
      ++CurPtr;
327
328
65.9k
    StrVal.assign(NameStart, CurPtr);
329
65.9k
    return true;
330
65.9k
  }
331
22.7k
  return false;
332
88.7k
}
333
334
// Lex an ID: [0-9]+. On success, the ID is stored in UIntVal and Token is
335
// returned, otherwise the Error token is returned.
336
22.8k
lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
337
22.8k
  if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
338
173
    return lltok::Error;
339
340
96.7k
  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
341
74.0k
    /*empty*/;
342
343
22.6k
  uint64_t Val = atoull(TokStart + 1, CurPtr);
344
22.6k
  if ((unsigned)Val != Val)
345
2.27k
    Error("invalid value number (too large)!");
346
22.6k
  UIntVal = unsigned(Val);
347
22.6k
  return Token;
348
22.8k
}
349
350
175k
/// Lex the remainder of a variable token whose one-character sigil has
/// already been consumed. Returns \p Var for a quoted or plain name (stored in
/// StrVal) and \p VarID for a numeric ID (stored in UIntVal).
///   StringConstant  \"[^\"]*\"
///   VarName         [-a-zA-Z$._][-a-zA-Z$._0-9]*
///   VarID           [0-9]+
lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
  // Handle StringConstant: \"[^\"]*\"
  if (*CurPtr == '"') {
    ++CurPtr;

    // Consume up to and including the closing quote.
    int Ch;
    do {
      Ch = getNextChar();
      if (Ch == EOF) {
        Error("end of file in global variable name");
        return lltok::Error;
      }
    } while (Ch != '"');

    // Skip the sigil and opening quote; drop the trailing quote.
    StrVal.assign(TokStart + 2, CurPtr - 1);
    UnEscapeLexed(StrVal);
    if (StringRef(StrVal).contains(0)) {
      Error("Null bytes are not allowed in names");
      return lltok::Error;
    }
    return Var;
  }

  // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]*
  // and otherwise VarID: [0-9]+
  return ReadVarName() ? Var : LexUIntID(VarID);
}
381
382
/// Lex all tokens that start with a % character.
383
///   LocalVar   ::= %\"[^\"]*\"
384
///   LocalVar   ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
385
///   LocalVarID ::= %[0-9]+
386
139k
lltok::Kind LLLexer::LexPercent() {
387
139k
  return LexVar(lltok::LocalVar, lltok::LocalVarID);
388
139k
}
389
390
/// Lex all tokens that start with a " character.
391
///   QuoteLabel        "[^"]+":
392
///   StringConstant    "[^"]*"
393
374k
lltok::Kind LLLexer::LexQuote() {
394
374k
  lltok::Kind kind = ReadString(lltok::StringConstant);
395
374k
  if (kind == lltok::Error || kind == lltok::Eof)
396
576
    return kind;
397
398
373k
  if (CurPtr[0] == ':') {
399
13
    ++CurPtr;
400
13
    if (StringRef(StrVal).contains(0)) {
401
9
      Error("Null bytes are not allowed in names");
402
9
      kind = lltok::Error;
403
9
    } else {
404
4
      kind = lltok::LabelStr;
405
4
    }
406
13
  }
407
408
373k
  return kind;
409
374k
}
410
411
/// Lex all tokens that start with a ! character.
412
///    !foo
413
///    !
414
1.03M
lltok::Kind LLLexer::LexExclaim() {
415
  // Lex a metadata name as a MetadataVar.
416
1.03M
  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
417
1.03M
      CurPtr[0] == '-' || CurPtr[0] == '$' ||
418
1.03M
      CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
419
39.0k
    ++CurPtr;
420
1.59M
    while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
421
1.59M
           CurPtr[0] == '-' || CurPtr[0] == '$' ||
422
1.59M
           CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
423
1.55M
      ++CurPtr;
424
425
39.0k
    StrVal.assign(TokStart+1, CurPtr);   // Skip !
426
39.0k
    UnEscapeLexed(StrVal);
427
39.0k
    return lltok::MetadataVar;
428
39.0k
  }
429
993k
  return lltok::exclaim;
430
1.03M
}
431
432
/// Lex all tokens that start with a ^ character.
433
///    SummaryID ::= ^[0-9]+
434
38
lltok::Kind LLLexer::LexCaret() {
435
  // Handle SummaryID: ^[0-9]+
436
38
  return LexUIntID(lltok::SummaryID);
437
38
}
438
439
/// Lex all tokens that start with a # character.
440
///    AttrGrpID ::= #[0-9]+
441
45
lltok::Kind LLLexer::LexHash() {
442
  // Handle AttrGrpID: #[0-9]+
443
45
  return LexUIntID(lltok::AttrGrpID);
444
45
}
445
446
/// Lex a label, integer type, keyword, or hexadecimal integer constant.
447
///    Label           [-a-zA-Z$._0-9]+:
448
///    IntegerType     i[0-9]+
449
///    Keyword         sdiv, float, ...
450
///    HexIntConstant  [us]0x[0-9A-Fa-f]+
451
221k
lltok::Kind LLLexer::LexIdentifier() {
452
221k
  const char *StartChar = CurPtr;
453
221k
  const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar;
454
221k
  const char *KeywordEnd = nullptr;
455
456
8.85M
  for (; isLabelChar(*CurPtr); ++CurPtr) {
457
    // If we decide this is an integer, remember the end of the sequence.
458
8.63M
    if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
459
17.2k
      IntEnd = CurPtr;
460
8.63M
    if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
461
8.63M
        *CurPtr != '_')
462
24.4k
      KeywordEnd = CurPtr;
463
8.63M
  }
464
465
  // If we stopped due to a colon, unless we were directed to ignore it,
466
  // this really is a label.
467
221k
  if (!IgnoreColonInIdentifiers && *CurPtr == ':') {
468
262
    StrVal.assign(StartChar-1, CurPtr++);
469
262
    return lltok::LabelStr;
470
262
  }
471
472
  // Otherwise, this wasn't a label.  If this was valid as an integer type,
473
  // return it.
474
221k
  if (!IntEnd) IntEnd = CurPtr;
475
221k
  if (IntEnd != StartChar) {
476
124k
    CurPtr = IntEnd;
477
124k
    uint64_t NumBits = atoull(StartChar, CurPtr);
478
124k
    if (NumBits < IntegerType::MIN_INT_BITS ||
479
124k
        NumBits > IntegerType::MAX_INT_BITS) {
480
179
      Error("bitwidth for integer type out of range!");
481
179
      return lltok::Error;
482
179
    }
483
124k
    TyVal = IntegerType::get(Context, NumBits);
484
124k
    return lltok::Type;
485
124k
  }
486
487
  // Otherwise, this was a letter sequence.  See which keyword this is.
488
96.5k
  if (!KeywordEnd) KeywordEnd = CurPtr;
489
96.5k
  CurPtr = KeywordEnd;
490
96.5k
  --StartChar;
491
96.5k
  StringRef Keyword(StartChar, CurPtr - StartChar);
492
493
96.5k
#define KEYWORD(STR)                                                           \
494
28.0M
  do {                                                                         \
495
28.0M
    if (Keyword == #STR)                                                       \
496
28.0M
      return lltok::kw_##STR;                                                  \
497
28.0M
  } while (false)
498
499
96.5k
  KEYWORD(true);    KEYWORD(false);
500
96.5k
  KEYWORD(declare); KEYWORD(define);
501
92.7k
  KEYWORD(global);  KEYWORD(constant);
502
503
92.7k
  KEYWORD(dso_local);
504
92.7k
  KEYWORD(dso_preemptable);
505
506
92.7k
  KEYWORD(private);
507
92.7k
  KEYWORD(internal);
508
92.7k
  KEYWORD(available_externally);
509
92.7k
  KEYWORD(linkonce);
510
92.7k
  KEYWORD(linkonce_odr);
511
92.7k
  KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg".
512
92.7k
  KEYWORD(weak_odr);
513
92.7k
  KEYWORD(appending);
514
92.7k
  KEYWORD(dllimport);
515
92.7k
  KEYWORD(dllexport);
516
92.7k
  KEYWORD(common);
517
92.7k
  KEYWORD(default);
518
92.7k
  KEYWORD(hidden);
519
92.7k
  KEYWORD(protected);
520
92.7k
  KEYWORD(unnamed_addr);
521
92.7k
  KEYWORD(local_unnamed_addr);
522
92.7k
  KEYWORD(externally_initialized);
523
92.7k
  KEYWORD(extern_weak);
524
92.7k
  KEYWORD(external);
525
92.7k
  KEYWORD(thread_local);
526
92.6k
  KEYWORD(localdynamic);
527
92.6k
  KEYWORD(initialexec);
528
92.6k
  KEYWORD(localexec);
529
92.6k
  KEYWORD(zeroinitializer);
530
92.5k
  KEYWORD(undef);
531
92.5k
  KEYWORD(null);
532
92.4k
  KEYWORD(none);
533
92.4k
  KEYWORD(poison);
534
92.3k
  KEYWORD(to);
535
92.3k
  KEYWORD(caller);
536
92.3k
  KEYWORD(within);
537
92.3k
  KEYWORD(from);
538
92.3k
  KEYWORD(tail);
539
92.3k
  KEYWORD(musttail);
540
92.3k
  KEYWORD(notail);
541
92.3k
  KEYWORD(target);
542
88.0k
  KEYWORD(triple);
543
88.0k
  KEYWORD(source_filename);
544
87.8k
  KEYWORD(unwind);
545
87.8k
  KEYWORD(datalayout);
546
87.8k
  KEYWORD(volatile);
547
87.8k
  KEYWORD(atomic);
548
87.8k
  KEYWORD(unordered);
549
87.8k
  KEYWORD(monotonic);
550
87.8k
  KEYWORD(acquire);
551
87.8k
  KEYWORD(release);
552
87.8k
  KEYWORD(acq_rel);
553
87.8k
  KEYWORD(seq_cst);
554
87.8k
  KEYWORD(syncscope);
555
556
87.8k
  KEYWORD(nnan);
557
87.8k
  KEYWORD(ninf);
558
87.8k
  KEYWORD(nsz);
559
87.8k
  KEYWORD(arcp);
560
87.8k
  KEYWORD(contract);
561
87.8k
  KEYWORD(reassoc);
562
87.8k
  KEYWORD(afn);
563
87.8k
  KEYWORD(fast);
564
87.8k
  KEYWORD(nuw);
565
87.8k
  KEYWORD(nsw);
566
87.8k
  KEYWORD(exact);
567
87.8k
  KEYWORD(disjoint);
568
87.8k
  KEYWORD(inbounds);
569
87.8k
  KEYWORD(nneg);
570
87.8k
  KEYWORD(inrange);
571
87.8k
  KEYWORD(addrspace);
572
87.8k
  KEYWORD(section);
573
87.7k
  KEYWORD(partition);
574
87.7k
  KEYWORD(code_model);
575
87.7k
  KEYWORD(alias);
576
87.7k
  KEYWORD(ifunc);
577
87.7k
  KEYWORD(module);
578
87.7k
  KEYWORD(asm);
579
87.7k
  KEYWORD(sideeffect);
580
87.7k
  KEYWORD(inteldialect);
581
87.7k
  KEYWORD(gc);
582
87.7k
  KEYWORD(prefix);
583
87.7k
  KEYWORD(prologue);
584
585
87.7k
  KEYWORD(no_sanitize_address);
586
87.7k
  KEYWORD(no_sanitize_hwaddress);
587
87.7k
  KEYWORD(sanitize_address_dyninit);
588
589
87.7k
  KEYWORD(ccc);
590
87.7k
  KEYWORD(fastcc);
591
87.7k
  KEYWORD(coldcc);
592
87.7k
  KEYWORD(cfguard_checkcc);
593
87.7k
  KEYWORD(x86_stdcallcc);
594
87.7k
  KEYWORD(x86_fastcallcc);
595
87.7k
  KEYWORD(x86_thiscallcc);
596
87.7k
  KEYWORD(x86_vectorcallcc);
597
87.7k
  KEYWORD(arm_apcscc);
598
87.7k
  KEYWORD(arm_aapcscc);
599
87.7k
  KEYWORD(arm_aapcs_vfpcc);
600
87.7k
  KEYWORD(aarch64_vector_pcs);
601
87.7k
  KEYWORD(aarch64_sve_vector_pcs);
602
87.7k
  KEYWORD(aarch64_sme_preservemost_from_x0);
603
87.7k
  KEYWORD(aarch64_sme_preservemost_from_x2);
604
87.7k
  KEYWORD(msp430_intrcc);
605
87.7k
  KEYWORD(avr_intrcc);
606
87.7k
  KEYWORD(avr_signalcc);
607
87.7k
  KEYWORD(ptx_kernel);
608
87.7k
  KEYWORD(ptx_device);
609
87.7k
  KEYWORD(spir_kernel);
610
87.7k
  KEYWORD(spir_func);
611
87.7k
  KEYWORD(intel_ocl_bicc);
612
87.7k
  KEYWORD(x86_64_sysvcc);
613
87.7k
  KEYWORD(win64cc);
614
87.7k
  KEYWORD(x86_regcallcc);
615
87.7k
  KEYWORD(swiftcc);
616
87.7k
  KEYWORD(swifttailcc);
617
87.7k
  KEYWORD(anyregcc);
618
87.6k
  KEYWORD(preserve_mostcc);
619
87.6k
  KEYWORD(preserve_allcc);
620
87.6k
  KEYWORD(ghccc);
621
87.6k
  KEYWORD(x86_intrcc);
622
87.6k
  KEYWORD(hhvmcc);
623
87.6k
  KEYWORD(hhvm_ccc);
624
87.6k
  KEYWORD(cxx_fast_tlscc);
625
87.6k
  KEYWORD(amdgpu_vs);
626
87.6k
  KEYWORD(amdgpu_ls);
627
87.6k
  KEYWORD(amdgpu_hs);
628
87.6k
  KEYWORD(amdgpu_es);
629
87.6k
  KEYWORD(amdgpu_gs);
630
87.6k
  KEYWORD(amdgpu_ps);
631
87.6k
  KEYWORD(amdgpu_cs);
632
87.6k
  KEYWORD(amdgpu_cs_chain);
633
87.6k
  KEYWORD(amdgpu_cs_chain_preserve);
634
87.6k
  KEYWORD(amdgpu_kernel);
635
87.6k
  KEYWORD(amdgpu_gfx);
636
87.6k
  KEYWORD(tailcc);
637
87.6k
  KEYWORD(m68k_rtdcc);
638
87.6k
  KEYWORD(graalcc);
639
640
87.6k
  KEYWORD(cc);
641
87.6k
  KEYWORD(c);
642
643
87.5k
  KEYWORD(attributes);
644
87.5k
  KEYWORD(sync);
645
87.5k
  KEYWORD(async);
646
647
87.5k
#define GET_ATTR_NAMES
648
87.5k
#define ATTRIBUTE_ENUM(ENUM_NAME, DISPLAY_NAME) \
649
6.27M
  KEYWORD(DISPLAY_NAME);
650
87.5k
#include "llvm/IR/Attributes.inc"
651
652
61.9k
  KEYWORD(read);
653
61.9k
  KEYWORD(write);
654
61.9k
  KEYWORD(readwrite);
655
61.9k
  KEYWORD(argmem);
656
61.9k
  KEYWORD(inaccessiblemem);
657
61.9k
  KEYWORD(argmemonly);
658
61.9k
  KEYWORD(inaccessiblememonly);
659
61.8k
  KEYWORD(inaccessiblemem_or_argmemonly);
660
661
  // nofpclass attribute
662
61.8k
  KEYWORD(all);
663
61.8k
  KEYWORD(nan);
664
61.8k
  KEYWORD(snan);
665
61.8k
  KEYWORD(qnan);
666
61.8k
  KEYWORD(inf);
667
  // ninf already a keyword
668
61.8k
  KEYWORD(pinf);
669
61.8k
  KEYWORD(norm);
670
61.8k
  KEYWORD(nnorm);
671
61.8k
  KEYWORD(pnorm);
672
  // sub already a keyword
673
61.8k
  KEYWORD(nsub);
674
61.8k
  KEYWORD(psub);
675
61.8k
  KEYWORD(zero);
676
61.8k
  KEYWORD(nzero);
677
61.8k
  KEYWORD(pzero);
678
679
61.8k
  KEYWORD(type);
680
59.7k
  KEYWORD(opaque);
681
682
59.7k
  KEYWORD(comdat);
683
684
  // Comdat types
685
59.7k
  KEYWORD(any);
686
59.7k
  KEYWORD(exactmatch);
687
59.7k
  KEYWORD(largest);
688
59.7k
  KEYWORD(nodeduplicate);
689
59.7k
  KEYWORD(samesize);
690
691
59.6k
  KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
692
59.6k
  KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
693
59.6k
  KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
694
59.6k
  KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
695
696
59.6k
  KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
697
59.6k
  KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin);
698
59.6k
  KEYWORD(uinc_wrap);
699
59.6k
  KEYWORD(udec_wrap);
700
701
59.6k
  KEYWORD(splat);
702
59.6k
  KEYWORD(vscale);
703
59.6k
  KEYWORD(x);
704
59.6k
  KEYWORD(blockaddress);
705
59.5k
  KEYWORD(dso_local_equivalent);
706
56.7k
  KEYWORD(no_cfi);
707
708
  // Metadata types.
709
53.5k
  KEYWORD(distinct);
710
711
  // Use-list order directives.
712
53.5k
  KEYWORD(uselistorder);
713
53.3k
  KEYWORD(uselistorder_bb);
714
715
53.1k
  KEYWORD(personality);
716
53.1k
  KEYWORD(cleanup);
717
53.1k
  KEYWORD(catch);
718
53.1k
  KEYWORD(filter);
719
720
  // Summary index keywords.
721
53.1k
  KEYWORD(path);
722
53.1k
  KEYWORD(hash);
723
53.1k
  KEYWORD(gv);
724
53.1k
  KEYWORD(guid);
725
53.1k
  KEYWORD(name);
726
53.1k
  KEYWORD(summaries);
727
53.1k
  KEYWORD(flags);
728
53.1k
  KEYWORD(blockcount);
729
53.1k
  KEYWORD(linkage);
730
53.1k
  KEYWORD(visibility);
731
53.1k
  KEYWORD(notEligibleToImport);
732
53.1k
  KEYWORD(live);
733
53.1k
  KEYWORD(dsoLocal);
734
53.1k
  KEYWORD(canAutoHide);
735
53.1k
  KEYWORD(function);
736
53.1k
  KEYWORD(insts);
737
53.1k
  KEYWORD(funcFlags);
738
53.1k
  KEYWORD(readNone);
739
53.1k
  KEYWORD(readOnly);
740
53.1k
  KEYWORD(noRecurse);
741
53.1k
  KEYWORD(returnDoesNotAlias);
742
53.1k
  KEYWORD(noInline);
743
53.1k
  KEYWORD(alwaysInline);
744
53.1k
  KEYWORD(noUnwind);
745
53.1k
  KEYWORD(mayThrow);
746
53.1k
  KEYWORD(hasUnknownCall);
747
53.1k
  KEYWORD(mustBeUnreachable);
748
53.1k
  KEYWORD(calls);
749
53.1k
  KEYWORD(callee);
750
53.1k
  KEYWORD(params);
751
53.1k
  KEYWORD(param);
752
53.1k
  KEYWORD(hotness);
753
53.1k
  KEYWORD(unknown);
754
53.1k
  KEYWORD(critical);
755
53.1k
  KEYWORD(relbf);
756
53.1k
  KEYWORD(variable);
757
53.1k
  KEYWORD(vTableFuncs);
758
53.1k
  KEYWORD(virtFunc);
759
53.1k
  KEYWORD(aliasee);
760
53.1k
  KEYWORD(refs);
761
53.1k
  KEYWORD(typeIdInfo);
762
53.1k
  KEYWORD(typeTests);
763
53.1k
  KEYWORD(typeTestAssumeVCalls);
764
53.1k
  KEYWORD(typeCheckedLoadVCalls);
765
53.1k
  KEYWORD(typeTestAssumeConstVCalls);
766
53.1k
  KEYWORD(typeCheckedLoadConstVCalls);
767
53.1k
  KEYWORD(vFuncId);
768
53.1k
  KEYWORD(offset);
769
53.1k
  KEYWORD(args);
770
53.1k
  KEYWORD(typeid);
771
53.1k
  KEYWORD(typeidCompatibleVTable);
772
53.1k
  KEYWORD(summary);
773
53.1k
  KEYWORD(typeTestRes);
774
53.1k
  KEYWORD(kind);
775
53.1k
  KEYWORD(unsat);
776
53.1k
  KEYWORD(byteArray);
777
53.1k
  KEYWORD(inline);
778
53.1k
  KEYWORD(single);
779
53.1k
  KEYWORD(allOnes);
780
53.1k
  KEYWORD(sizeM1BitWidth);
781
53.1k
  KEYWORD(alignLog2);
782
53.1k
  KEYWORD(sizeM1);
783
53.1k
  KEYWORD(bitMask);
784
53.1k
  KEYWORD(inlineBits);
785
53.1k
  KEYWORD(vcall_visibility);
786
53.1k
  KEYWORD(wpdResolutions);
787
53.1k
  KEYWORD(wpdRes);
788
53.1k
  KEYWORD(indir);
789
53.1k
  KEYWORD(singleImpl);
790
53.1k
  KEYWORD(branchFunnel);
791
53.1k
  KEYWORD(singleImplName);
792
53.1k
  KEYWORD(resByArg);
793
53.1k
  KEYWORD(byArg);
794
53.1k
  KEYWORD(uniformRetVal);
795
53.1k
  KEYWORD(uniqueRetVal);
796
53.1k
  KEYWORD(virtualConstProp);
797
53.1k
  KEYWORD(info);
798
53.1k
  KEYWORD(byte);
799
53.0k
  KEYWORD(bit);
800
53.0k
  KEYWORD(varFlags);
801
53.0k
  KEYWORD(callsites);
802
53.0k
  KEYWORD(clones);
803
53.0k
  KEYWORD(stackIds);
804
53.0k
  KEYWORD(allocs);
805
53.0k
  KEYWORD(versions);
806
53.0k
  KEYWORD(memProf);
807
53.0k
  KEYWORD(notcold);
808
809
53.0k
#undef KEYWORD
810
811
  // Keywords for types.
812
53.0k
#define TYPEKEYWORD(STR, LLVMTY)                                               \
813
478k
  do {                                                                         \
814
478k
    if (Keyword == STR) {                                                      \
815
22.2k
      TyVal = LLVMTY;                                                          \
816
22.2k
      return lltok::Type;                                                      \
817
22.2k
    }                                                                          \
818
478k
  } while (false)
819
820
53.0k
  TYPEKEYWORD("void",      Type::getVoidTy(Context));
821
53.0k
  TYPEKEYWORD("half",      Type::getHalfTy(Context));
822
31.1k
  TYPEKEYWORD("bfloat",    Type::getBFloatTy(Context));
823
31.1k
  TYPEKEYWORD("float",     Type::getFloatTy(Context));
824
31.0k
  TYPEKEYWORD("double",    Type::getDoubleTy(Context));
825
31.0k
  TYPEKEYWORD("x86_fp80",  Type::getX86_FP80Ty(Context));
826
31.0k
  TYPEKEYWORD("fp128",     Type::getFP128Ty(Context));
827
31.0k
  TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
828
31.0k
  TYPEKEYWORD("label",     Type::getLabelTy(Context));
829
30.9k
  TYPEKEYWORD("metadata",  Type::getMetadataTy(Context));
830
30.9k
  TYPEKEYWORD("x86_mmx",   Type::getX86_MMXTy(Context));
831
30.9k
  TYPEKEYWORD("x86_amx",   Type::getX86_AMXTy(Context));
832
30.9k
  TYPEKEYWORD("token",     Type::getTokenTy(Context));
833
30.8k
  TYPEKEYWORD("ptr",       PointerType::getUnqual(Context));
834
835
30.8k
#undef TYPEKEYWORD
836
837
  // Keywords for instructions.
838
30.8k
#define INSTKEYWORD(STR, Enum)                                                 \
839
1.90M
  do {                                                                         \
840
1.90M
    if (Keyword == #STR) {                                                     \
841
3.13k
      UIntVal = Instruction::Enum;                                             \
842
3.13k
      return lltok::kw_##STR;                                                  \
843
3.13k
    }                                                                          \
844
1.90M
  } while (false)
845
846
30.8k
  INSTKEYWORD(fneg,  FNeg);
847
848
30.8k
  INSTKEYWORD(add,   Add);  INSTKEYWORD(fadd,   FAdd);
849
30.6k
  INSTKEYWORD(sub,   Sub);  INSTKEYWORD(fsub,   FSub);
850
30.5k
  INSTKEYWORD(mul,   Mul);  INSTKEYWORD(fmul,   FMul);
851
30.5k
  INSTKEYWORD(udiv,  UDiv); INSTKEYWORD(sdiv,  SDiv); INSTKEYWORD(fdiv,  FDiv);
852
30.5k
  INSTKEYWORD(urem,  URem); INSTKEYWORD(srem,  SRem); INSTKEYWORD(frem,  FRem);
853
30.5k
  INSTKEYWORD(shl,   Shl);  INSTKEYWORD(lshr,  LShr); INSTKEYWORD(ashr,  AShr);
854
29.8k
  INSTKEYWORD(and,   And);  INSTKEYWORD(or,    Or);   INSTKEYWORD(xor,   Xor);
855
29.6k
  INSTKEYWORD(icmp,  ICmp); INSTKEYWORD(fcmp,  FCmp);
856
857
29.6k
  INSTKEYWORD(phi,         PHI);
858
29.6k
  INSTKEYWORD(call,        Call);
859
29.6k
  INSTKEYWORD(trunc,       Trunc);
860
29.6k
  INSTKEYWORD(zext,        ZExt);
861
29.6k
  INSTKEYWORD(sext,        SExt);
862
29.6k
  INSTKEYWORD(fptrunc,     FPTrunc);
863
29.6k
  INSTKEYWORD(fpext,       FPExt);
864
29.6k
  INSTKEYWORD(uitofp,      UIToFP);
865
29.6k
  INSTKEYWORD(sitofp,      SIToFP);
866
29.6k
  INSTKEYWORD(fptoui,      FPToUI);
867
29.6k
  INSTKEYWORD(fptosi,      FPToSI);
868
29.6k
  INSTKEYWORD(inttoptr,    IntToPtr);
869
29.6k
  INSTKEYWORD(ptrtoint,    PtrToInt);
870
29.6k
  INSTKEYWORD(bitcast,     BitCast);
871
29.6k
  INSTKEYWORD(addrspacecast, AddrSpaceCast);
872
29.5k
  INSTKEYWORD(select,      Select);
873
29.5k
  INSTKEYWORD(va_arg,      VAArg);
874
29.5k
  INSTKEYWORD(ret,         Ret);
875
29.5k
  INSTKEYWORD(br,          Br);
876
29.5k
  INSTKEYWORD(switch,      Switch);
877
29.5k
  INSTKEYWORD(indirectbr,  IndirectBr);
878
29.5k
  INSTKEYWORD(invoke,      Invoke);
879
29.5k
  INSTKEYWORD(resume,      Resume);
880
28.1k
  INSTKEYWORD(unreachable, Unreachable);
881
28.1k
  INSTKEYWORD(callbr,      CallBr);
882
883
28.1k
  INSTKEYWORD(alloca,      Alloca);
884
28.1k
  INSTKEYWORD(load,        Load);
885
28.1k
  INSTKEYWORD(store,       Store);
886
28.1k
  INSTKEYWORD(cmpxchg,     AtomicCmpXchg);
887
28.1k
  INSTKEYWORD(atomicrmw,   AtomicRMW);
888
28.1k
  INSTKEYWORD(fence,       Fence);
889
28.1k
  INSTKEYWORD(getelementptr, GetElementPtr);
890
891
27.9k
  INSTKEYWORD(extractelement, ExtractElement);
892
27.9k
  INSTKEYWORD(insertelement,  InsertElement);
893
27.8k
  INSTKEYWORD(shufflevector,  ShuffleVector);
894
27.8k
  INSTKEYWORD(extractvalue,   ExtractValue);
895
27.8k
  INSTKEYWORD(insertvalue,    InsertValue);
896
27.8k
  INSTKEYWORD(landingpad,     LandingPad);
897
27.8k
  INSTKEYWORD(cleanupret,     CleanupRet);
898
27.7k
  INSTKEYWORD(catchret,       CatchRet);
899
27.7k
  INSTKEYWORD(catchswitch,  CatchSwitch);
900
27.7k
  INSTKEYWORD(catchpad,     CatchPad);
901
27.7k
  INSTKEYWORD(cleanuppad,   CleanupPad);
902
903
27.7k
  INSTKEYWORD(freeze,       Freeze);
904
905
27.6k
#undef INSTKEYWORD
906
907
27.6k
#define DWKEYWORD(TYPE, TOKEN)                                                 \
908
193k
  do {                                                                         \
909
193k
    if (Keyword.starts_with("DW_" #TYPE "_")) {                                \
910
347
      StrVal.assign(Keyword.begin(), Keyword.end());                           \
911
347
      return lltok::TOKEN;                                                     \
912
347
    }                                                                          \
913
193k
  } while (false)
914
915
27.6k
  DWKEYWORD(TAG, DwarfTag);
916
27.6k
  DWKEYWORD(ATE, DwarfAttEncoding);
917
27.6k
  DWKEYWORD(VIRTUALITY, DwarfVirtuality);
918
27.6k
  DWKEYWORD(LANG, DwarfLang);
919
27.6k
  DWKEYWORD(CC, DwarfCC);
920
27.6k
  DWKEYWORD(OP, DwarfOp);
921
27.3k
  DWKEYWORD(MACINFO, DwarfMacinfo);
922
923
27.3k
#undef DWKEYWORD
924
925
27.3k
  if (Keyword.starts_with("DIFlag")) {
926
3
    StrVal.assign(Keyword.begin(), Keyword.end());
927
3
    return lltok::DIFlag;
928
3
  }
929
930
27.3k
  if (Keyword.starts_with("DISPFlag")) {
931
3
    StrVal.assign(Keyword.begin(), Keyword.end());
932
3
    return lltok::DISPFlag;
933
3
  }
934
935
27.3k
  if (Keyword.starts_with("CSK_")) {
936
7
    StrVal.assign(Keyword.begin(), Keyword.end());
937
7
    return lltok::ChecksumKind;
938
7
  }
939
940
27.3k
  if (Keyword == "NoDebug" || Keyword == "FullDebug" ||
941
27.3k
      Keyword == "LineTablesOnly" || Keyword == "DebugDirectivesOnly") {
942
5
    StrVal.assign(Keyword.begin(), Keyword.end());
943
5
    return lltok::EmissionKind;
944
5
  }
945
946
27.3k
  if (Keyword == "GNU" || Keyword == "Apple" || Keyword == "None" ||
947
27.3k
      Keyword == "Default") {
948
4
    StrVal.assign(Keyword.begin(), Keyword.end());
949
4
    return lltok::NameTableKind;
950
4
  }
951
952
  // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
953
  // the CFE to avoid forcing it to deal with 64-bit numbers.
954
27.3k
  if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
955
27.3k
      TokStart[1] == '0' && TokStart[2] == 'x' &&
956
27.3k
      isxdigit(static_cast<unsigned char>(TokStart[3]))) {
957
25.1k
    int len = CurPtr-TokStart-3;
958
25.1k
    uint32_t bits = len * 4;
959
25.1k
    StringRef HexStr(TokStart + 3, len);
960
25.1k
    if (!all_of(HexStr, isxdigit)) {
961
      // Bad token, return it as an error.
962
21
      CurPtr = TokStart+3;
963
21
      return lltok::Error;
964
21
    }
965
25.1k
    APInt Tmp(bits, HexStr, 16);
966
25.1k
    uint32_t activeBits = Tmp.getActiveBits();
967
25.1k
    if (activeBits > 0 && activeBits < bits)
968
6.90k
      Tmp = Tmp.trunc(activeBits);
969
25.1k
    APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
970
25.1k
    return lltok::APSInt;
971
25.1k
  }
972
973
  // If this is "cc1234", return this as just "cc".
974
2.12k
  if (TokStart[0] == 'c' && TokStart[1] == 'c') {
975
11
    CurPtr = TokStart+2;
976
11
    return lltok::kw_cc;
977
11
  }
978
979
  // Finally, if this isn't known, return an error.
980
2.10k
  CurPtr = TokStart+1;
981
2.10k
  return lltok::Error;
982
2.12k
}
983
984
/// Lex all tokens that start with a 0x prefix, knowing they match and are not
985
/// labels.
986
///    HexFPConstant     0x[0-9A-Fa-f]+
987
///    HexFP80Constant   0xK[0-9A-Fa-f]+
988
///    HexFP128Constant  0xL[0-9A-Fa-f]+
989
///    HexPPC128Constant 0xM[0-9A-Fa-f]+
990
///    HexHalfConstant   0xH[0-9A-Fa-f]+
991
///    HexBFloatConstant 0xR[0-9A-Fa-f]+
992
720
lltok::Kind LLLexer::Lex0x() {
993
720
  CurPtr = TokStart + 2;
994
995
720
  char Kind;
996
720
  if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' ||
997
720
      CurPtr[0] == 'R') {
998
597
    Kind = *CurPtr++;
999
597
  } else {
1000
123
    Kind = 'J';
1001
123
  }
1002
1003
720
  if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
1004
    // Bad token, return it as an error.
1005
12
    CurPtr = TokStart+1;
1006
12
    return lltok::Error;
1007
12
  }
1008
1009
11.0k
  while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
1010
10.3k
    ++CurPtr;
1011
1012
708
  if (Kind == 'J') {
1013
    // HexFPConstant - Floating point constant represented in IEEE format as a
1014
    // hexadecimal number for when exponential notation is not precise enough.
1015
    // Half, BFloat, Float, and double only.
1016
114
    APFloatVal = APFloat(APFloat::IEEEdouble(),
1017
114
                         APInt(64, HexIntToVal(TokStart + 2, CurPtr)));
1018
114
    return lltok::APFloat;
1019
114
  }
1020
1021
594
  uint64_t Pair[2];
1022
594
  switch (Kind) {
1023
0
  default: llvm_unreachable("Unknown kind!");
1024
326
  case 'K':
1025
    // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
1026
326
    FP80HexToIntPair(TokStart+3, CurPtr, Pair);
1027
326
    APFloatVal = APFloat(APFloat::x87DoubleExtended(), APInt(80, Pair));
1028
326
    return lltok::APFloat;
1029
138
  case 'L':
1030
    // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
1031
138
    HexToIntPair(TokStart+3, CurPtr, Pair);
1032
138
    APFloatVal = APFloat(APFloat::IEEEquad(), APInt(128, Pair));
1033
138
    return lltok::APFloat;
1034
18
  case 'M':
1035
    // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
1036
18
    HexToIntPair(TokStart+3, CurPtr, Pair);
1037
18
    APFloatVal = APFloat(APFloat::PPCDoubleDouble(), APInt(128, Pair));
1038
18
    return lltok::APFloat;
1039
61
  case 'H':
1040
61
    APFloatVal = APFloat(APFloat::IEEEhalf(),
1041
61
                         APInt(16,HexIntToVal(TokStart+3, CurPtr)));
1042
61
    return lltok::APFloat;
1043
51
  case 'R':
1044
    // Brain floating point
1045
51
    APFloatVal = APFloat(APFloat::BFloat(),
1046
51
                         APInt(16, HexIntToVal(TokStart + 3, CurPtr)));
1047
51
    return lltok::APFloat;
1048
594
  }
1049
594
}
1050
1051
/// Lex tokens for a label or a numeric constant, possibly starting with -.
1052
///    Label             [-a-zA-Z$._0-9]+:
1053
///    NInteger          -[0-9]+
1054
///    FPConstant        [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
1055
///    PInteger          [0-9]+
1056
///    HexFPConstant     0x[0-9A-Fa-f]+
1057
///    HexFP80Constant   0xK[0-9A-Fa-f]+
1058
///    HexFP128Constant  0xL[0-9A-Fa-f]+
1059
///    HexPPC128Constant 0xM[0-9A-Fa-f]+
1060
969k
lltok::Kind LLLexer::LexDigitOrNegative() {
1061
  // If the letter after the negative is not a number, this is probably a label.
1062
969k
  if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
1063
969k
      !isdigit(static_cast<unsigned char>(CurPtr[0]))) {
1064
    // Okay, this is not a number after the -, it's probably a label.
1065
121
    if (const char *End = isLabelTail(CurPtr)) {
1066
6
      StrVal.assign(TokStart, End-1);
1067
6
      CurPtr = End;
1068
6
      return lltok::LabelStr;
1069
6
    }
1070
1071
115
    return lltok::Error;
1072
121
  }
1073
1074
  // At this point, it is either a label, int or fp constant.
1075
1076
  // Skip digits, we have at least one.
1077
4.24M
  for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
1078
3.27M
    /*empty*/;
1079
1080
  // Check if this is a fully-numeric label:
1081
969k
  if (isdigit(TokStart[0]) && CurPtr[0] == ':') {
1082
184
    uint64_t Val = atoull(TokStart, CurPtr);
1083
184
    ++CurPtr; // Skip the colon.
1084
184
    if ((unsigned)Val != Val)
1085
106
      Error("invalid value number (too large)!");
1086
184
    UIntVal = unsigned(Val);
1087
184
    return lltok::LabelID;
1088
184
  }
1089
1090
  // Check to see if this really is a string label, e.g. "-1:".
1091
969k
  if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
1092
24.4k
    if (const char *End = isLabelTail(CurPtr)) {
1093
18
      StrVal.assign(TokStart, End-1);
1094
18
      CurPtr = End;
1095
18
      return lltok::LabelStr;
1096
18
    }
1097
24.4k
  }
1098
1099
  // If the next character is a '.', then it is a fp value, otherwise its
1100
  // integer.
1101
969k
  if (CurPtr[0] != '.') {
1102
946k
    if (TokStart[0] == '0' && TokStart[1] == 'x')
1103
720
      return Lex0x();
1104
945k
    APSIntVal = APSInt(StringRef(TokStart, CurPtr - TokStart));
1105
945k
    return lltok::APSInt;
1106
946k
  }
1107
1108
23.0k
  ++CurPtr;
1109
1110
  // Skip over [0-9]*([eE][-+]?[0-9]+)?
1111
722k
  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1112
1113
23.0k
  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
1114
159
    if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
1115
159
        ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
1116
141
          isdigit(static_cast<unsigned char>(CurPtr[2])))) {
1117
141
      CurPtr += 2;
1118
1.43k
      while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1119
141
    }
1120
159
  }
1121
1122
23.0k
  APFloatVal = APFloat(APFloat::IEEEdouble(),
1123
23.0k
                       StringRef(TokStart, CurPtr - TokStart));
1124
23.0k
  return lltok::APFloat;
1125
969k
}
1126
1127
/// Lex a floating point constant starting with +.
1128
///    FPConstant  [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
1129
249
lltok::Kind LLLexer::LexPositive() {
1130
  // If the letter after the negative is a number, this is probably not a
1131
  // label.
1132
249
  if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
1133
20
    return lltok::Error;
1134
1135
  // Skip digits.
1136
1.18k
  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
1137
958
    /*empty*/;
1138
1139
  // At this point, we need a '.'.
1140
229
  if (CurPtr[0] != '.') {
1141
21
    CurPtr = TokStart+1;
1142
21
    return lltok::Error;
1143
21
  }
1144
1145
208
  ++CurPtr;
1146
1147
  // Skip over [0-9]*([eE][-+]?[0-9]+)?
1148
1.45k
  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1149
1150
208
  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
1151
61
    if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
1152
61
        ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
1153
49
        isdigit(static_cast<unsigned char>(CurPtr[2])))) {
1154
49
      CurPtr += 2;
1155
347
      while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1156
49
    }
1157
61
  }
1158
1159
208
  APFloatVal = APFloat(APFloat::IEEEdouble(),
1160
208
                       StringRef(TokStart, CurPtr - TokStart));
1161
208
  return lltok::APFloat;
1162
229
}