Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/clang/lib/AST/CommentParser.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "clang/AST/CommentParser.h"
10
#include "clang/AST/CommentCommandTraits.h"
11
#include "clang/AST/CommentDiagnostic.h"
12
#include "clang/AST/CommentSema.h"
13
#include "clang/Basic/CharInfo.h"
14
#include "clang/Basic/SourceManager.h"
15
#include "llvm/Support/ErrorHandling.h"
16
17
namespace clang {
18
19
0
/// Returns true when every character of \p S is whitespace according to the
/// single-character isWhitespace() overload.  An empty string yields true.
static inline bool isWhitespace(llvm::StringRef S) {
  for (const char Ch : S)
    if (!isWhitespace(Ch))
      return false;
  return true;
}
26
27
namespace comments {
28
29
/// Re-lexes a sequence of tok::text tokens.
30
class TextTokenRetokenizer {
31
  llvm::BumpPtrAllocator &Allocator;
32
  Parser &P;
33
34
  /// This flag is set when there are no more tokens we can fetch from lexer.
35
  bool NoMoreInterestingTokens;
36
37
  /// Token buffer: tokens we have processed and lookahead.
38
  SmallVector<Token, 16> Toks;
39
40
  /// A position in \c Toks.
41
  struct Position {
42
    const char *BufferStart;
43
    const char *BufferEnd;
44
    const char *BufferPtr;
45
    SourceLocation BufferStartLoc;
46
    unsigned CurToken;
47
  };
48
49
  /// Current position in Toks.
50
  Position Pos;
51
52
0
  bool isEnd() const {
53
0
    return Pos.CurToken >= Toks.size();
54
0
  }
55
56
  /// Sets up the buffer pointers to point to current token.
57
0
  void setupBuffer() {
58
0
    assert(!isEnd());
59
0
    const Token &Tok = Toks[Pos.CurToken];
60
61
0
    Pos.BufferStart = Tok.getText().begin();
62
0
    Pos.BufferEnd = Tok.getText().end();
63
0
    Pos.BufferPtr = Pos.BufferStart;
64
0
    Pos.BufferStartLoc = Tok.getLocation();
65
0
  }
66
67
0
  SourceLocation getSourceLocation() const {
68
0
    const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
69
0
    return Pos.BufferStartLoc.getLocWithOffset(CharNo);
70
0
  }
71
72
0
  char peek() const {
73
0
    assert(!isEnd());
74
0
    assert(Pos.BufferPtr != Pos.BufferEnd);
75
0
    return *Pos.BufferPtr;
76
0
  }
77
78
0
  void consumeChar() {
79
0
    assert(!isEnd());
80
0
    assert(Pos.BufferPtr != Pos.BufferEnd);
81
0
    Pos.BufferPtr++;
82
0
    if (Pos.BufferPtr == Pos.BufferEnd) {
83
0
      Pos.CurToken++;
84
0
      if (isEnd() && !addToken())
85
0
        return;
86
87
0
      assert(!isEnd());
88
0
      setupBuffer();
89
0
    }
90
0
  }
91
92
  /// Add a token.
93
  /// Returns true on success, false if there are no interesting tokens to
94
  /// fetch from lexer.
95
0
  bool addToken() {
96
0
    if (NoMoreInterestingTokens)
97
0
      return false;
98
99
0
    if (P.Tok.is(tok::newline)) {
100
      // If we see a single newline token between text tokens, skip it.
101
0
      Token Newline = P.Tok;
102
0
      P.consumeToken();
103
0
      if (P.Tok.isNot(tok::text)) {
104
0
        P.putBack(Newline);
105
0
        NoMoreInterestingTokens = true;
106
0
        return false;
107
0
      }
108
0
    }
109
0
    if (P.Tok.isNot(tok::text)) {
110
0
      NoMoreInterestingTokens = true;
111
0
      return false;
112
0
    }
113
114
0
    Toks.push_back(P.Tok);
115
0
    P.consumeToken();
116
0
    if (Toks.size() == 1)
117
0
      setupBuffer();
118
0
    return true;
119
0
  }
120
121
0
  void consumeWhitespace() {
122
0
    while (!isEnd()) {
123
0
      if (isWhitespace(peek()))
124
0
        consumeChar();
125
0
      else
126
0
        break;
127
0
    }
128
0
  }
129
130
  void formTokenWithChars(Token &Result,
131
                          SourceLocation Loc,
132
                          const char *TokBegin,
133
                          unsigned TokLength,
134
0
                          StringRef Text) {
135
0
    Result.setLocation(Loc);
136
0
    Result.setKind(tok::text);
137
0
    Result.setLength(TokLength);
138
0
#ifndef NDEBUG
139
0
    Result.TextPtr = "<UNSET>";
140
0
    Result.IntVal = 7;
141
0
#endif
142
0
    Result.setText(Text);
143
0
  }
144
145
public:
146
  TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
147
0
      Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
148
0
    Pos.CurToken = 0;
149
0
    addToken();
150
0
  }
151
152
  /// Extract a word -- sequence of non-whitespace characters.
153
0
  bool lexWord(Token &Tok) {
154
0
    if (isEnd())
155
0
      return false;
156
157
0
    Position SavedPos = Pos;
158
159
0
    consumeWhitespace();
160
0
    SmallString<32> WordText;
161
0
    const char *WordBegin = Pos.BufferPtr;
162
0
    SourceLocation Loc = getSourceLocation();
163
0
    while (!isEnd()) {
164
0
      const char C = peek();
165
0
      if (!isWhitespace(C)) {
166
0
        WordText.push_back(C);
167
0
        consumeChar();
168
0
      } else
169
0
        break;
170
0
    }
171
0
    const unsigned Length = WordText.size();
172
0
    if (Length == 0) {
173
0
      Pos = SavedPos;
174
0
      return false;
175
0
    }
176
177
0
    char *TextPtr = Allocator.Allocate<char>(Length + 1);
178
179
0
    memcpy(TextPtr, WordText.c_str(), Length + 1);
180
0
    StringRef Text = StringRef(TextPtr, Length);
181
182
0
    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
183
0
    return true;
184
0
  }
185
186
0
  bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
187
0
    if (isEnd())
188
0
      return false;
189
190
0
    Position SavedPos = Pos;
191
192
0
    consumeWhitespace();
193
0
    SmallString<32> WordText;
194
0
    const char *WordBegin = Pos.BufferPtr;
195
0
    SourceLocation Loc = getSourceLocation();
196
0
    bool Error = false;
197
0
    if (!isEnd()) {
198
0
      const char C = peek();
199
0
      if (C == OpenDelim) {
200
0
        WordText.push_back(C);
201
0
        consumeChar();
202
0
      } else
203
0
        Error = true;
204
0
    }
205
0
    char C = '\0';
206
0
    while (!Error && !isEnd()) {
207
0
      C = peek();
208
0
      WordText.push_back(C);
209
0
      consumeChar();
210
0
      if (C == CloseDelim)
211
0
        break;
212
0
    }
213
0
    if (!Error && C != CloseDelim)
214
0
      Error = true;
215
216
0
    if (Error) {
217
0
      Pos = SavedPos;
218
0
      return false;
219
0
    }
220
221
0
    const unsigned Length = WordText.size();
222
0
    char *TextPtr = Allocator.Allocate<char>(Length + 1);
223
224
0
    memcpy(TextPtr, WordText.c_str(), Length + 1);
225
0
    StringRef Text = StringRef(TextPtr, Length);
226
227
0
    formTokenWithChars(Tok, Loc, WordBegin,
228
0
                       Pos.BufferPtr - WordBegin, Text);
229
0
    return true;
230
0
  }
231
232
  /// Put back tokens that we didn't consume.
233
0
  void putBackLeftoverTokens() {
234
0
    if (isEnd())
235
0
      return;
236
237
0
    bool HavePartialTok = false;
238
0
    Token PartialTok;
239
0
    if (Pos.BufferPtr != Pos.BufferStart) {
240
0
      formTokenWithChars(PartialTok, getSourceLocation(),
241
0
                         Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
242
0
                         StringRef(Pos.BufferPtr,
243
0
                                   Pos.BufferEnd - Pos.BufferPtr));
244
0
      HavePartialTok = true;
245
0
      Pos.CurToken++;
246
0
    }
247
248
0
    P.putBack(llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
249
0
    Pos.CurToken = Toks.size();
250
251
0
    if (HavePartialTok)
252
0
      P.putBack(PartialTok);
253
0
  }
254
};
255
256
/// Stores references to the collaborating objects and calls consumeToken()
/// once before any parsing starts.
Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
               const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
               const CommandTraits &Traits)
    : L(L),
      S(S),
      Allocator(Allocator),
      SourceMgr(SourceMgr),
      Diags(Diags),
      Traits(Traits) {
  consumeToken();
}
263
264
void Parser::parseParamCommandArgs(ParamCommandComment *PC,
265
0
                                   TextTokenRetokenizer &Retokenizer) {
266
0
  Token Arg;
267
  // Check if argument looks like direction specification: [dir]
268
  // e.g., [in], [out], [in,out]
269
0
  if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
270
0
    S.actOnParamCommandDirectionArg(PC,
271
0
                                    Arg.getLocation(),
272
0
                                    Arg.getEndLocation(),
273
0
                                    Arg.getText());
274
275
0
  if (Retokenizer.lexWord(Arg))
276
0
    S.actOnParamCommandParamNameArg(PC,
277
0
                                    Arg.getLocation(),
278
0
                                    Arg.getEndLocation(),
279
0
                                    Arg.getText());
280
0
}
281
282
void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
283
0
                                    TextTokenRetokenizer &Retokenizer) {
284
0
  Token Arg;
285
0
  if (Retokenizer.lexWord(Arg))
286
0
    S.actOnTParamCommandParamNameArg(TPC,
287
0
                                     Arg.getLocation(),
288
0
                                     Arg.getEndLocation(),
289
0
                                     Arg.getText());
290
0
}
291
292
/// Lexes up to \p NumArgs whitespace-separated words from \p Retokenizer.
///
/// Storage for \p NumArgs arguments is taken from the parser's allocator; the
/// returned array covers only the arguments that were actually found, which
/// may be fewer than \p NumArgs.
ArrayRef<Comment::Argument>
Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
  auto *ArgStorage = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];

  unsigned Count = 0;
  for (Token WordTok; Count < NumArgs && Retokenizer.lexWord(WordTok);
       ++Count) {
    const SourceRange WordRange(WordTok.getLocation(),
                                WordTok.getEndLocation());
    ArgStorage[Count] = Comment::Argument{WordRange, WordTok.getText()};
  }

  return llvm::ArrayRef(ArgStorage, Count);
}
306
307
0
/// Parses a block command starting at the current command token: creates the
/// matching comment node (param, tparam or plain block command), parses its
/// arguments, attaches the paragraph that follows (or an empty one when
/// another block command comes next) and returns the finished node.
BlockCommandComment *Parser::parseBlockCommand() {
  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

  const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
  CommandMarkerKind Marker =
      Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;

  // Only the pointer matching the command's kind is assigned below.
  ParamCommandComment *ParamCmd = nullptr;
  TParamCommandComment *TParamCmd = nullptr;
  BlockCommandComment *PlainCmd = nullptr;

  if (Info->IsParamCommand)
    ParamCmd = S.actOnParamCommandStart(Tok.getLocation(),
                                        Tok.getEndLocation(),
                                        Tok.getCommandID(), Marker);
  else if (Info->IsTParamCommand)
    TParamCmd = S.actOnTParamCommandStart(Tok.getLocation(),
                                          Tok.getEndLocation(),
                                          Tok.getCommandID(), Marker);
  else
    PlainCmd = S.actOnBlockCommandStart(Tok.getLocation(),
                                        Tok.getEndLocation(),
                                        Tok.getCommandID(), Marker);
  consumeToken();

  // Attaches the paragraph to whichever command node was started and returns
  // that node.
  auto FinishWith =
      [&](ParagraphComment *Paragraph) -> BlockCommandComment * {
    if (ParamCmd) {
      S.actOnParamCommandFinish(ParamCmd, Paragraph);
      return ParamCmd;
    }
    if (TParamCmd) {
      S.actOnTParamCommandFinish(TParamCmd, Paragraph);
      return TParamCmd;
    }
    S.actOnBlockCommandFinish(PlainCmd, Paragraph);
    return PlainCmd;
  };

  // Block command ahead.  We can't nest block commands, so pretend that this
  // command has an empty argument.
  if (isTokBlockCommand())
    return FinishWith(S.actOnParagraphComment(std::nullopt));

  const bool NeedsArgs = ParamCmd || TParamCmd || Info->NumArgs > 0;
  if (NeedsArgs) {
    // In order to parse command arguments we need to retokenize a few
    // following text tokens.
    TextTokenRetokenizer Retokenizer(Allocator, *this);

    if (ParamCmd)
      parseParamCommandArgs(ParamCmd, Retokenizer);
    else if (TParamCmd)
      parseTParamCommandArgs(TParamCmd, Retokenizer);
    else
      S.actOnBlockCommandArgs(PlainCmd,
                              parseCommandArgs(Retokenizer, Info->NumArgs));

    Retokenizer.putBackLeftoverTokens();
  }

  // If there's a block command ahead -- directly, or right after a single
  // newline -- we will attach an empty paragraph to this command.
  bool EmptyParagraph = isTokBlockCommand();
  if (!EmptyParagraph && Tok.is(tok::newline)) {
    Token NewlineTok = Tok;
    consumeToken();
    EmptyParagraph = isTokBlockCommand();
    putBack(NewlineTok);
  }

  // Since we have checked for a block command, parsing must yield a paragraph
  // in the non-empty case.
  ParagraphComment *Paragraph =
      EmptyParagraph
          ? S.actOnParagraphComment(std::nullopt)
          : cast<ParagraphComment>(parseParagraphOrBlockCommand());

  return FinishWith(Paragraph);
}
398
399
0
/// Parses an inline command starting at the current command token together
/// with up to Info->NumArgs word arguments, warning when fewer arguments than
/// expected are present.  Unconsumed text is handed back to the parser.
InlineCommandComment *Parser::parseInlineCommand() {
  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

  const Token CommandTok = Tok;
  const CommandInfo *Info = Traits.getCommandInfo(CommandTok.getCommandID());
  consumeToken();

  const SourceLocation CmdBegin = CommandTok.getLocation();
  const SourceLocation CmdEnd = CommandTok.getEndLocation();

  TextTokenRetokenizer Retokenizer(Allocator, *this);
  ArrayRef<Comment::Argument> Args =
      parseCommandArgs(Retokenizer, Info->NumArgs);

  InlineCommandComment *IC = S.actOnInlineCommand(
      CmdBegin, CmdEnd, CommandTok.getCommandID(), Args);

  const bool TooFewArgs = Args.size() < Info->NumArgs;
  if (TooFewArgs) {
    Diag(CmdEnd.getLocWithOffset(1),
         diag::warn_doc_inline_command_not_enough_arguments)
        << CommandTok.is(tok::at_command) << Info->Name << Args.size()
        << Info->NumArgs << SourceRange(CmdBegin, CmdEnd);
  }

  Retokenizer.putBackLeftoverTokens();

  return IC;
}
426
427
0
/// Parses an HTML start tag and its attributes, starting at the current
/// tok::html_start_tag token.  Malformed attribute lists and prematurely
/// ended tags are diagnosed; a tag node is returned in every case.
HTMLStartTagComment *Parser::parseHTMLStartTag() {
  assert(Tok.is(tok::html_start_tag));
  HTMLStartTagComment *HST =
      S.actOnHTMLStartTagStart(Tok.getLocation(), Tok.getHTMLTagStartName());
  consumeToken();

  SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;

  // Completes the tag with the attributes collected so far.
  auto FinishTag = [&](SourceLocation GreaterLoc, bool IsSelfClosing) {
    S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
                              GreaterLoc, IsSelfClosing);
  };

  // Drops a run of '=' and quoted-string tokens.
  auto SkipEqualsAndQuotedStrings = [&] {
    while (Tok.is(tok::html_equals) || Tok.is(tok::html_quoted_string))
      consumeToken();
  };

  for (;;) {
    switch (Tok.getKind()) {
    case tok::html_ident: {
      Token NameTok = Tok;
      consumeToken();

      // Attribute without a value.
      if (Tok.isNot(tok::html_equals)) {
        Attrs.push_back(HTMLStartTagComment::Attribute(
            NameTok.getLocation(), NameTok.getHTMLIdent()));
        continue;
      }

      Token EqualsTok = Tok;
      consumeToken();

      // '=' not followed by a quoted string: warn, keep the attribute name
      // only, and skip the stray '=' / quoted-string tokens.
      if (Tok.isNot(tok::html_quoted_string)) {
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_quoted_string)
            << SourceRange(EqualsTok.getLocation());
        Attrs.push_back(HTMLStartTagComment::Attribute(
            NameTok.getLocation(), NameTok.getHTMLIdent()));
        SkipEqualsAndQuotedStrings();
        continue;
      }

      // Well-formed name="value" attribute.
      Attrs.push_back(HTMLStartTagComment::Attribute(
          NameTok.getLocation(), NameTok.getHTMLIdent(),
          EqualsTok.getLocation(),
          SourceRange(Tok.getLocation(), Tok.getEndLocation()),
          Tok.getHTMLQuotedString()));
      consumeToken();
      continue;
    }

    case tok::html_greater:
      FinishTag(Tok.getLocation(), /* IsSelfClosing = */ false);
      consumeToken();
      return HST;

    case tok::html_slash_greater:
      FinishTag(Tok.getLocation(), /* IsSelfClosing = */ true);
      consumeToken();
      return HST;

    case tok::html_equals:
    case tok::html_quoted_string:
      Diag(Tok.getLocation(),
           diag::warn_doc_html_start_tag_expected_ident_or_greater);
      SkipEqualsAndQuotedStrings();

      // Keep going if what follows can still belong to the start tag.
      if (Tok.is(tok::html_ident) || Tok.is(tok::html_greater) ||
          Tok.is(tok::html_slash_greater))
        continue;

      FinishTag(SourceLocation(), /* IsSelfClosing = */ false);
      return HST;

    default: {
      // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
      FinishTag(SourceLocation(), /* IsSelfClosing = */ false);

      bool StartLineInvalid;
      const unsigned StartLine = SourceMgr.getPresumedLineNumber(
          HST->getLocation(), &StartLineInvalid);
      bool EndLineInvalid;
      const unsigned EndLine = SourceMgr.getPresumedLineNumber(
          Tok.getLocation(), &EndLineInvalid);

      // When the tag start and the offending token are on the same line (or
      // a line number could not be determined), highlight the tag on the
      // warning itself; otherwise add a separate note at the tag start.
      const bool SameLineOrUnknown =
          StartLineInvalid || EndLineInvalid || StartLine == EndLine;
      if (SameLineOrUnknown) {
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_ident_or_greater)
            << HST->getSourceRange();
      } else {
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_ident_or_greater);
        Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
            << HST->getSourceRange();
      }
      return HST;
    }
    }
  }
}
527
528
0
/// Parses an HTML end tag starting at the current tok::html_end_tag token.
/// The location of the closing '>' is passed to Sema when that token is
/// present; otherwise a default-constructed location is passed.
HTMLEndTagComment *Parser::parseHTMLEndTag() {
  assert(Tok.is(tok::html_end_tag));

  Token EndTagTok = Tok;
  consumeToken();

  SourceLocation GreaterLoc;
  const bool HasGreater = Tok.is(tok::html_greater);
  if (HasGreater) {
    GreaterLoc = Tok.getLocation();
    consumeToken();
  }

  return S.actOnHTMLEndTag(EndTagTok.getLocation(), GreaterLoc,
                           EndTagTok.getHTMLTagEndName());
}
542
543
0
/// Parses either a block command (when one is the very first thing seen) or a
/// paragraph of inline content.  The paragraph ends at block content, at end
/// of input, or at an empty line (two newlines, optionally separated by a
/// whitespace-only text token).
BlockContentComment *Parser::parseParagraphOrBlockCommand() {
  SmallVector<InlineContentComment *, 8> Content;

  // Inside the switch, `continue` keeps collecting inline content, while
  // `break` falls through to the `break` after the switch and ends the
  // paragraph.
  for (;;) {
    switch (Tok.getKind()) {
    case tok::verbatim_block_begin:
    case tok::verbatim_line_name:
    case tok::eof:
      break; // Block content or EOF ahead, finish this paragraph.

    case tok::unknown_command:
      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
                                              Tok.getEndLocation(),
                                              Tok.getUnknownCommandName()));
      consumeToken();
      continue;

    case tok::backslash_command:
    case tok::at_command: {
      const CommandInfo *CmdInfo = Traits.getCommandInfo(Tok.getCommandID());

      if (CmdInfo->IsBlockCommand) {
        // A block command that starts the content is parsed as such.
        if (Content.empty())
          return parseBlockCommand();
        break; // Block command ahead, finish this paragraph.
      }

      if (CmdInfo->IsVerbatimBlockEndCommand) {
        Diag(Tok.getLocation(), diag::warn_verbatim_block_end_without_start)
            << Tok.is(tok::at_command) << CmdInfo->Name
            << SourceRange(Tok.getLocation(), Tok.getEndLocation());
        consumeToken();
        continue;
      }

      if (CmdInfo->IsUnknownCommand) {
        Content.push_back(S.actOnUnknownCommand(
            Tok.getLocation(), Tok.getEndLocation(), CmdInfo->getID()));
        consumeToken();
        continue;
      }

      assert(CmdInfo->IsInlineCommand);
      Content.push_back(parseInlineCommand());
      continue;
    }

    case tok::newline: {
      consumeToken();
      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
        consumeToken();
        break; // Two newlines -- end of paragraph.
      }

      // Also allow [tok::newline, tok::text, tok::newline] if the middle
      // tok::text is just whitespace.
      if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
        Token BlankTok = Tok;
        consumeToken();
        if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
          consumeToken();
          break;
        }
        // We have [tok::newline, tok::text, non-newline].  Put back tok::text.
        putBack(BlankTok);
      }

      if (!Content.empty())
        Content.back()->addTrailingNewline();
      continue;
    }

    // Don't deal with HTML tag soup now.
    case tok::html_start_tag:
      Content.push_back(parseHTMLStartTag());
      continue;

    case tok::html_end_tag:
      Content.push_back(parseHTMLEndTag());
      continue;

    case tok::text:
      Content.push_back(S.actOnText(Tok.getLocation(), Tok.getEndLocation(),
                                    Tok.getText()));
      consumeToken();
      continue;

    case tok::verbatim_block_line:
    case tok::verbatim_block_end:
    case tok::verbatim_line_text:
    case tok::html_ident:
    case tok::html_equals:
    case tok::html_quoted_string:
    case tok::html_greater:
    case tok::html_slash_greater:
      llvm_unreachable("should not see this token");
    }
    break;
  }

  return S.actOnParagraphComment(S.copyArray(llvm::ArrayRef(Content)));
}
643
644
0
/// Parses a verbatim block starting at the current tok::verbatim_block_begin
/// token: collects its lines (a bare newline becomes an empty line) and
/// finishes the block with the closing command's name, or with an empty name
/// and default location when the block is unterminated.
VerbatimBlockComment *Parser::parseVerbatimBlock() {
  assert(Tok.is(tok::verbatim_block_begin));

  VerbatimBlockComment *VB = S.actOnVerbatimBlockStart(
      Tok.getLocation(), Tok.getVerbatimBlockID());
  consumeToken();

  // Don't create an empty line if verbatim opening command is followed
  // by a newline.
  if (Tok.is(tok::newline))
    consumeToken();

  SmallVector<VerbatimBlockLineComment *, 8> Lines;
  for (;;) {
    if (Tok.is(tok::verbatim_block_line)) {
      Lines.push_back(S.actOnVerbatimBlockLine(Tok.getLocation(),
                                               Tok.getVerbatimBlockText()));
      consumeToken();
      // The newline that terminates this line does not produce another line.
      if (Tok.is(tok::newline))
        consumeToken();
    } else if (Tok.is(tok::newline)) {
      // Empty line, just a tok::newline.
      Lines.push_back(S.actOnVerbatimBlockLine(Tok.getLocation(), ""));
      consumeToken();
    } else {
      break;
    }
  }

  if (Tok.isNot(tok::verbatim_block_end)) {
    // Unterminated \\verbatim block
    S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
                               S.copyArray(llvm::ArrayRef(Lines)));
    return VB;
  }

  const CommandInfo *EndInfo =
      Traits.getCommandInfo(Tok.getVerbatimBlockID());
  S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), EndInfo->Name,
                             S.copyArray(llvm::ArrayRef(Lines)));
  consumeToken();
  return VB;
}
689
690
0
/// Parses a verbatim line starting at the current tok::verbatim_line_name
/// token.  When no tok::verbatim_line_text token follows, the text is empty
/// and its location is the end of the name token.
VerbatimLineComment *Parser::parseVerbatimLine() {
  assert(Tok.is(tok::verbatim_line_name));

  Token NameTok = Tok;
  consumeToken();

  // Next token might not be a tok::verbatim_line_text if verbatim line
  // starting command comes just before a newline or comment end.
  const bool HasText = Tok.is(tok::verbatim_line_text);
  SourceLocation TextBegin =
      HasText ? Tok.getLocation() : NameTok.getEndLocation();
  StringRef Text = HasText ? Tok.getVerbatimLineText() : StringRef("");

  VerbatimLineComment *VL = S.actOnVerbatimLine(
      NameTok.getLocation(), NameTok.getVerbatimLineID(), TextBegin, Text);
  // The token following the name is consumed in either case.
  consumeToken();
  return VL;
}
715
716
0
/// Dispatches on the current token kind to the parser for the block content
/// that starts there.  Token kinds that cannot start block content are
/// treated as unreachable.
BlockContentComment *Parser::parseBlockContent() {
  switch (Tok.getKind()) {
  case tok::verbatim_block_begin:
    return parseVerbatimBlock();

  case tok::verbatim_line_name:
    return parseVerbatimLine();

  case tok::text:
  case tok::unknown_command:
  case tok::backslash_command:
  case tok::at_command:
  case tok::html_start_tag:
  case tok::html_end_tag:
    return parseParagraphOrBlockCommand();

  case tok::eof:
  case tok::newline:
  case tok::verbatim_block_line:
  case tok::verbatim_block_end:
  case tok::verbatim_line_text:
  case tok::html_ident:
  case tok::html_equals:
  case tok::html_quoted_string:
  case tok::html_greater:
  case tok::html_slash_greater:
    llvm_unreachable("should not see this token");
  }
  llvm_unreachable("bogus token kind");
}
746
747
0
/// Parses an entire comment: block content items are collected until end of
/// input, with newlines before the first item and after every item skipped.
FullComment *Parser::parseFullComment() {
  auto SkipNewlines = [this] {
    while (Tok.is(tok::newline))
      consumeToken();
  };

  // Skip newlines at the beginning of the comment.
  SkipNewlines();

  SmallVector<BlockContentComment *, 8> Blocks;
  while (Tok.isNot(tok::eof)) {
    Blocks.push_back(parseBlockContent());

    // Skip extra newlines after paragraph end.
    SkipNewlines();
  }

  return S.actOnFullComment(S.copyArray(llvm::ArrayRef(Blocks)));
}
762
763
} // end namespace comments
764
} // end namespace clang