//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang/AST/CommentParser.h"
#include "clang/AST/CommentCommandTraits.h"
#include "clang/AST/CommentDiagnostic.h"
#include "clang/AST/CommentSema.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/Support/ErrorHandling.h"

namespace clang {

static inline bool isWhitespace(llvm::StringRef S) {
  for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
    if (!isWhitespace(*I))
      return false;
  }
  return true;
}

namespace comments {

/// Re-lexes a sequence of tok::text tokens.
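/// Command arguments (e.g. the "[in,out] Name" part of a \param command)
/// arrive from the lexer as free-form tok::text tokens; this class re-lexes
/// that text into words and delimited sequences so the parser can extract
/// command arguments, and puts any unconsumed text back into the parser's
/// token stream.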
class TextTokenRetokenizer {
  llvm::BumpPtrAllocator &Allocator;
  Parser &P;

  /// This flag is set when there are no more tokens we can fetch from the
  /// lexer.
  bool NoMoreInterestingTokens;

  /// Token buffer: tokens we have processed and lookahead.
  SmallVector<Token, 16> Toks;

  /// A position in \c Toks.
  struct Position {
    const char *BufferStart;
    const char *BufferEnd;
    const char *BufferPtr;
    SourceLocation BufferStartLoc;
    unsigned CurToken;
  };

  /// Current position in Toks.
  Position Pos;

  bool isEnd() const {
    return Pos.CurToken >= Toks.size();
  }

  /// Sets up the buffer pointers to point to the current token.
  void setupBuffer() {
    assert(!isEnd());
    const Token &Tok = Toks[Pos.CurToken];

    Pos.BufferStart = Tok.getText().begin();
    Pos.BufferEnd = Tok.getText().end();
    Pos.BufferPtr = Pos.BufferStart;
    Pos.BufferStartLoc = Tok.getLocation();
  }

  SourceLocation getSourceLocation() const {
    const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
    return Pos.BufferStartLoc.getLocWithOffset(CharNo);
  }

  char peek() const {
    assert(!isEnd());
    assert(Pos.BufferPtr != Pos.BufferEnd);
    return *Pos.BufferPtr;
  }

  void consumeChar() {
    assert(!isEnd());
    assert(Pos.BufferPtr != Pos.BufferEnd);
    Pos.BufferPtr++;
    if (Pos.BufferPtr == Pos.BufferEnd) {
      Pos.CurToken++;
      if (isEnd() && !addToken())
        return;

      assert(!isEnd());
      setupBuffer();
    }
  }

  /// Add a token.
  /// Returns true on success, false if there are no interesting tokens to
  /// fetch from the lexer.
  bool addToken() {
    if (NoMoreInterestingTokens)
      return false;

    if (P.Tok.is(tok::newline)) {
      // If we see a single newline token between text tokens, skip it.
      Token Newline = P.Tok;
      P.consumeToken();
      if (P.Tok.isNot(tok::text)) {
        P.putBack(Newline);
        NoMoreInterestingTokens = true;
        return false;
      }
    }
    if (P.Tok.isNot(tok::text)) {
      NoMoreInterestingTokens = true;
      return false;
    }

    Toks.push_back(P.Tok);
    P.consumeToken();
    if (Toks.size() == 1)
      setupBuffer();
    return true;
  }

  void consumeWhitespace() {
    while (!isEnd()) {
      if (isWhitespace(peek()))
        consumeChar();
      else
        break;
    }
  }

  void formTokenWithChars(Token &Result,
                          SourceLocation Loc,
                          const char *TokBegin,
                          unsigned TokLength,
                          StringRef Text) {
    Result.setLocation(Loc);
    Result.setKind(tok::text);
    Result.setLength(TokLength);
#ifndef NDEBUG
    Result.TextPtr = "<UNSET>";
    Result.IntVal = 7;
#endif
    Result.setText(Text);
  }

public:
  TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
      Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
    Pos.CurToken = 0;
    addToken();
  }

  /// Extract a word -- sequence of non-whitespace characters.
  bool lexWord(Token &Tok) {
    if (isEnd())
      return false;

    Position SavedPos = Pos;

    consumeWhitespace();
    SmallString<32> WordText;
    const char *WordBegin = Pos.BufferPtr;
    SourceLocation Loc = getSourceLocation();
    while (!isEnd()) {
      const char C = peek();
      if (!isWhitespace(C)) {
        WordText.push_back(C);
        consumeChar();
      } else
        break;
    }
    const unsigned Length = WordText.size();
    if (Length == 0) {
      Pos = SavedPos;
      return false;
    }

    char *TextPtr = Allocator.Allocate<char>(Length + 1);

    memcpy(TextPtr, WordText.c_str(), Length + 1);
    StringRef Text = StringRef(TextPtr, Length);

    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
    return true;
  }

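  /// Extract a sequence of characters that starts with \p OpenDelim and ends
  /// with \p CloseDelim (e.g. the "[in,out]" direction specifier of a \param
  /// command), restoring the original position on failure.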
  bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
    if (isEnd())
      return false;

    Position SavedPos = Pos;

    consumeWhitespace();
    SmallString<32> WordText;
    const char *WordBegin = Pos.BufferPtr;
    SourceLocation Loc = getSourceLocation();
    bool Error = false;
    if (!isEnd()) {
      const char C = peek();
      if (C == OpenDelim) {
        WordText.push_back(C);
        consumeChar();
      } else
        Error = true;
    }
    char C = '\0';
    while (!Error && !isEnd()) {
      C = peek();
      WordText.push_back(C);
      consumeChar();
      if (C == CloseDelim)
        break;
    }
    if (!Error && C != CloseDelim)
      Error = true;

    if (Error) {
      Pos = SavedPos;
      return false;
    }

    const unsigned Length = WordText.size();
    char *TextPtr = Allocator.Allocate<char>(Length + 1);

    memcpy(TextPtr, WordText.c_str(), Length + 1);
    StringRef Text = StringRef(TextPtr, Length);

    formTokenWithChars(Tok, Loc, WordBegin,
                       Pos.BufferPtr - WordBegin, Text);
    return true;
  }

  /// Put back tokens that we didn't consume.
  void putBackLeftoverTokens() {
    if (isEnd())
      return;

    bool HavePartialTok = false;
    Token PartialTok;
    if (Pos.BufferPtr != Pos.BufferStart) {
      formTokenWithChars(PartialTok, getSourceLocation(),
                         Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
                         StringRef(Pos.BufferPtr,
                                   Pos.BufferEnd - Pos.BufferPtr));
      HavePartialTok = true;
      Pos.CurToken++;
    }

    P.putBack(llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
    Pos.CurToken = Toks.size();

    if (HavePartialTok)
      P.putBack(PartialTok);
  }
};

Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
               const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
               const CommandTraits &Traits):
    L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
    Traits(Traits) {
  consumeToken();
}

void Parser::parseParamCommandArgs(ParamCommandComment *PC,
                                   TextTokenRetokenizer &Retokenizer) {
  Token Arg;
  // Check if argument looks like direction specification: [dir]
  // e.g., [in], [out], [in,out]
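  // For example, given the comment text "\param [in,out] Str the string",
  // lexDelimitedSeq() yields "[in,out]" and lexWord() yields "Str"; the
  // remaining text is put back and later parsed as the description paragraph.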
  if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
    S.actOnParamCommandDirectionArg(PC,
                                    Arg.getLocation(),
                                    Arg.getEndLocation(),
                                    Arg.getText());

  if (Retokenizer.lexWord(Arg))
    S.actOnParamCommandParamNameArg(PC,
                                    Arg.getLocation(),
                                    Arg.getEndLocation(),
                                    Arg.getText());
}

void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
                                    TextTokenRetokenizer &Retokenizer) {
  Token Arg;
  if (Retokenizer.lexWord(Arg))
    S.actOnTParamCommandParamNameArg(TPC,
                                     Arg.getLocation(),
                                     Arg.getEndLocation(),
                                     Arg.getText());
}

ArrayRef<Comment::Argument>
Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
  auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];
  unsigned ParsedArgs = 0;
  Token Arg;
  while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
    Args[ParsedArgs] = Comment::Argument{
        SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
    ParsedArgs++;
  }

  return llvm::ArrayRef(Args, ParsedArgs);
}

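// Parses a block command (e.g. \brief, \param, \tparam) together with its
// arguments and the paragraph that follows it. \param and \tparam commands
// get dedicated AST nodes; everything else becomes a BlockCommandComment.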
BlockCommandComment *Parser::parseBlockCommand() {
  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

  ParamCommandComment *PC = nullptr;
  TParamCommandComment *TPC = nullptr;
  BlockCommandComment *BC = nullptr;
  const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
  CommandMarkerKind CommandMarker =
      Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
  if (Info->IsParamCommand) {
    PC = S.actOnParamCommandStart(Tok.getLocation(),
                                  Tok.getEndLocation(),
                                  Tok.getCommandID(),
                                  CommandMarker);
  } else if (Info->IsTParamCommand) {
    TPC = S.actOnTParamCommandStart(Tok.getLocation(),
                                    Tok.getEndLocation(),
                                    Tok.getCommandID(),
                                    CommandMarker);
  } else {
    BC = S.actOnBlockCommandStart(Tok.getLocation(),
                                  Tok.getEndLocation(),
                                  Tok.getCommandID(),
                                  CommandMarker);
  }
  consumeToken();

  if (isTokBlockCommand()) {
    // Block command ahead. We can't nest block commands, so pretend that this
    // command has an empty argument.
    ParagraphComment *Paragraph = S.actOnParagraphComment(std::nullopt);
    if (PC) {
      S.actOnParamCommandFinish(PC, Paragraph);
      return PC;
    } else if (TPC) {
      S.actOnTParamCommandFinish(TPC, Paragraph);
      return TPC;
    } else {
      S.actOnBlockCommandFinish(BC, Paragraph);
      return BC;
    }
  }

  if (PC || TPC || Info->NumArgs > 0) {
    // In order to parse command arguments we need to retokenize a few
    // following text tokens.
    TextTokenRetokenizer Retokenizer(Allocator, *this);

    if (PC)
      parseParamCommandArgs(PC, Retokenizer);
    else if (TPC)
      parseTParamCommandArgs(TPC, Retokenizer);
    else
      S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs));

    Retokenizer.putBackLeftoverTokens();
  }

  // If there's a block command ahead, we will attach an empty paragraph to
  // this command.
  bool EmptyParagraph = false;
  if (isTokBlockCommand())
    EmptyParagraph = true;
  else if (Tok.is(tok::newline)) {
    Token PrevTok = Tok;
    consumeToken();
    EmptyParagraph = isTokBlockCommand();
    putBack(PrevTok);
  }

  ParagraphComment *Paragraph;
  if (EmptyParagraph)
    Paragraph = S.actOnParagraphComment(std::nullopt);
  else {
    BlockContentComment *Block = parseParagraphOrBlockCommand();
    // Since we have checked for a block command, we should have parsed a
    // paragraph.
    Paragraph = cast<ParagraphComment>(Block);
  }

  if (PC) {
    S.actOnParamCommandFinish(PC, Paragraph);
    return PC;
  } else if (TPC) {
    S.actOnTParamCommandFinish(TPC, Paragraph);
    return TPC;
  } else {
    S.actOnBlockCommandFinish(BC, Paragraph);
    return BC;
  }
}

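// Parses an inline command (e.g. \c or \p) and its word arguments. If fewer
// arguments than CommandInfo::NumArgs are present, a warning is emitted and
// the command keeps the arguments that were found.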
InlineCommandComment *Parser::parseInlineCommand() {
  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
  const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());

  const Token CommandTok = Tok;
  consumeToken();

  TextTokenRetokenizer Retokenizer(Allocator, *this);
  ArrayRef<Comment::Argument> Args =
      parseCommandArgs(Retokenizer, Info->NumArgs);

  InlineCommandComment *IC = S.actOnInlineCommand(
      CommandTok.getLocation(), CommandTok.getEndLocation(),
      CommandTok.getCommandID(), Args);

  if (Args.size() < Info->NumArgs) {
    Diag(CommandTok.getEndLocation().getLocWithOffset(1),
         diag::warn_doc_inline_command_not_enough_arguments)
        << CommandTok.is(tok::at_command) << Info->Name << Args.size()
        << Info->NumArgs
        << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation());
  }

  Retokenizer.putBackLeftoverTokens();

  return IC;
}

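// Parses an HTML start tag, e.g. <a href="http://example.com/"> or <br/>,
// collecting name="value" attribute pairs until '>' or '/>' is found, with
// best-effort recovery if the tag is malformed.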
HTMLStartTagComment *Parser::parseHTMLStartTag() {
  assert(Tok.is(tok::html_start_tag));
  HTMLStartTagComment *HST =
      S.actOnHTMLStartTagStart(Tok.getLocation(),
                               Tok.getHTMLTagStartName());
  consumeToken();

  SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
  while (true) {
    switch (Tok.getKind()) {
    case tok::html_ident: {
      Token Ident = Tok;
      consumeToken();
      if (Tok.isNot(tok::html_equals)) {
        Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
                                                       Ident.getHTMLIdent()));
        continue;
      }
      Token Equals = Tok;
      consumeToken();
      if (Tok.isNot(tok::html_quoted_string)) {
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_quoted_string)
            << SourceRange(Equals.getLocation());
        Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
                                                       Ident.getHTMLIdent()));
        while (Tok.is(tok::html_equals) ||
               Tok.is(tok::html_quoted_string))
          consumeToken();
        continue;
      }
      Attrs.push_back(HTMLStartTagComment::Attribute(
          Ident.getLocation(),
          Ident.getHTMLIdent(),
          Equals.getLocation(),
          SourceRange(Tok.getLocation(),
                      Tok.getEndLocation()),
          Tok.getHTMLQuotedString()));
      consumeToken();
      continue;
    }

    case tok::html_greater:
      S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
                                Tok.getLocation(),
                                /* IsSelfClosing = */ false);
      consumeToken();
      return HST;

    case tok::html_slash_greater:
      S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
                                Tok.getLocation(),
                                /* IsSelfClosing = */ true);
      consumeToken();
      return HST;

    case tok::html_equals:
    case tok::html_quoted_string:
      Diag(Tok.getLocation(),
           diag::warn_doc_html_start_tag_expected_ident_or_greater);
      while (Tok.is(tok::html_equals) ||
             Tok.is(tok::html_quoted_string))
        consumeToken();
      if (Tok.is(tok::html_ident) ||
          Tok.is(tok::html_greater) ||
          Tok.is(tok::html_slash_greater))
        continue;

      S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
                                SourceLocation(),
                                /* IsSelfClosing = */ false);
      return HST;

    default:
      // Not a token from an HTML start tag, so the HTML tag ended prematurely.
      S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
                                SourceLocation(),
                                /* IsSelfClosing = */ false);
      bool StartLineInvalid;
      const unsigned StartLine = SourceMgr.getPresumedLineNumber(
                                     HST->getLocation(),
                                     &StartLineInvalid);
      bool EndLineInvalid;
      const unsigned EndLine = SourceMgr.getPresumedLineNumber(
                                   Tok.getLocation(),
                                   &EndLineInvalid);
      if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_ident_or_greater)
            << HST->getSourceRange();
      else {
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_ident_or_greater);
        Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
            << HST->getSourceRange();
      }
      return HST;
    }
  }
}

HTMLEndTagComment *Parser::parseHTMLEndTag() {
  assert(Tok.is(tok::html_end_tag));
  Token TokEndTag = Tok;
  consumeToken();
  SourceLocation Loc;
  if (Tok.is(tok::html_greater)) {
    Loc = Tok.getLocation();
    consumeToken();
  }

  return S.actOnHTMLEndTag(TokEndTag.getLocation(),
                           Loc,
                           TokEndTag.getHTMLTagEndName());
}

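// Parses a paragraph: a run of inline content (text, inline commands, HTML
// tags) that ends at a blank line, EOF, or the start of block content. If the
// paragraph begins with a block command, parsing is delegated to
// parseBlockCommand() instead.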
BlockContentComment *Parser::parseParagraphOrBlockCommand() {
  SmallVector<InlineContentComment *, 8> Content;

  while (true) {
    switch (Tok.getKind()) {
    case tok::verbatim_block_begin:
    case tok::verbatim_line_name:
    case tok::eof:
      break; // Block content or EOF ahead, finish this paragraph.

    case tok::unknown_command:
      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
                                              Tok.getEndLocation(),
                                              Tok.getUnknownCommandName()));
      consumeToken();
      continue;

    case tok::backslash_command:
    case tok::at_command: {
      const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
      if (Info->IsBlockCommand) {
        if (Content.size() == 0)
          return parseBlockCommand();
        break; // Block command ahead, finish this paragraph.
      }
      if (Info->IsVerbatimBlockEndCommand) {
        Diag(Tok.getLocation(),
             diag::warn_verbatim_block_end_without_start)
          << Tok.is(tok::at_command)
          << Info->Name
          << SourceRange(Tok.getLocation(), Tok.getEndLocation());
        consumeToken();
        continue;
      }
      if (Info->IsUnknownCommand) {
        Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
                                                Tok.getEndLocation(),
                                                Info->getID()));
        consumeToken();
        continue;
      }
      assert(Info->IsInlineCommand);
      Content.push_back(parseInlineCommand());
      continue;
    }

    case tok::newline: {
      consumeToken();
      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
        consumeToken();
        break; // Two newlines -- end of paragraph.
      }
      // Also allow [tok::newline, tok::text, tok::newline] if the middle
      // tok::text is just whitespace.
      if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
        Token WhitespaceTok = Tok;
        consumeToken();
        if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
          consumeToken();
          break;
        }
        // We have [tok::newline, tok::text, non-newline]. Put back tok::text.
        putBack(WhitespaceTok);
      }
      if (Content.size() > 0)
        Content.back()->addTrailingNewline();
      continue;
    }

    // Don't deal with HTML tag soup now.
    case tok::html_start_tag:
      Content.push_back(parseHTMLStartTag());
      continue;

    case tok::html_end_tag:
      Content.push_back(parseHTMLEndTag());
      continue;

    case tok::text:
      Content.push_back(S.actOnText(Tok.getLocation(),
                                    Tok.getEndLocation(),
                                    Tok.getText()));
      consumeToken();
      continue;

    case tok::verbatim_block_line:
    case tok::verbatim_block_end:
    case tok::verbatim_line_text:
    case tok::html_ident:
    case tok::html_equals:
    case tok::html_quoted_string:
    case tok::html_greater:
    case tok::html_slash_greater:
      llvm_unreachable("should not see this token");
    }
    break;
  }

  return S.actOnParagraphComment(S.copyArray(llvm::ArrayRef(Content)));
}

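// Parses a verbatim block such as \verbatim ... \endverbatim or
// \code ... \endcode: a tok::verbatim_block_begin token, a sequence of
// verbatim lines and newlines, and (if the block is properly terminated) a
// tok::verbatim_block_end token.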
VerbatimBlockComment *Parser::parseVerbatimBlock() {
  assert(Tok.is(tok::verbatim_block_begin));

  VerbatimBlockComment *VB =
      S.actOnVerbatimBlockStart(Tok.getLocation(),
                                Tok.getVerbatimBlockID());
  consumeToken();

  // Don't create an empty line if the verbatim opening command is followed
  // by a newline.
  if (Tok.is(tok::newline))
    consumeToken();

  SmallVector<VerbatimBlockLineComment *, 8> Lines;
  while (Tok.is(tok::verbatim_block_line) ||
         Tok.is(tok::newline)) {
    VerbatimBlockLineComment *Line;
    if (Tok.is(tok::verbatim_block_line)) {
      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
                                      Tok.getVerbatimBlockText());
      consumeToken();
      if (Tok.is(tok::newline)) {
        consumeToken();
      }
    } else {
      // Empty line, just a tok::newline.
      Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
      consumeToken();
    }
    Lines.push_back(Line);
  }

  if (Tok.is(tok::verbatim_block_end)) {
    const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
    S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), Info->Name,
                               S.copyArray(llvm::ArrayRef(Lines)));
    consumeToken();
  } else {
    // Unterminated \\verbatim block
    S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
                               S.copyArray(llvm::ArrayRef(Lines)));
  }

  return VB;
}

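// Parses a verbatim-line command, which consumes the rest of the line as
// opaque text delivered by the lexer as a single tok::verbatim_line_text
// token (possibly absent, see below).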
VerbatimLineComment *Parser::parseVerbatimLine() {
  assert(Tok.is(tok::verbatim_line_name));

  Token NameTok = Tok;
  consumeToken();

  SourceLocation TextBegin;
  StringRef Text;
  // The next token might not be a tok::verbatim_line_text if the verbatim-line
  // command comes just before a newline or the end of the comment.
  if (Tok.is(tok::verbatim_line_text)) {
    TextBegin = Tok.getLocation();
    Text = Tok.getVerbatimLineText();
  } else {
    TextBegin = NameTok.getEndLocation();
    Text = "";
  }

  VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
                                                NameTok.getVerbatimLineID(),
                                                TextBegin,
                                                Text);
  consumeToken();
  return VL;
}

BlockContentComment *Parser::parseBlockContent() {
  switch (Tok.getKind()) {
  case tok::text:
  case tok::unknown_command:
  case tok::backslash_command:
  case tok::at_command:
  case tok::html_start_tag:
  case tok::html_end_tag:
    return parseParagraphOrBlockCommand();

  case tok::verbatim_block_begin:
    return parseVerbatimBlock();

  case tok::verbatim_line_name:
    return parseVerbatimLine();

  case tok::eof:
  case tok::newline:
  case tok::verbatim_block_line:
  case tok::verbatim_block_end:
  case tok::verbatim_line_text:
  case tok::html_ident:
  case tok::html_equals:
  case tok::html_quoted_string:
  case tok::html_greater:
  case tok::html_slash_greater:
    llvm_unreachable("should not see this token");
  }
  llvm_unreachable("bogus token kind");
}

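// Parses a complete comment: a sequence of block-content items (paragraphs,
// block commands, verbatim blocks, and verbatim lines) separated by newlines.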
FullComment *Parser::parseFullComment() {
  // Skip newlines at the beginning of the comment.
  while (Tok.is(tok::newline))
    consumeToken();

  SmallVector<BlockContentComment *, 8> Blocks;
  while (Tok.isNot(tok::eof)) {
    Blocks.push_back(parseBlockContent());

    // Skip extra newlines after paragraph end.
    while (Tok.is(tok::newline))
      consumeToken();
  }
  return S.actOnFullComment(S.copyArray(llvm::ArrayRef(Blocks)));
}

} // end namespace comments
} // end namespace clang