Coverage Report

Created: 2025-07-23 06:45

/proc/self/cwd/cpp/htmlparser/parser.cc
Line
Count
Source (jump to first uncovered line)
1
#include <algorithm>
2
#include <set>
3
#include <tuple>
4
#ifdef DUMP_NODES
5
#include <iostream>  // For DumpDocument
6
#endif               // DUMP_NODES
7
8
#include "absl/flags/flag.h"
9
#include "absl/status/status.h"
10
#include "cpp/htmlparser/atomutil.h"
11
#include "cpp/htmlparser/comparators.h"
12
#include "cpp/htmlparser/defer.h"
13
#include "cpp/htmlparser/doctype.h"
14
#include "cpp/htmlparser/foreign.h"
15
#include "cpp/htmlparser/logging.h"
16
#include "cpp/htmlparser/parser.h"
17
#include "cpp/htmlparser/strings.h"
18
19
ABSL_RETIRED_FLAG(uint32_t, htmlparser_max_nodes_depth_count, 245, "retired");
20
21
namespace htmlparser {
22
23
namespace {
24
// Internal functions forward declarations.
25
std::string ExtractWhitespace(const std::string& s);
26
27
#ifdef DUMP_NODES
28
// Debug helper: walks the subtree below `root_node` depth-first and writes
// "<namespace>: <tag name>" for every descendant to stderr, one per line.
// `root_node` itself is not printed.
void DumpNode(Node* root_node) {
  Node* child = root_node->FirstChild();
  while (child) {
    std::cerr << child->NameSpace() << ": "
              << AtomUtil::ToString(child->DataAtom()) << std::endl;
    DumpNode(child);
    child = child->NextSibling();
  }
}
35
// Dumps the nodes in the DOM in their final order after parsing.
36
void DumpDocument(Document* doc) {
  // Start the dump at the document's root node; DumpNode prints descendants.
  Node* root = doc->RootNode();
  DumpNode(root);
}
37
38
#endif  // DUMP_NODES
39
40
}  // namespace.
41
42
0
std::unique_ptr<Document> Parse(std::string_view html) {
43
0
  std::unique_ptr<Parser> parser = std::make_unique<Parser>(
44
0
      html,
45
0
      ParseOptions{.scripting = true,
46
0
                   .frameset_ok = true,
47
0
                   .record_node_offsets = true,
48
0
                   .record_attribute_offsets = true,
49
0
                   .count_num_terms_in_text_node = true});
50
0
  return parser->Parse();
51
0
}
52
53
std::unique_ptr<Document> ParseWithOptions(std::string_view html,
54
0
                                           const ParseOptions& options) {
55
0
  return std::make_unique<Parser>(html, options)->Parse();
56
0
}
57
58
std::unique_ptr<Document> ParseFragmentWithOptions(std::string_view html,
59
                                                   const ParseOptions& options,
60
0
                                                   Node* fragment_parent) {
61
0
  std::unique_ptr<Parser> parser = std::make_unique<Parser>(
62
0
      html, options, fragment_parent);
63
0
  Node* root = parser->document_->NewNode(NodeType::ELEMENT_NODE, Atom::HTML);
64
0
  parser->document_->root_node_->AppendChild(root);
65
0
  parser->open_elements_stack_.Push(root);
66
67
0
  if (fragment_parent && fragment_parent->DataAtom() == Atom::TEMPLATE) {
68
0
    parser->template_stack_.push_back(std::bind(&Parser::InTemplateIM,
69
0
                                                parser.get()));
70
0
  }
71
72
0
  parser->ResetInsertionMode();
73
74
0
  for (Node* node = fragment_parent; node; node = node->Parent()) {
75
0
    if (node->Type() == NodeType::ELEMENT_NODE &&
76
0
        node->DataAtom() == Atom::FORM) {
77
0
      parser->form_ = node;
78
0
      break;
79
0
    }
80
0
  }
81
82
0
  auto doc = parser->Parse();
83
84
0
  if (doc->status().ok()) {
85
0
    Node* parent = fragment_parent ? root : doc->root_node_;
86
0
    for (Node* c = parent->FirstChild(); c;) {
87
0
      Node* next = c->NextSibling();
88
0
      doc->fragment_nodes_.push_back(std::move(c));
89
0
      parent->RemoveChild(c);
90
0
      c = next;
91
0
    }
92
0
  }
93
94
0
  return doc;
95
0
}
96
97
std::unique_ptr<Document> ParseFragment(std::string_view html,
98
0
                                        Node* fragment_parent) {
99
  // Expects clients to update the offsets relative to the parent which
100
  // this fragment belongs.
101
0
  ParseOptions options = {.scripting = true,
102
0
                          .frameset_ok = true,
103
0
                          .record_node_offsets = true,
104
0
                          .record_attribute_offsets = true,
105
0
                          .count_num_terms_in_text_node = true};
106
0
  return ParseFragmentWithOptions(html, options, fragment_parent);
107
0
}
108
109
// Builds a parser over `html`.
//
// `options` supplies the node callback and the scripting / frameset / offset
// recording / term counting switches. A non-null `fragment_parent` puts the
// parser in fragment mode: it becomes the context node and its tag name is
// handed to the tokenizer (an empty string is passed otherwise).
Parser::Parser(std::string_view html, const ParseOptions& options,
               Node* fragment_parent)
    : tokenizer_(std::make_unique<Tokenizer>(
          html,
          fragment_parent ? AtomUtil::ToString(fragment_parent->atom_) : "")),
      on_node_callback_(options.on_node_callback),
      document_(new Document),
      scope_marker_(document_->NewNode(NodeType::SCOPE_MARKER_NODE)),
      scripting_(options.scripting),
      frameset_ok_(options.frameset_ok),
      record_node_offsets_(options.record_node_offsets),
      record_attribute_offsets_(options.record_attribute_offsets),
      count_num_terms_in_text_node_(options.count_num_terms_in_text_node),
      fragment_(fragment_parent != nullptr),
      context_node_(fragment_parent) {
  // Every parse starts in the "initial" insertion mode.
  insertion_mode_ = std::bind(&Parser::InitialIM, this);
  // Remember the size of the input in the document metadata.
  document_->metadata_.html_src_bytes = html.size();
}
127
128
11.5k
std::unique_ptr<Document> Parser::Parse() {
129
11.5k
  bool eof = tokenizer_->IsEOF();
130
15.3M
  while (!eof) {
131
15.3M
    Node* node = open_elements_stack_.Top();
132
15.3M
    tokenizer_->SetAllowCDATA(node && !node->name_space_.empty());
133
    // Read and parse the next token.
134
15.3M
    TokenType token_type = tokenizer_->Next(!template_stack_.empty());
135
136
    // No end of input, but error token. Parsing failed.
137
15.3M
    if (token_type == TokenType::ERROR_TOKEN) {
138
11.5k
      eof = tokenizer_->IsEOF();
139
11.5k
      if (!eof && tokenizer_->Error()) {
140
0
        document_->status_ = absl::InvalidArgumentError(
141
0
            "htmlparser::Parser tokenizer error.");
142
0
        return std::move(document_);
143
0
      }
144
11.5k
    }
145
15.3M
    token_ = tokenizer_->token();
146
15.3M
    ParseCurrentToken();
147
15.3M
  }
148
149
#ifdef DUMP_NODES
150
  DumpDocument(document_.get());
151
#endif
152
153
11.5k
  document_->metadata_.document_end_location = tokenizer_->CurrentPosition();
154
11.5k
  return std::move(document_);
155
11.5k
}  // End Parser::Parse.
156
157
34.8M
// Returns the element on top of the stack of open elements, or the document
// root node when the stack has no top element.
Node* Parser::top() {
  Node* current = open_elements_stack_.Top();
  return current ? current : document_->root_node_;
}  // End Parser::Top.
165
166
template <typename... Args>
167
722k
bool Parser::PopUntil(Scope scope, Args... match_tags) {
168
722k
  std::vector<Atom> argsList{match_tags...};
169
722k
  int i = IndexOfElementInScope(scope, argsList);
170
722k
  if (i != -1) {
171
470k
    open_elements_stack_.Pop(open_elements_stack_.size() - i);
172
470k
    return true;
173
470k
  }
174
251k
  return false;
175
722k
}  // End Parser::PopUntil.
bool htmlparser::Parser::PopUntil<htmlparser::Atom>(htmlparser::Parser::Scope, htmlparser::Atom)
Line
Count
Source
167
608k
bool Parser::PopUntil(Scope scope, Args... match_tags) {
168
608k
  std::vector<Atom> argsList{match_tags...};
169
608k
  int i = IndexOfElementInScope(scope, argsList);
170
608k
  if (i != -1) {
171
360k
    open_elements_stack_.Pop(open_elements_stack_.size() - i);
172
360k
    return true;
173
360k
  }
174
248k
  return false;
175
608k
}  // End Parser::PopUntil.
bool htmlparser::Parser::PopUntil<htmlparser::Atom, htmlparser::Atom, htmlparser::Atom, htmlparser::Atom, htmlparser::Atom, htmlparser::Atom>(htmlparser::Parser::Scope, htmlparser::Atom, htmlparser::Atom, htmlparser::Atom, htmlparser::Atom, htmlparser::Atom, htmlparser::Atom)
Line
Count
Source
167
4.33k
bool Parser::PopUntil(Scope scope, Args... match_tags) {
168
4.33k
  std::vector<Atom> argsList{match_tags...};
169
4.33k
  int i = IndexOfElementInScope(scope, argsList);
170
4.33k
  if (i != -1) {
171
756
    open_elements_stack_.Pop(open_elements_stack_.size() - i);
172
756
    return true;
173
756
  }
174
3.57k
  return false;
175
4.33k
}  // End Parser::PopUntil.
bool htmlparser::Parser::PopUntil<htmlparser::Atom, htmlparser::Atom, htmlparser::Atom>(htmlparser::Parser::Scope, htmlparser::Atom, htmlparser::Atom, htmlparser::Atom)
Line
Count
Source
167
23.9k
bool Parser::PopUntil(Scope scope, Args... match_tags) {
168
23.9k
  std::vector<Atom> argsList{match_tags...};
169
23.9k
  int i = IndexOfElementInScope(scope, argsList);
170
23.9k
  if (i != -1) {
171
23.9k
    open_elements_stack_.Pop(open_elements_stack_.size() - i);
172
23.9k
    return true;
173
23.9k
  }
174
0
  return false;
175
23.9k
}  // End Parser::PopUntil.
bool htmlparser::Parser::PopUntil<htmlparser::Atom, htmlparser::Atom>(htmlparser::Parser::Scope, htmlparser::Atom, htmlparser::Atom)
Line
Count
Source
167
85.5k
bool Parser::PopUntil(Scope scope, Args... match_tags) {
168
85.5k
  std::vector<Atom> argsList{match_tags...};
169
85.5k
  int i = IndexOfElementInScope(scope, argsList);
170
85.5k
  if (i != -1) {
171
85.5k
    open_elements_stack_.Pop(open_elements_stack_.size() - i);
172
85.5k
    return true;
173
85.5k
  }
174
0
  return false;
175
85.5k
}  // End Parser::PopUntil.
176
177
int Parser::IndexOfElementInScope(Scope scope,
178
1.21M
                                  const std::vector<Atom>& match_tags) const {
179
338M
  for (int i = open_elements_stack_.size() - 1; i >= 0; --i) {
180
338M
    Node* node = open_elements_stack_.at(i);
181
338M
    if (node->name_space_.empty()) {
182
339M
      for (Atom a : match_tags) {
183
339M
        if (a == node->atom_) {
184
935k
          return i;
185
935k
        }
186
339M
      }
187
337M
      switch (scope) {
188
231M
        case Scope::DefaultScope:
189
          // No-op.
190
231M
          break;
191
695k
        case Scope::ListItemScope:
192
695k
          if (node->atom_ == Atom::OL || node->atom_ == Atom::UL) return -1;
193
694k
          break;
194
103M
        case Scope::ButtonScope:
195
103M
          if (node->atom_ == Atom::BUTTON) return -1;
196
103M
          break;
197
103M
        case Scope::TableScope:
198
3.00M
          if (node->atom_ == Atom::HTML || node->atom_ == Atom::TABLE ||
199
3.00M
              node->atom_ == Atom::TEMPLATE) {
200
757
            return -1;
201
757
          }
202
3.00M
          break;
203
3.00M
        case Scope::SelectScope:
204
0
          if (node->atom_ != Atom::OPTGROUP && node->atom_ != Atom::OPTION) {
205
0
            return -1;
206
0
          }
207
0
          break;
208
0
        default:
209
0
          CHECK(false) << "HTML Parser reached unreachable scope";
210
337M
      }
211
337M
    }
212
213
337M
    switch (scope) {
214
231M
      case Scope::DefaultScope:
215
231M
      case Scope::ListItemScope:
216
334M
      case Scope::ButtonScope: {
217
1.00G
        for (auto& scope_stop_tags : kDefaultScopeStopTags) {
218
1.00G
          if (scope_stop_tags.first == node->name_space_) {
219
3.01G
            for (Atom t : scope_stop_tags.second) {
220
3.01G
              if (t == Atom::UNKNOWN) break;
221
3.01G
              if (t == node->atom_) return -1;
222
3.01G
            }
223
334M
          }
224
1.00G
        }
225
334M
        break;
226
334M
      }
227
334M
      default:
228
3.00M
        break;
229
337M
    }
230
337M
  }
231
0
  return -1;
232
1.21M
}  // Parser::IndexOfElementInScope.
233
234
template <typename... Args>
235
491k
bool Parser::ElementInScope(Scope scope, Args... tags) const {
236
491k
  std::vector<Atom> argsList{tags...};
237
491k
  return IndexOfElementInScope(scope, argsList) != -1;
238
491k
}  // Parser::ElementInScope.
239
240
3.07M
// Pops open elements until the top of the stack is a boundary element for
// `scope`:
//   TableScope     -> html, table, template
//   TableRowScope  -> html, tr, template
//   TableBodyScope -> html, tbody, tfoot, thead, template
// The boundary element itself stays on the stack. Any other scope value
// trips a CHECK. If no boundary is found, the stack is left unchanged.
void Parser::ClearStackToContext(Scope scope) {
  for (int i = open_elements_stack_.size() - 1; i >= 0; --i) {
    const Atom atom = open_elements_stack_.at(i)->atom_;
    const bool common_boundary = atom == Atom::HTML || atom == Atom::TEMPLATE;

    bool is_boundary = false;
    switch (scope) {
      case Scope::TableScope:
        is_boundary = common_boundary || atom == Atom::TABLE;
        break;
      case Scope::TableRowScope:
        is_boundary = common_boundary || atom == Atom::TR;
        break;
      case Scope::TableBodyScope:
        is_boundary = common_boundary || atom == Atom::TBODY ||
                      atom == Atom::TFOOT || atom == Atom::THEAD;
        break;
      default:
        CHECK(false) << "HTML Parser reached unreachable scope";
    }

    if (is_boundary) {
      // Drop everything that sits above the boundary element.
      open_elements_stack_.Pop(open_elements_stack_.size() - i - 1);
      return;
    }
  }
}  // Parser::ClearStackToContext.
270
271
void Parser::GenerateImpliedEndTags(
272
2.20k
    const std::initializer_list<Atom>& exceptions) {
273
2.20k
  int i = open_elements_stack_.size() - 1;
274
8.21k
  for (; i >= 0; --i) {
275
8.21k
    Node* node = open_elements_stack_.at(i);
276
8.21k
    if (node->node_type_ == NodeType::ELEMENT_NODE) {
277
8.21k
      switch (node->atom_) {
278
492
        case Atom::DD:
279
1.27k
        case Atom::DT:
280
2.51k
        case Atom::LI:
281
2.51k
        case Atom::OPTGROUP:
282
2.51k
        case Atom::OPTION:
283
2.59k
        case Atom::P:
284
2.84k
        case Atom::RB:
285
3.34k
        case Atom::RP:
286
6.00k
        case Atom::RT:
287
6.20k
        case Atom::RTC:
288
6.20k
          for (auto e : exceptions) {
289
772
            if (node->atom_ == e) {
290
              // Pop nodes and return early.
291
194
              open_elements_stack_.Pop(open_elements_stack_.size() - i - 1);
292
194
              return;
293
194
            }
294
772
          }
295
6.00k
          continue;
296
6.00k
        default:
297
2.01k
          break;
298
8.21k
      }
299
8.21k
    }
300
2.01k
    break;
301
8.21k
  }
302
2.01k
  open_elements_stack_.Pop(open_elements_stack_.size() - i - 1);
303
2.01k
}  // Parser::GenerateImpliedEndTags.
304
305
24.0M
// Inserts `node` into the tree: through FosterParent() when foster parenting
// applies, otherwise as the last child of the current top node. Element
// nodes are additionally pushed on the stack of open elements.
void Parser::AddChild(Node* node) {
  if (!ShouldFosterParent()) {
    top()->AppendChild(node);
  } else {
    FosterParent(node);
  }

  if (node->node_type_ != NodeType::ELEMENT_NODE) {
    return;
  }
  open_elements_stack_.Push(node);
}  // Parser::AddChild.
316
317
24.5M
bool Parser::ShouldFosterParent() {
318
24.5M
  if (!foster_parenting_) return false;
319
5.71M
  Atom a = top()->atom_;
320
5.71M
  return (a == Atom::TABLE || a == Atom::TBODY || a == Atom::TFOOT ||
321
5.71M
          a == Atom::THEAD || a == Atom::TR);
322
24.5M
}  // Parser::ShouldFosterParent.
323
324
49.2k
// Inserts `node` at its foster-parent location.
//
//  * If a <template> sits above the last <table> on the stack of open
//    elements (or there is no <table> at all), `node` becomes the last child
//    of that template.
//  * Otherwise `node` is inserted into the table's parent immediately before
//    the table. Without a table the parent is the first entry of the stack
//    and `node` is appended.
//  * A text `node` that would land right after an existing text node is
//    merged into that node instead of being inserted.
void Parser::FosterParent(Node* node) {
  const int stack_size = static_cast<int>(open_elements_stack_.size());

  // Find the last (topmost) <table>; table_index is -1 when there is none.
  Node* table = nullptr;
  int table_index = stack_size - 1;
  for (; table_index >= 0; --table_index) {
    Node* candidate = open_elements_stack_.at(table_index);
    if (candidate->atom_ == Atom::TABLE) {
      table = candidate;
      break;
    }
  }

  // A <template> only takes precedence when it is above the table, so only
  // the entries above table_index need to be inspected. Scanning the whole
  // stack (as was done previously) gives the same answer at a much higher
  // cost on deep stacks.
  for (int j = stack_size - 1; j > table_index; --j) {
    Node* candidate = open_elements_stack_.at(j);
    if (candidate->atom_ == Atom::TEMPLATE) {
      candidate->AppendChild(node);
      return;
    }
  }

  // Without a table the foster parent is the first entry of the stack
  // (the html element).
  Node* parent = table ? table->Parent() : open_elements_stack_.at(0);
  if (!parent) {
    // The table has no parent: use the element just below it on the stack.
    // NOTE(review): assumes the table is never the bottom entry of the stack
    // (table_index >= 1) when this branch is taken -- confirm.
    parent = open_elements_stack_.at(table_index - 1);
  }

  Node* prev = table ? table->PrevSibling() : parent->LastChild();

  // Adjacent text nodes are merged rather than inserted side by side.
  if (prev && prev->node_type_ == NodeType::TEXT_NODE &&
      node->node_type_ == NodeType::TEXT_NODE) {
    prev->data_.append(node->data_);
    return;
  }

  parent->InsertBefore(node, table);
}  // Parser::FosterParent.
375
376
568k
void Parser::AddText(const std::string& text) {
377
568k
  if (text.empty()) return;
378
379
568k
  auto text_node = document_->NewNode(NodeType::TEXT_NODE);
380
568k
  if (record_node_offsets_) {
381
0
    text_node->line_col_in_html_src_ = token_.line_col_in_html_src;
382
0
  }
383
384
568k
  if (ShouldFosterParent()) {
385
30.2k
    text_node->data_.assign(text, 0, text.size());
386
30.2k
    FosterParent(text_node);
387
30.2k
    return;
388
30.2k
  }
389
390
538k
  Node* top_node = top();
391
538k
  if (top_node->LastChild() &&
392
538k
      top_node->LastChild()->node_type_ == NodeType::TEXT_NODE) {
393
4.74k
    top_node->LastChild()->data_.append(text);
394
4.74k
    return;
395
4.74k
  }
396
397
533k
  text_node->data_.assign(text, 0, text.size());
398
533k
  AddChild(text_node);
399
  // Count number of terms in ths text node, except if this is <script>,
400
  // <textarea> or a comment node.
401
533k
  if (count_num_terms_in_text_node_ && text_node->Parent() &&
402
533k
      text_node->Parent()->DataAtom() != Atom::SCRIPT &&
403
533k
      text_node->Parent()->Type() != NodeType::COMMENT_NODE &&
404
533k
      text_node->Parent()->DataAtom() != Atom::TEXTAREA) {
405
0
    text_node->num_terms_ = Strings::CountTerms(text);
406
0
  }
407
533k
}  // Parser::AddText.
408
409
14.9M
void Parser::AddElement() {
410
14.9M
  Node* element_node = document_->NewNode(NodeType::ELEMENT_NODE, token_.atom);
411
14.9M
  if (token_.atom == Atom::UNKNOWN) {
412
6.17M
    element_node->data_ = token_.data;
413
6.17M
  }
414
415
14.9M
  if (record_node_offsets_) {
416
0
    element_node->line_col_in_html_src_ = token_.line_col_in_html_src;
417
0
  }
418
419
14.9M
  switch (token_.atom) {
420
11.7k
    case Atom::HTML: {
421
11.7k
      element_node->SetManufactured(document_->metadata_.has_manufactured_html);
422
11.7k
      break;
423
0
    }
424
11.5k
    case Atom::HEAD: {
425
11.5k
      element_node->SetManufactured(document_->metadata_.has_manufactured_head);
426
11.5k
      break;
427
0
    }
428
11.5k
    case Atom::BODY: {
429
11.5k
      element_node->SetManufactured(document_->metadata_.has_manufactured_body);
430
11.5k
      break;
431
0
    }
432
14.8M
    default:
433
14.8M
      break;
434
14.9M
  }
435
436
14.9M
  std::copy(token_.attributes.begin(), token_.attributes.end(),
437
14.9M
            std::back_inserter(element_node->attributes_));
438
14.9M
  AddChild(element_node);
439
440
14.9M
  if (!record_attribute_offsets_ && !element_node->attributes_.empty()) {
441
185k
    std::transform(
442
185k
        element_node->attributes_.begin(), element_node->attributes_.end(),
443
381k
        element_node->attributes_.begin(), [](Attribute attr) -> Attribute {
444
381k
          attr.line_col_in_html_src = std::nullopt;
445
381k
          return attr;
446
381k
        });
447
185k
  }
448
449
14.9M
  if (on_node_callback_) {
450
0
    on_node_callback_(element_node, token_);
451
0
  }
452
14.9M
}  // Parser::AddElement.
453
454
// Section 12.2.4.3.
455
4.12M
void Parser::AddFormattingElement() {
456
4.12M
  Atom tag_atom = token_.atom;
457
4.12M
  AddElement();
458
459
  // Implement the Noah's Ark clause, but with three per family instead of two.
460
4.12M
  int identical_elements = 0;
461
4.72G
  for (int i = active_formatting_elements_stack_.size() - 1; i >= 0; --i) {
462
4.72G
    Node* node = active_formatting_elements_stack_.at(i);
463
4.72G
    if (node->node_type_ == NodeType::SCOPE_MARKER_NODE) break;
464
4.71G
    if (node->node_type_ != NodeType::ELEMENT_NODE) continue;
465
4.71G
    if (node->name_space_ != "") continue;
466
4.71G
    if (node->atom_ != tag_atom) continue;
467
4.29G
    if (node->attributes_.size() != token_.attributes.size()) continue;
468
469
4.29G
    bool attr_matched = false;
470
4.29G
    for (int j = 0; j < node->attributes_.size(); ++j) {
471
830k
      for (int k = 0; k < token_.attributes.size(); ++k) {
472
490k
        attr_matched = (node->attributes_[j] == token_.attributes[k]);
473
        // Found a match for this attribute, continue with the next attribute.
474
490k
        if (attr_matched) break;
475
490k
      }
476
477
445k
      if (attr_matched) continue;
478
479
      // If we get here, there is no attribute that matches a.
480
      // Therefore the element is not identical to the new one.
481
      // Stop processing rest of the attributes and proceed to next element.
482
339k
      break;
483
445k
    }
484
485
4.29G
    if (attr_matched) {
486
84.1k
      identical_elements++;
487
84.1k
      if (identical_elements >= 3) {
488
27.8k
        active_formatting_elements_stack_.Remove(node);
489
27.8k
      }
490
84.1k
    }
491
4.29G
  }
492
493
4.12M
  active_formatting_elements_stack_.Push(top());
494
4.12M
}  // Parser::AddFormattingElement.
495
496
// Section 12.2.4.3.
497
87.7k
void Parser::ClearActiveFormattingElements() {
498
476k
  while (active_formatting_elements_stack_.size() != 0) {
499
476k
    Node* node = active_formatting_elements_stack_.Pop();
500
476k
    if (node->node_type_ == NodeType::SCOPE_MARKER_NODE) break;
501
476k
  }
502
87.7k
}  // Parser::ClearActiveFormattingElements.
503
504
// Section 12.2.4.3.
505
10.5M
void Parser::ReconstructActiveFormattingElements() {
506
10.5M
  Node* node = active_formatting_elements_stack_.Top();
507
10.5M
  if (!node) return;
508
509
7.19M
  if (node->node_type_ == NodeType::SCOPE_MARKER_NODE ||
510
7.19M
      open_elements_stack_.Index(node) != -1) {
511
7.16M
    return;
512
7.16M
  }
513
514
33.2k
  int i = active_formatting_elements_stack_.size() - 1;
515
7.18M
  while (node->node_type_ != NodeType::SCOPE_MARKER_NODE &&
516
7.18M
         open_elements_stack_.Index(node) == -1) {
517
7.16M
    if (i == 0) {
518
11.6k
      i = -1;
519
11.6k
      break;
520
11.6k
    }
521
7.15M
    i--;
522
7.15M
    node = active_formatting_elements_stack_.at(i);
523
7.15M
  }
524
525
7.16M
  do {
526
7.16M
    i++;
527
7.16M
    auto clone = document_->CloneNode(active_formatting_elements_stack_.at(i));
528
7.16M
    AddChild(clone);
529
7.16M
    active_formatting_elements_stack_.Replace(i, clone);
530
7.16M
  } while (i < active_formatting_elements_stack_.size() - 1);
531
33.2k
}  // Parser::ReconstructActiveFormattingElements.
532
533
// Section 12.2.5.
534
29.7k
void Parser::AcknowledgeSelfClosingTag() {
535
29.7k
  has_self_closing_token_ = false;
536
29.7k
}  // Parser::AcknowledgeSelfClosingTag.
537
538
// Section 12.2.4.1, "using the rules for".
539
17.6k
void Parser::SetOriginalIM() {
540
17.6k
  CHECK(!original_insertion_mode_)
541
17.6k
      << "html: bad parser state: original_insertion_mode was set twice";
542
17.6k
  original_insertion_mode_ = insertion_mode_;
543
17.6k
}  // Parser::SetOriginalIM.
544
545
// Section 12.2.4.1, "reset the insertion mode".
546
152k
void Parser::ResetInsertionMode() {
547
546k
  for (int i = open_elements_stack_.size() - 1; i >= 0; --i) {
548
546k
    Node* node = open_elements_stack_.at(i);
549
546k
    bool last = (i == 0);
550
546k
    if (last && context_node_) {
551
0
      node = context_node_;
552
0
    }
553
554
546k
    switch (node->atom_) {
555
0
      case Atom::SELECT:
556
0
        if (!last) {
557
0
          Node* ancestor = node;
558
0
          Node* first = open_elements_stack_.at(0);
559
0
          while (ancestor != first) {
560
0
            ancestor = open_elements_stack_.at(
561
0
                open_elements_stack_.Index(ancestor) - 1);
562
0
            switch (ancestor->atom_) {
563
0
              case Atom::TEMPLATE:
564
0
                insertion_mode_ = std::bind(&Parser::InSelectIM, this);
565
0
                return;
566
0
              case Atom::TABLE:
567
0
                insertion_mode_ = std::bind(&Parser::InSelectInTableIM, this);
568
0
                return;
569
0
              default:
570
0
                break;
571
0
            }
572
0
          }
573
0
        }
574
0
        insertion_mode_ = std::bind(&Parser::InSelectIM, this);
575
0
        break;
576
7.67k
      case Atom::TD:
577
145k
      case Atom::TH:
578
        // https://bugs.chromium.org/p/chromium/issues/detail?id=829668
579
145k
        insertion_mode_ = std::bind(&Parser::InCellIM, this);
580
145k
        break;
581
0
      case Atom::TR:
582
0
        insertion_mode_ = std::bind(&Parser::InRowIM, this);
583
0
        break;
584
0
      case Atom::TBODY:
585
0
      case Atom::THEAD:
586
0
      case Atom::TFOOT:
587
0
        insertion_mode_ = std::bind(&Parser::InTableBodyIM, this);
588
0
        break;
589
0
      case Atom::CAPTION:
590
0
        insertion_mode_ = std::bind(&Parser::InCaptionIM, this);
591
0
        break;
592
0
      case Atom::COLGROUP:
593
0
        insertion_mode_ = std::bind(&Parser::InColumnGroupIM, this);
594
0
        break;
595
0
      case Atom::TABLE:
596
0
        insertion_mode_ = std::bind(&Parser::InTableIM, this);
597
0
        break;
598
0
      case Atom::TEMPLATE:
599
        // TODO: remove this divergence from the HTML5 spec.
600
0
        if (!node->name_space_.empty()) {
601
0
          continue;
602
0
        }
603
0
        insertion_mode_ = template_stack_.back();
604
0
        break;
605
0
      case Atom::HEAD:
606
        // https://bugs.chromium.org/p/chromium/issues/detail?id=829668
607
0
        insertion_mode_ = std::bind(&Parser::InHeadIM, this);
608
0
        break;
609
6.62k
      case Atom::BODY:
610
6.62k
        insertion_mode_ = std::bind(&Parser::InBodyIM, this);
611
6.62k
        break;
612
0
      case Atom::FRAMESET:
613
0
        insertion_mode_ = std::bind(&Parser::InFramesetIM, this);
614
0
        break;
615
0
      case Atom::HTML:
616
0
        if (head_) {
617
0
          insertion_mode_ = std::bind(&Parser::AfterHeadIM, this);
618
0
        } else {
619
0
          insertion_mode_ = std::bind(&Parser::BeforeHeadIM, this);
620
0
        }
621
0
        break;
622
394k
      default:
623
394k
        if (last) {
624
0
          insertion_mode_ = std::bind(&Parser::InBodyIM, this);
625
0
          return;
626
0
        }
627
394k
        continue;
628
546k
    }
629
152k
    return;
630
546k
  }
631
152k
}  // Parser::ResetInsertionMode.
632
633
// Section 12.2.6.4.1.
634
15.4k
bool Parser::InitialIM() {
635
15.4k
  switch (token_.token_type) {
636
3.28k
    case TokenType::TEXT_TOKEN: {
637
      // https://www.w3.org/TR/2011/WD-html5-20110113/tokenization.html#the-initial-insertion-mode
638
3.28k
      Strings::TrimLeft(&token_.data, Strings::kWhitespace);
639
3.28k
      if (token_.data.empty()) {
640
        // It was all whitespace, so ignore it.
641
1.05k
        return true;
642
1.05k
      }
643
2.23k
      break;
644
3.28k
    }
645
2.86k
    case TokenType::COMMENT_TOKEN: {
646
2.86k
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
647
2.86k
      node->data_ = std::move(token_.data);
648
2.86k
      if (record_node_offsets_) {
649
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
650
0
      }
651
2.86k
      node->SetManufactured(token_.is_manufactured);
652
2.86k
      document_->root_node_->AppendChild(node);
653
2.86k
      return true;
654
3.28k
    }
655
106
    case TokenType::DOCTYPE_TOKEN: {
656
106
      auto doctype_node = document_->NewNode(NodeType::DOCTYPE_NODE);
657
106
      bool quirks_mode = ParseDoctype(token_.data, doctype_node);
658
106
      if (record_node_offsets_) {
659
0
        doctype_node->line_col_in_html_src_ = token_.line_col_in_html_src;
660
0
      }
661
106
      document_->root_node_->AppendChild(doctype_node);
662
106
      document_->metadata_.quirks_mode = quirks_mode;
663
106
      insertion_mode_ = std::bind(&Parser::BeforeHTMLIM, this);
664
665
106
      if (on_node_callback_) {
666
0
        on_node_callback_(doctype_node, token_);
667
0
      }
668
669
106
      return true;
670
3.28k
    }
671
9.18k
    default:
672
9.18k
      break;
673
15.4k
  }
674
675
11.4k
  document_->metadata_.quirks_mode = true;
676
11.4k
  insertion_mode_ = std::bind(&Parser::BeforeHTMLIM, this);
677
11.4k
  return false;
678
15.4k
}  // Parser::InitialIM.
679
680
// Section 12.2.6.4.2.
681
24.0k
bool Parser::BeforeHTMLIM() {
682
24.0k
  switch (token_.token_type) {
683
0
    case TokenType::DOCTYPE_TOKEN: {
684
      // Ignore the token.
685
0
      return true;
686
0
    }
687
2.54k
    case TokenType::TEXT_TOKEN: {
688
      // https://www.w3.org/TR/2011/WD-html5-20110113/tokenization.html#the-before-html-insertion-mode
689
2.54k
      Strings::TrimLeft(&token_.data, Strings::kWhitespace);
690
2.54k
      if (token_.data.empty()) {
691
        // It was all whitespace, so ignore it.
692
302
        return true;
693
302
      }
694
2.24k
      break;
695
2.54k
    }
696
19.8k
    case TokenType::START_TAG_TOKEN: {
697
19.8k
      if (token_.atom == Atom::HTML) {
698
11.5k
        AddElement();
699
11.5k
        insertion_mode_ = std::bind(&Parser::BeforeHeadIM, this);
700
11.5k
        return true;
701
11.5k
      }
702
8.31k
      break;
703
19.8k
    }
704
8.31k
    case TokenType::END_TAG_TOKEN: {
705
500
      switch ((Atom)token_.atom) {
706
79
        case Atom::HEAD:
707
148
        case Atom::BODY:
708
235
        case Atom::HTML:
709
242
        case Atom::BR:
710
242
          ParseImpliedToken(TokenType::START_TAG_TOKEN, Atom::HTML,
711
242
                            AtomUtil::ToString(Atom::HTML));
712
242
          return false;
713
258
        default:
714
          // Ignore the token.
715
258
          return true;
716
500
      }
717
0
      break;
718
500
    }
719
489
    case TokenType::COMMENT_TOKEN: {
720
489
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
721
489
      node->SetManufactured(token_.is_manufactured);
722
489
      if (record_node_offsets_) {
723
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
724
0
      }
725
489
      node->data_ = std::move(token_.data);
726
489
      document_->root_node_->AppendChild(node);
727
489
      return true;
728
500
    }
729
658
    default:
730
658
      break;
731
24.0k
  }
732
11.2k
  ParseImpliedToken(TokenType::START_TAG_TOKEN, Atom::HTML,
733
11.2k
                    AtomUtil::ToString(Atom::HTML));
734
11.2k
  return false;
735
24.0k
}  // Parser::BeforeHTMLIM.
736
737
// Section 12.2.6.4.3.
738
24.6k
bool Parser::BeforeHeadIM() {
739
24.6k
  switch (token_.token_type) {
740
2.40k
    case TokenType::TEXT_TOKEN: {
741
      // https://www.w3.org/TR/2011/WD-html5-20110113/tokenization.html#the-before-head-insertion-mode
742
2.40k
      Strings::TrimLeft(&token_.data, Strings::kWhitespace);
743
2.40k
      if (token_.data.empty()) {
744
        // It was all whitespace, so ignore it.
745
152
        return true;
746
152
      }
747
2.25k
      break;
748
2.40k
    }
749
20.8k
    case TokenType::START_TAG_TOKEN: {
750
20.8k
      switch (token_.atom) {
751
11.5k
        case Atom::HEAD:
752
11.5k
          AddElement();
753
11.5k
          head_ = top();
754
11.5k
          insertion_mode_ = std::bind(&Parser::InHeadIM, this);
755
11.5k
          return true;
756
1.04k
        case Atom::HTML:
757
1.04k
          return InBodyIM();
758
8.24k
        default:
759
8.24k
          break;
760
20.8k
      }
761
8.24k
      break;
762
20.8k
    }
763
8.24k
    case TokenType::END_TAG_TOKEN: {
764
278
      switch (token_.atom) {
765
79
        case Atom::HEAD:
766
148
        case Atom::BODY:
767
235
        case Atom::HTML:
768
242
        case Atom::BR:
769
242
          ParseImpliedToken(TokenType::START_TAG_TOKEN, Atom::HEAD,
770
242
                            AtomUtil::ToString(Atom::HEAD));
771
242
          return false;
772
36
        default:
773
          // Ignore the token.
774
36
          return true;
775
278
      }
776
0
      break;
777
278
    }
778
460
    case TokenType::COMMENT_TOKEN: {
779
460
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
780
460
      node->SetManufactured(token_.is_manufactured);
781
460
      if (record_node_offsets_) {
782
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
783
0
      }
784
460
      node->data_ = std::move(token_.data);
785
460
      AddChild(node);
786
460
      return true;
787
278
    }
788
0
    case TokenType::DOCTYPE_TOKEN: {
789
      // Ignore the token.
790
0
      return true;
791
278
    }
792
714
    default:
793
714
      break;
794
24.6k
  }
795
796
11.2k
  ParseImpliedToken(TokenType::START_TAG_TOKEN, Atom::HEAD,
797
11.2k
                    AtomUtil::ToString(Atom::HEAD));
798
11.2k
  return false;
799
24.6k
}  // Parser::BeforeHeadIM.
800
801
// Section 12.2.6.4.4.
802
72.8k
bool Parser::InHeadIM() {
803
72.8k
  switch (token_.token_type) {
804
2.34k
    case TokenType::TEXT_TOKEN: {
805
2.34k
      std::string s = token_.data;
806
2.34k
      Strings::TrimLeft(&s, Strings::kWhitespace);
807
2.34k
      if (s.size() < token_.data.size()) {
808
        // Add the initial whitespace to the current node.
809
        // https://www.w3.org/TR/2011/WD-html5-20110113/tokenization.html#parsing-main-inhead
810
68
        AddText(token_.data.substr(0, token_.data.size() - s.size()));
811
68
        if (s.empty()) {
812
53
          return true;
813
53
        }
814
15
        token_.data = s;
815
15
      }
816
2.28k
      break;
817
2.34k
    }
818
44.5k
    case TokenType::START_TAG_TOKEN: {
819
44.5k
      switch (token_.atom) {
820
66
        case Atom::HTML:
821
66
          return InBodyIM();
822
16.9k
        case Atom::BASE:
823
16.9k
        case Atom::BASEFONT:
824
16.9k
        case Atom::BGSOUND:
825
17.8k
        case Atom::LINK:
826
18.5k
        case Atom::META: {
827
18.5k
          AddElement();
828
18.5k
          open_elements_stack_.Pop();
829
18.5k
          AcknowledgeSelfClosingTag();
830
18.5k
          if (!top() || !top()->LastChild()) return true;
831
          // Record some extra document url related info.
832
18.3k
          if (token_.atom == Atom::BASE) {
833
16.9k
            auto base_node = top()->LastChild();
834
16.9k
            RecordBaseURLMetadata(base_node);
835
16.9k
          } else if (token_.atom == Atom::LINK) {
836
735
            auto link_node = top()->LastChild();
837
735
            RecordLinkRelCanonical(link_node);
838
735
          }
839
18.3k
          return true;
840
18.5k
        }
841
224
        case Atom::NOSCRIPT: {
842
224
          if (scripting_) {
843
224
            ParseGenericRawTextElement();
844
224
            return true;
845
224
          }
846
0
          AddElement();
847
0
          insertion_mode_ = std::bind(&Parser::InHeadNoscriptIM, this);
848
          // Don't let the tokenizer go into raw text mode when scripting is
849
          // disabled.
850
0
          tokenizer_->NextIsNotRawText();
851
0
          return true;
852
224
        }
853
2.65k
        case Atom::SCRIPT:
854
17.6k
        case Atom::TITLE: {
855
17.6k
          AddElement();
856
17.6k
          SetOriginalIM();
857
17.6k
          insertion_mode_ = std::bind(&Parser::TextIM, this);
858
17.6k
          return true;
859
2.65k
        }
860
0
        case Atom::NOFRAMES:
861
510
        case Atom::STYLE: {
862
510
          ParseGenericRawTextElement();
863
510
          return true;
864
0
        }
865
213
        case Atom::HEAD: {
866
          // Ignore the token.
867
213
          return true;
868
0
        }
869
0
        case Atom::TEMPLATE: {
870
0
          AddElement();
871
0
          active_formatting_elements_stack_.Push(scope_marker_);
872
0
          frameset_ok_ = false;
873
0
          insertion_mode_ = std::bind(&Parser::InTemplateIM, this);
874
0
          template_stack_.push_back(std::bind(&Parser::InTemplateIM, this));
875
0
          return true;
876
0
        }
877
7.28k
        default:
878
          // Ignore remaining tags.
879
7.28k
          break;
880
44.5k
      }
881
7.28k
      break;
882
44.5k
    }
883
24.1k
    case TokenType::END_TAG_TOKEN: {
884
24.1k
      switch (token_.atom) {
885
11.5k
        case Atom::HEAD: {
886
11.5k
          open_elements_stack_.Pop();
887
11.5k
          insertion_mode_ = std::bind(&Parser::AfterHeadIM, this);
888
11.5k
          return true;
889
0
        }
890
69
        case Atom::BODY:
891
156
        case Atom::HTML:
892
163
        case Atom::BR: {
893
163
          ParseImpliedToken(TokenType::END_TAG_TOKEN, Atom::HEAD,
894
163
                            AtomUtil::ToString(Atom::HEAD));
895
163
          return false;
896
156
        }
897
0
        case Atom::TEMPLATE: {
898
0
          if (!open_elements_stack_.Contains(Atom::TEMPLATE)) return true;
899
900
          // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
901
0
          GenerateImpliedEndTags();
902
0
          for (int i = open_elements_stack_.size() - 1; i >= 0; --i) {
903
0
            Node* node = open_elements_stack_.at(i);
904
0
            if (node->name_space_.empty() && node->atom_ == Atom::TEMPLATE) {
905
0
              open_elements_stack_.Pop(open_elements_stack_.size() - i);
906
0
              break;
907
0
            }
908
0
          }
909
910
0
          ClearActiveFormattingElements();
911
0
          template_stack_.pop_back();
912
0
          ResetInsertionMode();
913
0
          return true;
914
0
        }
915
12.4k
        default:
916
          // Ignore the token.
917
12.4k
          return true;
918
24.1k
      }
919
0
      break;
920
24.1k
    }
921
134
    case TokenType::COMMENT_TOKEN: {
922
134
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
923
134
      node->SetManufactured(token_.is_manufactured);
924
134
      if (record_node_offsets_) {
925
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
926
0
      }
927
134
      node->data_ = std::move(token_.data);
928
134
      AddChild(node);
929
134
      return true;
930
24.1k
    }
931
0
    case TokenType::DOCTYPE_TOKEN: {
932
      // Ignore the token.
933
0
      return true;
934
24.1k
    }
935
1.69k
    default:
936
1.69k
      break;
937
72.8k
  }
938
939
11.2k
  ParseImpliedToken(TokenType::END_TAG_TOKEN, Atom::HEAD,
940
11.2k
                    AtomUtil::ToString(Atom::HEAD));
941
11.2k
  return false;
942
72.8k
}  // Parser::InHeadIM.
943
944
// 12.2.6.4.5.
945
0
bool Parser::InHeadNoscriptIM() {
946
0
  switch (token_.token_type) {
947
0
    case TokenType::DOCTYPE_TOKEN: {
948
      // Ignore the token.
949
0
      return true;
950
0
    }
951
0
    case TokenType::START_TAG_TOKEN: {
952
0
      switch (token_.atom) {
953
0
        case Atom::HTML: {
954
0
          return InBodyIM();
955
0
          break;
956
0
        }
957
0
        case Atom::BASEFONT:
958
0
        case Atom::BGSOUND:
959
0
        case Atom::LINK:
960
0
        case Atom::META:
961
0
        case Atom::NOFRAMES:
962
0
        case Atom::STYLE: {
963
0
          return InHeadIM();
964
0
          break;
965
0
        }
966
0
        case Atom::HEAD:
967
          // Ignore the token.
968
0
          return true;
969
0
        case Atom::NOSCRIPT: {
970
          // Don't let the tokenizer go into raw text mode even when a
971
          // <noscript> tag is in "in head noscript" insertion mode.
972
0
          tokenizer_->NextIsNotRawText();
973
          // Ignore the token.
974
0
          return true;
975
0
        }
976
0
        default:
977
0
          break;
978
0
      }
979
0
      break;
980
0
    }
981
0
    case TokenType::END_TAG_TOKEN: {
982
0
      switch (token_.atom) {
983
0
        case Atom::NOSCRIPT:
984
0
        case Atom::BR: {
985
0
          break;
986
0
        }
987
0
        default:
988
          // Ignore the token.
989
0
          return true;
990
0
      }
991
0
      break;
992
0
    }
993
0
    case TokenType::TEXT_TOKEN: {
994
0
      if (Strings::IsAllWhitespaceChars(token_.data)) {
995
        // It was all whitespace.
996
0
        return InHeadIM();
997
0
      }
998
0
      break;
999
0
    }
1000
0
    case TokenType::COMMENT_TOKEN: {
1001
0
      return InHeadIM();
1002
0
      break;
1003
0
    }
1004
0
    default:
1005
0
      break;
1006
0
  }
1007
0
  open_elements_stack_.Pop();
1008
0
  CHECK(top()->atom_ == Atom::HEAD)
1009
0
      << "html: the new current node will be a head element.";
1010
1011
0
  insertion_mode_ = std::bind(&Parser::InHeadIM, this);
1012
0
  if (token_.atom == Atom::NOSCRIPT) {
1013
0
    return true;
1014
0
  }
1015
1016
0
  return false;
1017
0
}  // Parser::InHeadNoscriptIM.
1018
1019
// Section 12.2.6.4.6.
1020
24.7k
bool Parser::AfterHeadIM() {
1021
24.7k
  switch (token_.token_type) {
1022
2.71k
    case TokenType::TEXT_TOKEN: {
1023
2.71k
      std::string s = token_.data;
1024
2.71k
      Strings::TrimLeft(&s);
1025
2.71k
      if (s.size() < token_.data.size()) {
1026
        // Add the initial whitespace to the current node.
1027
415
        AddText(token_.data.substr(0, token_.data.size() - s.size()));
1028
415
        if (s.empty()) return true;
1029
13
        token_.data = s;
1030
13
      }
1031
2.30k
      break;
1032
2.71k
    }
1033
19.3k
    case TokenType::START_TAG_TOKEN: {
1034
19.3k
      switch (token_.atom) {
1035
0
        case Atom::HTML:
1036
0
          return InBodyIM();
1037
11.5k
        case Atom::BODY: {
1038
11.5k
          AddElement();
1039
11.5k
          frameset_ok_ = false;
1040
11.5k
          insertion_mode_ = std::bind(&Parser::InBodyIM, this);
1041
11.5k
          return true;
1042
0
        }
1043
0
        case Atom::FRAMESET: {
1044
0
          AddElement();
1045
0
          insertion_mode_ = std::bind(&Parser::InFramesetIM, this);
1046
0
          return true;
1047
0
        }
1048
0
        case Atom::BASE:
1049
0
        case Atom::BASEFONT:
1050
0
        case Atom::BGSOUND:
1051
0
        case Atom::LINK:
1052
321
        case Atom::META:
1053
321
        case Atom::NOFRAMES:
1054
321
        case Atom::SCRIPT:
1055
321
        case Atom::STYLE:
1056
321
        case Atom::TEMPLATE:
1057
321
        case Atom::TITLE: {
1058
321
          open_elements_stack_.Push(head_);
1059
321
          defer(open_elements_stack_.Remove(head_));
1060
321
          return InHeadIM();
1061
321
        }
1062
757
        case Atom::HEAD:
1063
          // Ignore the token.
1064
757
          return true;
1065
6.71k
        default:
1066
6.71k
          break;
1067
19.3k
      }
1068
6.71k
      break;
1069
19.3k
    }
1070
6.71k
    case TokenType::END_TAG_TOKEN: {
1071
789
      switch (token_.atom) {
1072
69
        case Atom::BODY:
1073
156
        case Atom::HTML:
1074
163
        case Atom::BR: {
1075
          // Drop down to creating an implied <body> tag.
1076
163
          break;
1077
156
        }
1078
0
        case Atom::TEMPLATE: {
1079
0
          return InHeadIM();
1080
156
        }
1081
626
        default:
1082
          // Ignore the token.
1083
626
          return true;
1084
789
      }
1085
163
      break;
1086
789
    }
1087
163
    case TokenType::COMMENT_TOKEN: {
1088
147
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
1089
147
      node->SetManufactured(token_.is_manufactured);
1090
147
      if (record_node_offsets_) {
1091
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
1092
0
      }
1093
147
      node->data_ = std::move(token_.data);
1094
147
      AddChild(node);
1095
147
      return true;
1096
789
    }
1097
0
    case TokenType::DOCTYPE_TOKEN:
1098
      // Ignore the token.
1099
0
      return true;
1100
1.75k
    default:
1101
1.75k
      break;
1102
24.7k
  }
1103
1104
10.9k
  ParseImpliedToken(TokenType::START_TAG_TOKEN, Atom::BODY,
1105
10.9k
                    AtomUtil::ToString(Atom::BODY));
1106
10.9k
  frameset_ok_ = true;
1107
10.9k
  return false;
1108
24.7k
}  // Parser::AfterHeadIM.
1109
1110
// Section 12.2.6.4.7.
1111
13.7M
bool Parser::InBodyIM() {  // NOLINT
1112
13.7M
  switch (token_.token_type) {
1113
540k
    case TokenType::TEXT_TOKEN: {
1114
540k
      std::string d = token_.data;
1115
540k
      Node* node = open_elements_stack_.Top();
1116
540k
      switch (node->atom_) {
1117
430
        case Atom::PRE:
1118
430
        case Atom::LISTING: {
1119
430
          if (!node->FirstChild()) {
1120
            // Ignore a new line at the start of a <pre> block.
1121
350
            if (!d.empty() && d.front() == '\r') {
1122
0
              d = d.substr(1);
1123
0
            }
1124
350
            if (!d.empty() && d.front() == '\n') {
1125
20
              d = d.substr(1);
1126
20
            }
1127
350
          }
1128
430
          break;
1129
430
        }
1130
540k
        default:
1131
540k
          break;
1132
540k
      }
1133
1134
540k
      Strings::ReplaceAny(&d, Strings::kNullChar, "");
1135
      // Checks if data empty or all null characters.
1136
540k
      if (d.empty()) {
1137
240
        return true;
1138
240
      }
1139
1140
540k
      ReconstructActiveFormattingElements();
1141
540k
      AddText(d);
1142
540k
      if (frameset_ok_ && !Strings::IsAllWhitespaceChars(d)) {
1143
        // There were non-whitespace characters inserted.
1144
3.25k
        frameset_ok_ = false;
1145
3.25k
      }
1146
540k
      break;
1147
540k
    }
1148
11.4M
    case TokenType::START_TAG_TOKEN: {
1149
11.4M
      switch (token_.atom) {
1150
6.45k
        case Atom::HTML: {
1151
6.45k
          num_html_tags_++;
1152
6.45k
          if (open_elements_stack_.Contains(Atom::TEMPLATE)) {
1153
0
            return true;
1154
0
          }
1155
6.45k
          CopyAttributes(open_elements_stack_.at(0), token_);
1156
6.45k
          if (!document_->metadata_.has_manufactured_html ||
1157
6.45k
              num_html_tags_ > 1) {
1158
6.38k
            document_->metadata_.duplicate_html_elements = true;
1159
6.38k
            document_->metadata_.duplicate_html_element_location =
1160
6.38k
                token_.line_col_in_html_src;
1161
6.38k
          }
1162
6.45k
          break;
1163
6.45k
        }
1164
1.01k
        case Atom::BASE:
1165
1.01k
        case Atom::BASEFONT:
1166
1.01k
        case Atom::BGSOUND:
1167
1.61k
        case Atom::LINK:
1168
1.80k
        case Atom::META:
1169
1.80k
        case Atom::NOFRAMES:
1170
3.44k
        case Atom::SCRIPT:
1171
3.94k
        case Atom::STYLE:
1172
3.94k
        case Atom::TEMPLATE:
1173
4.81k
        case Atom::TITLE: {
1174
4.81k
          return InHeadIM();
1175
3.94k
        }
1176
20.3k
        case Atom::BODY: {
1177
20.3k
          num_body_tags_++;
1178
20.3k
          if (open_elements_stack_.Contains(Atom::TEMPLATE)) {
1179
0
            return true;
1180
0
          }
1181
20.3k
          if (open_elements_stack_.size() >= 2) {
1182
20.3k
            Node* body = open_elements_stack_.at(1);
1183
20.3k
            if (body->node_type_ == NodeType::ELEMENT_NODE &&
1184
20.3k
                body->atom_ == Atom::BODY) {
1185
20.3k
              frameset_ok_ = false;
1186
20.3k
              CopyAttributes(body, token_);
1187
20.3k
              if (!document_->metadata_.has_manufactured_body ||
1188
20.3k
                  num_body_tags_ > 1) {
1189
19.8k
                document_->metadata_.duplicate_body_elements = true;
1190
19.8k
                document_->metadata_.duplicate_body_element_location =
1191
19.8k
                    token_.line_col_in_html_src;
1192
19.8k
              }
1193
20.3k
            }
1194
20.3k
          }
1195
20.3k
          break;
1196
20.3k
        }
1197
0
        case Atom::FRAMESET: {
1198
0
          if (!frameset_ok_ || open_elements_stack_.size() < 2 ||
1199
0
              open_elements_stack_.at(1)->atom_ != Atom::BODY) {
1200
            // Ignore the token.
1201
0
            return true;
1202
0
          }
1203
0
          auto body = open_elements_stack_.at(1);
1204
0
          if (body->Parent()) {
1205
0
            auto removed_body = body->Parent()->RemoveChild(body);
1206
0
            open_elements_stack_.Remove(removed_body);
1207
0
          }
1208
          // Remove all nodes except one, the last in the stack.
1209
0
          open_elements_stack_.Pop(open_elements_stack_.size() - 1);
1210
0
          AddElement();
1211
0
          insertion_mode_ = std::bind(&Parser::InFramesetIM, this);
1212
0
          return true;
1213
0
        }
1214
0
        case Atom::ADDRESS:
1215
0
        case Atom::ARTICLE:
1216
194
        case Atom::ASIDE:
1217
194
        case Atom::BLOCKQUOTE:
1218
194
        case Atom::CENTER:
1219
194
        case Atom::DETAILS:
1220
194
        case Atom::DIALOG:
1221
389
        case Atom::DIR:
1222
836
        case Atom::DIV:
1223
13.4k
        case Atom::DL:
1224
13.4k
        case Atom::FIELDSET:
1225
13.4k
        case Atom::FIGCAPTION:
1226
13.4k
        case Atom::FIGURE:
1227
13.9k
        case Atom::FOOTER:
1228
14.3k
        case Atom::HEADER:
1229
14.3k
        case Atom::HGROUP:
1230
14.5k
        case Atom::MAIN:
1231
14.7k
        case Atom::MENU:
1232
14.8k
        case Atom::NAV:
1233
15.2k
        case Atom::OL:
1234
28.0k
        case Atom::P:
1235
28.0k
        case Atom::SECTION:
1236
28.0k
        case Atom::SUMMARY:
1237
48.9k
        case Atom::UL: {
1238
48.9k
          PopUntil(Scope::ButtonScope, Atom::P);
1239
48.9k
          AddElement();
1240
48.9k
          break;
1241
28.0k
        }
1242
2.00k
        case Atom::H1:
1243
4.20k
        case Atom::H2:
1244
5.68k
        case Atom::H3:
1245
9.68k
        case Atom::H4:
1246
17.7k
        case Atom::H5:
1247
18.8k
        case Atom::H6: {
1248
18.8k
          PopUntil(Scope::ButtonScope, Atom::P);
1249
18.8k
          Node* top_node = top();
1250
18.8k
          if (top_node) {
1251
18.8k
            switch (top_node->atom_) {
1252
508
              case Atom::H1:
1253
779
              case Atom::H2:
1254
1.14k
              case Atom::H3:
1255
4.07k
              case Atom::H4:
1256
5.45k
              case Atom::H5:
1257
5.71k
              case Atom::H6:
1258
5.71k
                open_elements_stack_.Pop();
1259
5.71k
                break;
1260
13.1k
              default:
1261
13.1k
                break;
1262
18.8k
            }
1263
18.8k
          }
1264
18.8k
          AddElement();
1265
18.8k
          break;
1266
18.8k
        }
1267
712
        case Atom::PRE:
1268
712
        case Atom::LISTING: {
1269
712
          PopUntil(Scope::ButtonScope, Atom::P);
1270
712
          AddElement();
1271
          // The newline, if any, will be dealt with by the TEXT_TOKEN case.
1272
712
          frameset_ok_ = false;
1273
712
          break;
1274
712
        }
1275
2.89k
        case Atom::FORM: {
1276
2.89k
          if (form_ && !open_elements_stack_.Contains(Atom::TEMPLATE)) {
1277
            // Ignore the token.
1278
1.38k
            return true;
1279
1.38k
          }
1280
1.51k
          PopUntil(Scope::ButtonScope, Atom::P);
1281
1.51k
          AddElement();
1282
1.51k
          if (!open_elements_stack_.Contains(Atom::TEMPLATE)) {
1283
1.51k
            form_ = top();
1284
1.51k
          }
1285
1.51k
          break;
1286
2.89k
        }
1287
171k
        case Atom::LI: {
1288
171k
          frameset_ok_ = false;
1289
848k
          for (int i = open_elements_stack_.size() - 1; i >= 0; --i) {
1290
848k
            Node* node = open_elements_stack_.at(i);
1291
848k
            switch (node->atom_) {
1292
3.24k
              case Atom::LI:
1293
                // Remove all except last in stack.
1294
3.24k
                open_elements_stack_.Pop(open_elements_stack_.size() - i);
1295
3.24k
                break;
1296
0
              case Atom::ADDRESS:
1297
194
              case Atom::DIV:
1298
414
              case Atom::P:
1299
414
                continue;
1300
844k
              default:
1301
844k
                if (!node->IsSpecialElement()) continue;
1302
848k
            }
1303
171k
            break;
1304
848k
          }
1305
171k
          PopUntil(Scope::ButtonScope, Atom::P);
1306
171k
          AddElement();
1307
171k
          break;
1308
171k
        }
1309
1.60k
        case Atom::DD:
1310
2.92k
        case Atom::DT: {
1311
2.92k
          frameset_ok_ = false;
1312
16.4k
          for (int i = open_elements_stack_.size() - 1; i >= 0; --i) {
1313
16.4k
            Node* node = open_elements_stack_.at(i);
1314
16.4k
            switch (node->atom_) {
1315
450
              case Atom::DD:
1316
849
              case Atom::DT:
1317
                // Remove all except last in stack.
1318
849
                open_elements_stack_.Pop(open_elements_stack_.size() - i);
1319
849
                break;
1320
0
              case Atom::ADDRESS:
1321
327
              case Atom::DIV:
1322
599
              case Atom::P:
1323
599
                continue;
1324
15.0k
              default:
1325
15.0k
                if (!node->IsSpecialElement()) continue;
1326
16.4k
            }
1327
2.92k
            break;
1328
16.4k
          }
1329
2.92k
          PopUntil(Scope::ButtonScope, Atom::P);
1330
2.92k
          AddElement();
1331
2.92k
          break;
1332
2.92k
        }
1333
0
        case Atom::PLAINTEXT: {
1334
0
          PopUntil(Scope::ButtonScope, Atom::P);
1335
0
          AddElement();
1336
0
          break;
1337
2.92k
        }
1338
314
        case Atom::BUTTON: {
1339
314
          PopUntil(Scope::DefaultScope, Atom::BUTTON);
1340
314
          ReconstructActiveFormattingElements();
1341
314
          AddElement();
1342
314
          frameset_ok_ = false;
1343
314
          break;
1344
2.92k
        }
1345
182k
        case Atom::A: {
1346
982k
          for (int i = active_formatting_elements_stack_.size() - 1; i >= 0;
1347
979k
               --i) {
1348
979k
            Node* node = active_formatting_elements_stack_.at(i);
1349
979k
            if (node->node_type_ == NodeType::SCOPE_MARKER_NODE) break;
1350
972k
            if (node->node_type_ == NodeType::ELEMENT_NODE &&
1351
972k
                node->atom_ == Atom::A) {
1352
172k
              InBodyEndTagFormatting(Atom::A, "a");
1353
172k
              open_elements_stack_.Remove(node);
1354
172k
              active_formatting_elements_stack_.Remove(node);
1355
172k
              break;
1356
172k
            }
1357
972k
          }
1358
182k
          ReconstructActiveFormattingElements();
1359
182k
          AddFormattingElement();
1360
182k
          break;
1361
2.92k
        }
1362
194k
        case Atom::B:
1363
194k
        case Atom::BIG:
1364
194k
        case Atom::CODE:
1365
195k
        case Atom::EM:
1366
195k
        case Atom::FONT:
1367
833k
        case Atom::I:
1368
3.75M
        case Atom::S:
1369
3.75M
        case Atom::SMALL:
1370
3.75M
        case Atom::STRIKE:
1371
3.75M
        case Atom::STRONG:
1372
3.77M
        case Atom::TT:
1373
3.93M
        case Atom::U: {
1374
3.93M
          ReconstructActiveFormattingElements();
1375
3.93M
          AddFormattingElement();
1376
3.93M
          break;
1377
3.77M
        }
1378
459
        case Atom::NOBR: {
1379
459
          ReconstructActiveFormattingElements();
1380
459
          if (ElementInScope(Scope::DefaultScope, Atom::NOBR)) {
1381
240
            InBodyEndTagFormatting(Atom::NOBR, "nobr");
1382
240
            ReconstructActiveFormattingElements();
1383
240
          }
1384
459
          AddFormattingElement();
1385
459
          break;
1386
3.77M
        }
1387
804
        case Atom::APPLET:
1388
804
        case Atom::MARQUEE:
1389
804
        case Atom::OBJECT: {
1390
804
          ReconstructActiveFormattingElements();
1391
804
          AddElement();
1392
804
          active_formatting_elements_stack_.Push(scope_marker_);
1393
804
          frameset_ok_ = false;
1394
804
          break;
1395
804
        }
1396
1.12M
        case Atom::TABLE: {
1397
1.12M
          if (!document_->metadata_.quirks_mode) {
1398
0
            PopUntil(Scope::ButtonScope, Atom::P);
1399
0
          }
1400
1.12M
          AddElement();
1401
1.12M
          frameset_ok_ = false;
1402
1.12M
          insertion_mode_ = std::bind(&Parser::InTableIM, this);
1403
1.12M
          return true;
1404
804
        }
1405
66
        case Atom::AREA:
1406
702
        case Atom::BR:
1407
896
        case Atom::EMBED:
1408
1.42k
        case Atom::IMG:
1409
2.35k
        case Atom::INPUT:
1410
2.35k
        case Atom::KEYGEN:
1411
2.55k
        case Atom::WBR: {
1412
2.55k
          ReconstructActiveFormattingElements();
1413
2.55k
          AddElement();
1414
2.55k
          open_elements_stack_.Pop();
1415
2.55k
          AcknowledgeSelfClosingTag();
1416
2.55k
          if (token_.atom == Atom::INPUT) {
1417
1.31k
            for (auto& attr : token_.attributes) {
1418
1.31k
              if (attr.key == "type" &&
1419
1.31k
                  Strings::EqualFold(attr.value, "hidden")) {
1420
                  // Skip setting frameset_ok_ = false;
1421
0
                  return true;
1422
0
              }
1423
1.31k
            }
1424
927
          }
1425
2.55k
          frameset_ok_ = false;
1426
2.55k
          break;
1427
2.55k
        }
1428
66
        case Atom::PARAM:
1429
66
        case Atom::SOURCE:
1430
286
        case Atom::TRACK: {
1431
286
          AddElement();
1432
286
          open_elements_stack_.Pop();
1433
286
          AcknowledgeSelfClosingTag();
1434
286
          break;
1435
66
        }
1436
200
        case Atom::HR: {
1437
200
          PopUntil(Scope::ButtonScope, Atom::P);
1438
200
          AddElement();
1439
200
          open_elements_stack_.Pop();
1440
200
          AcknowledgeSelfClosingTag();
1441
200
          frameset_ok_ = false;
1442
200
          break;
1443
66
        }
1444
0
        case Atom::IMAGE: {
1445
0
          token_.atom = Atom::IMG;
1446
0
          token_.data = AtomUtil::ToString(Atom::IMG);
1447
0
          return false;
1448
66
        }
1449
0
        case Atom::TEXTAREA: {
1450
0
          AddElement();
1451
0
          SetOriginalIM();
1452
0
          frameset_ok_ = false;
1453
0
          insertion_mode_ = std::bind(&Parser::TextIM, this);
1454
0
          break;
1455
66
        }
1456
5.06k
        case Atom::XMP: {
1457
5.06k
          PopUntil(Scope::ButtonScope, Atom::P);
1458
5.06k
          ReconstructActiveFormattingElements();
1459
5.06k
          frameset_ok_ = false;
1460
5.06k
          ParseGenericRawTextElement();
1461
5.06k
          break;
1462
66
        }
1463
1
        case Atom::IFRAME: {
1464
1
          frameset_ok_ = false;
1465
1
          ParseGenericRawTextElement();
1466
1
          break;
1467
66
        }
1468
1
        case Atom::NOEMBED: {
1469
1
          ParseGenericRawTextElement();
1470
1
          break;
1471
66
        }
1472
333
        case Atom::NOSCRIPT: {
1473
333
          if (scripting_) {
1474
333
            ParseGenericRawTextElement();
1475
333
            return true;
1476
333
          }
1477
0
          ReconstructActiveFormattingElements();
1478
0
          AddElement();
1479
          // Don't let the tokenizer go into raw text mode when scripting is
1480
          // disabled.
1481
0
          tokenizer_->NextIsNotRawText();
1482
0
          break;
1483
333
        }
1484
0
        case Atom::SELECT: {
1485
0
          ReconstructActiveFormattingElements();
1486
0
          AddElement();
1487
0
          frameset_ok_ = false;
1488
0
          insertion_mode_ = std::bind(&Parser::InSelectIM, this);
1489
0
          return true;
1490
0
          break;
1491
333
        }
1492
0
        case Atom::OPTGROUP:
1493
0
        case Atom::OPTION: {
1494
0
          if (top()->atom_ == Atom::OPTION) {
1495
0
            open_elements_stack_.Pop();
1496
0
          }
1497
0
          ReconstructActiveFormattingElements();
1498
0
          AddElement();
1499
0
          break;
1500
0
        }
1501
5.12k
        case Atom::RB:
1502
5.32k
        case Atom::RTC: {
1503
5.32k
          if (ElementInScope(Scope::DefaultScope, Atom::RUBY)) {
1504
140
            GenerateImpliedEndTags();
1505
140
          }
1506
5.32k
          AddElement();
1507
5.32k
          break;
1508
5.12k
        }
1509
1.24k
        case Atom::RP:
1510
20.8k
        case Atom::RT: {
1511
20.8k
          if (ElementInScope(Scope::DefaultScope, Atom::RUBY)) {
1512
576
            GenerateImpliedEndTags({Atom::RTC});
1513
576
          }
1514
20.8k
          AddElement();
1515
20.8k
          break;
1516
1.24k
        }
1517
1.27k
        case Atom::MATH:
1518
2.42k
        case Atom::SVG: {
1519
2.42k
          ReconstructActiveFormattingElements();
1520
2.42k
          if (token_.atom == Atom::MATH) {
1521
1.27k
            AdjustMathMLAttributeNames(&token_.attributes);
1522
1.27k
          } else {
1523
1.15k
            AdjustSVGAttributeNames(&token_.attributes);
1524
1.15k
          }
1525
2.42k
          AdjustForeignAttributes(&token_.attributes);
1526
2.42k
          AddElement();
1527
2.42k
          top()->name_space_ = AtomUtil::ToString(token_.atom);
1528
2.42k
          if (has_self_closing_token_) {
1529
256
            open_elements_stack_.Pop();
1530
256
            AcknowledgeSelfClosingTag();
1531
256
          }
1532
2.42k
          return true;
1533
0
          break;
1534
1.27k
        }
1535
0
        case Atom::CAPTION:
1536
204
        case Atom::COL:
1537
204
        case Atom::COLGROUP:
1538
270
        case Atom::FRAME:
1539
593
        case Atom::HEAD:
1540
659
        case Atom::TBODY:
1541
868
        case Atom::TD:
1542
1.09k
        case Atom::TFOOT:
1543
1.52k
        case Atom::TH:
1544
1.75k
        case Atom::THEAD:
1545
1.97k
        case Atom::TR: {
1546
          // Ignore the token.
1547
1.97k
          break;
1548
1.75k
        }
1549
5.88M
        default:
1550
5.88M
          ReconstructActiveFormattingElements();
1551
5.88M
          AddElement();
1552
11.4M
      }
1553
10.3M
      break;
1554
11.4M
    }
1555
10.3M
    case TokenType::END_TAG_TOKEN: {
1556
335k
      switch (token_.atom) {
1557
13.8k
        case Atom::BODY:
1558
13.8k
          if (ElementInScope(Scope::DefaultScope, Atom::BODY)) {
1559
13.8k
            insertion_mode_ = std::bind(&Parser::AfterBodyIM, this);
1560
13.8k
          }
1561
13.8k
          break;
1562
6.08k
        case Atom::HTML: {
1563
6.08k
          if (ElementInScope(Scope::DefaultScope, Atom::BODY)) {
1564
6.08k
            ParseImpliedToken(TokenType::END_TAG_TOKEN, Atom::BODY,
1565
6.08k
                              AtomUtil::ToString(Atom::BODY));
1566
6.08k
            return false;
1567
6.08k
          }
1568
0
          return true;
1569
0
          break;
1570
6.08k
        }
1571
0
        case Atom::ADDRESS:
1572
0
        case Atom::ARTICLE:
1573
10
        case Atom::ASIDE:
1574
10
        case Atom::BLOCKQUOTE:
1575
342
        case Atom::BUTTON:
1576
342
        case Atom::CENTER:
1577
342
        case Atom::DETAILS:
1578
342
        case Atom::DIALOG:
1579
408
        case Atom::DIR:
1580
474
        case Atom::DIV:
1581
679
        case Atom::DL:
1582
679
        case Atom::FIELDSET:
1583
679
        case Atom::FIGCAPTION:
1584
679
        case Atom::FIGURE:
1585
745
        case Atom::FOOTER:
1586
950
        case Atom::HEADER:
1587
950
        case Atom::HGROUP:
1588
950
        case Atom::LISTING:
1589
951
        case Atom::MAIN:
1590
1.01k
        case Atom::MENU:
1591
1.05k
        case Atom::NAV:
1592
1.25k
        case Atom::OL:
1593
1.46k
        case Atom::PRE:
1594
1.46k
        case Atom::SECTION:
1595
1.46k
        case Atom::SUMMARY:
1596
1.53k
        case Atom::UL: {
1597
1.53k
          PopUntil(Scope::DefaultScope, token_.atom);
1598
1.53k
          break;
1599
1.46k
        }
1600
2.83k
        case Atom::FORM: {
1601
2.83k
          if (open_elements_stack_.Contains(Atom::TEMPLATE)) {
1602
0
            int i = IndexOfElementInScope(Scope::DefaultScope, {Atom::FORM});
1603
0
            if (i == -1) {
1604
              // Ignore the token.
1605
0
              return true;
1606
0
            }
1607
0
            GenerateImpliedEndTags();
1608
0
            if (open_elements_stack_.at(i)->atom_ != Atom::FORM) {
1609
              // Ignore the token.
1610
0
              return true;
1611
0
            }
1612
0
            PopUntil(Scope::DefaultScope, Atom::FORM);
1613
2.83k
          } else {
1614
2.83k
            Node* node = form_;
1615
2.83k
            form_ = nullptr;
1616
2.83k
            int i = IndexOfElementInScope(Scope::DefaultScope, {Atom::FORM});
1617
2.83k
            if (!node || i == -1 || open_elements_stack_.at(i) != node) {
1618
              // Ignore the token.
1619
1.34k
              return true;
1620
1.34k
            }
1621
1.49k
            GenerateImpliedEndTags();
1622
1.49k
            open_elements_stack_.Remove(node);
1623
1.49k
          }
1624
1.49k
          break;
1625
2.83k
        }
1626
1.63k
        case Atom::P: {
1627
1.63k
          if (!ElementInScope(Scope::ButtonScope, Atom::P)) {
1628
1.38k
            ParseImpliedToken(TokenType::START_TAG_TOKEN, Atom::P,
1629
1.38k
                              AtomUtil::ToString(Atom::P));
1630
1.38k
          }
1631
1.63k
          PopUntil(Scope::ButtonScope, Atom::P);
1632
1.63k
          break;
1633
2.83k
        }
1634
172k
        case Atom::LI: {
1635
172k
          PopUntil(Scope::ListItemScope, Atom::LI);
1636
172k
          break;
1637
2.83k
        }
1638
81
        case Atom::DD:
1639
331
        case Atom::DT: {
1640
331
          PopUntil(Scope::DefaultScope, token_.atom);
1641
331
          break;
1642
81
        }
1643
215
        case Atom::H1:
1644
412
        case Atom::H2:
1645
482
        case Atom::H3:
1646
3.45k
        case Atom::H4:
1647
4.07k
        case Atom::H5:
1648
4.33k
        case Atom::H6: {
1649
4.33k
          PopUntil(Scope::DefaultScope, Atom::H1, Atom::H2, Atom::H3, Atom::H4,
1650
4.33k
                   Atom::H5, Atom::H6);
1651
4.33k
          break;
1652
4.07k
        }
1653
4.66k
        case Atom::A:
1654
21.9k
        case Atom::B:
1655
21.9k
        case Atom::BIG:
1656
22.0k
        case Atom::CODE:
1657
22.2k
        case Atom::EM:
1658
22.4k
        case Atom::FONT:
1659
78.9k
        case Atom::I:
1660
79.3k
        case Atom::NOBR:
1661
89.1k
        case Atom::S:
1662
89.1k
        case Atom::SMALL:
1663
89.3k
        case Atom::STRIKE:
1664
89.3k
        case Atom::STRONG:
1665
89.6k
        case Atom::TT:
1666
93.3k
        case Atom::U: {
1667
93.3k
          InBodyEndTagFormatting(token_.atom,
1668
93.3k
                                 token_.atom != Atom::UNKNOWN
1669
93.3k
                                     ? AtomUtil::ToString(token_.atom)
1670
93.3k
                                     : token_.data);
1671
93.3k
          break;
1672
89.6k
        }
1673
799
        case Atom::APPLET:
1674
799
        case Atom::MARQUEE:
1675
799
        case Atom::OBJECT: {
1676
799
          if (PopUntil(Scope::DefaultScope, token_.atom)) {
1677
551
            ClearActiveFormattingElements();
1678
551
          }
1679
799
          break;
1680
799
        }
1681
291
        case Atom::BR: {
1682
291
          token_.token_type = TokenType::START_TAG_TOKEN;
1683
291
          return false;
1684
0
          break;
1685
799
        }
1686
0
        case Atom::TEMPLATE: {
1687
0
          return InHeadIM();
1688
0
          break;
1689
799
        }
1690
37.9k
        default:
1691
37.9k
          InBodyEndTagOther(token_.atom, token_.data);
1692
335k
      }
1693
327k
      break;
1694
335k
    }
1695
1.39M
    case TokenType::COMMENT_TOKEN: {
1696
1.39M
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
1697
1.39M
      node->SetManufactured(token_.is_manufactured);
1698
1.39M
      if (record_node_offsets_) {
1699
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
1700
0
      }
1701
1.39M
      node->data_ = token_.data;
1702
1.39M
      AddChild(node);
1703
1.39M
      break;
1704
335k
    }
1705
11.4k
    case TokenType::ERROR_TOKEN: {
1706
11.4k
      if (template_stack_.size() > 0) {
1707
0
        insertion_mode_ = std::bind(&Parser::InTemplateIM, this);
1708
0
        return false;
1709
11.4k
      } else {
1710
63.8k
        for (Node* n : open_elements_stack_) {
1711
63.8k
          switch (n->atom_) {
1712
614
            case Atom::DD:
1713
1.26k
            case Atom::LI:
1714
1.26k
            case Atom::OPTGROUP:
1715
1.26k
            case Atom::OPTION:
1716
1.54k
            case Atom::P:
1717
6.36k
            case Atom::RB:
1718
6.79k
            case Atom::RP:
1719
8.11k
            case Atom::RT:
1720
8.20k
            case Atom::RTC:
1721
8.95k
            case Atom::TBODY:
1722
9.26k
            case Atom::TD:
1723
9.32k
            case Atom::TFOOT:
1724
46.5k
            case Atom::TH:
1725
46.5k
            case Atom::THEAD:
1726
47.1k
            case Atom::TR:
1727
52.1k
            case Atom::BODY:
1728
57.4k
            case Atom::HTML:
1729
              // Ignore.
1730
57.4k
              break;
1731
6.36k
            default:
1732
6.36k
              return true;
1733
63.8k
          }
1734
63.8k
        }
1735
11.4k
      }
1736
5.06k
      break;
1737
11.4k
    }
1738
5.06k
    default:
1739
1
      break;
1740
13.7M
  }
1741
1742
12.5M
  return true;
1743
13.7M
}  // NOLINT(readability/fn_size)
1744
// Parser::InBodyIM end.
1745
1746
266k
void Parser::InBodyEndTagFormatting(Atom tag_atom, std::string_view tag_name) {
  // Handles an end tag for a formatting element using the "adoption agency"
  // algorithm, described at
  // https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
  //
  // TODO: this is a fairly literal line-by-line translation of that algorithm.
  // Once the code successfully parses the comprehensive test suite, we should
  // refactor this code to be more idiomatic.
  auto& open_stack = open_elements_stack_;
  auto& formatting_list = active_formatting_elements_stack_;

  // Steps 1-2. If the current node carries the tag name and is not in the
  // list of active formatting elements, popping it is all that is needed.
  auto current_node = open_stack.Top();
  if (current_node->data_ == tag_name &&
      formatting_list.Index(current_node) == -1) {
    open_stack.Pop();
    return;
  }

  // Steps 3-5. The outer loop, bounded to eight rounds.
  for (int round = 0; round < 8; ++round) {
    // Step 6. Search the active formatting elements from the end, stopping at
    // the first scope marker, for an entry with the requested atom.
    Node* formatting_element = nullptr;
    for (int pos = formatting_list.size() - 1; pos >= 0; --pos) {
      Node* entry = formatting_list.at(pos);
      if (entry->node_type_ == NodeType::SCOPE_MARKER_NODE) {
        break;
      }
      if (entry->atom_ == tag_atom) {
        formatting_element = entry;
        break;
      }
    }

    if (formatting_element == nullptr) {
      InBodyEndTagOther(tag_atom, tag_name);
      return;
    }

    // Step 7. A formatting element that is no longer on the stack of open
    // elements is dropped from the active list and the tag is ignored.
    const int fe_index = open_stack.Index(formatting_element);
    if (fe_index == -1) {
      formatting_list.Remove(formatting_element);
      return;
    }

    // Step 8. Ignore the tag if the formatting element is not in scope.
    if (!ElementInScope(Scope::DefaultScope, tag_atom)) {
      return;
    }

    // Step 9. Omitted: it only reports a parse error and does not return.

    // Steps 10-11. The furthest block is the first special element found when
    // scanning the open elements from the formatting element's position
    // towards the top of the stack.
    Node* furthest_block = nullptr;
    for (int k = fe_index; k < open_stack.size(); ++k) {
      Node* candidate = open_stack.at(k);
      if (candidate->IsSpecialElement()) {
        furthest_block = candidate;
        break;
      }
    }

    if (furthest_block == nullptr) {
      // No furthest block: pop up to and including the formatting element and
      // remove it from the active formatting elements.
      Node* popped = nullptr;
      do {
        popped = open_stack.Pop();
      } while (popped != formatting_element);
      formatting_list.Remove(popped);
      return;
    }

    // Steps 12-13. Common ancestor and bookmark.
    Node* common_ancestor = open_stack.at(fe_index - 1);
    auto bookmark = formatting_list.Index(formatting_element);

    // Step 14. The inner loop. Find the last_node to reparent.
    Node* last_node = furthest_block;
    Node* node = furthest_block;
    int stack_pos = open_stack.Index(node);
    // Step 14.1.
    int inner_count = 0;
    for (;;) {
      // Step 14.2.
      ++inner_count;
      // Step 14.3. Move one entry down the stack of open elements.
      --stack_pos;
      node = open_stack.at(stack_pos);
      // Step 14.4. Reaching the formatting element ends the inner loop.
      if (node == formatting_element) {
        break;
      }

      // Step 14.5. After more than three inner iterations, a node found in
      // the active formatting elements is removed from that list.
      const int active_pos = formatting_list.Index(node);
      if (inner_count > 3 && active_pos > -1) {
        formatting_list.Remove(node);
        // The removal shifts later entries, so a bookmark at or after the
        // removed slot must move back to stay within the list.
        if (active_pos <= bookmark) {
          --bookmark;
        }
        continue;
      }

      // Step 14.6. A node that is not an active formatting element is removed
      // from the stack of open elements.
      if (active_pos == -1) {
        open_stack.Remove(node);
        continue;
      }

      // Step 14.7. Replace the node with a clone in both lists.
      Node* replacement = document_->CloneNode(node);
      formatting_list.Replace(formatting_list.Index(node), replacement);
      open_stack.Replace(open_stack.Index(node), replacement);
      node = replacement;

      // Step 14.8.
      if (last_node == furthest_block) {
        bookmark = formatting_list.Index(node) + 1;
      }
      // Step 14.9. Detach last_node from its parent (if any) and append it to
      // the clone.
      if (last_node->Parent()) {
        last_node = last_node->Parent()->RemoveChild(last_node);
      }
      node->AppendChild(last_node);

      // Step 14.10.
      last_node = node;
    }

    // Step 15. Reparent last_node to the common ancestor,
    // or for misnested table nodes, to the foster parent.
    if (last_node->Parent()) {
      last_node = last_node->Parent()->RemoveChild(last_node);
    }

    const Atom ancestor_atom = common_ancestor->atom_;
    const bool ancestor_is_table_part =
        ancestor_atom == Atom::TABLE || ancestor_atom == Atom::TBODY ||
        ancestor_atom == Atom::TFOOT || ancestor_atom == Atom::THEAD ||
        ancestor_atom == Atom::TR;
    if (ancestor_is_table_part) {
      FosterParent(last_node);
    } else {
      common_ancestor->AppendChild(last_node);
    }

    // Steps 16-18. Reparent nodes from the furthest block's children
    // to a clone of the formatting element.
    Node* fe_clone = document_->CloneNode(formatting_element);
    furthest_block->ReparentChildrenTo(fe_clone);
    furthest_block->AppendChild(fe_clone);

    // Step 19. Fix up the list of active formatting elements.
    const int previous_pos = formatting_list.Index(formatting_element);
    if (previous_pos != -1 && previous_pos < bookmark) {
      // Move the bookmark with the rest of the list.
      --bookmark;
    }

    formatting_list.Remove(formatting_element);
    formatting_list.Insert(bookmark, fe_clone);

    // Step 20. Fix up the stack of open elements.
    open_stack.Remove(formatting_element);
    open_stack.Insert(open_stack.Index(furthest_block) + 1, fe_clone);
  }
}  // Parser::InBodyEndTagFormatting.
1920
1921
46.4k
void Parser::InBodyEndTagOther(Atom tag_atom, std::string_view tag_name) {
  // Walks the stack of open elements from the top down. The first element
  // with the same tag is popped together with everything above it; the walk
  // stops without popping as soon as a special element is encountered.
  int idx = open_elements_stack_.size();
  while (--idx >= 0) {
    Node* candidate = open_elements_stack_.at(idx);

    // Two element nodes have the same tag if they have the same Data (a
    // string-typed field). As an optimization, for common HTML tags, each
    // Data string is assigned a unique, non-zero Atom (a uint32-typed
    // field), since integer comparison is faster than string comparison.
    // Uncommon (custom) tags get a zero Atom.
    //
    // The predicate below is equivalent to (candidate->data_ == tag_name).
    const bool same_tag =
        candidate->atom_ == tag_atom &&
        (tag_atom != Atom::UNKNOWN || candidate->data_ == tag_name);
    if (same_tag) {
      open_elements_stack_.Pop(open_elements_stack_.size() - idx);
      return;
    }

    if (candidate->IsSpecialElement()) {
      return;
    }
  }
}  // Parser::InBodyEndTagOther.
1940
1941
// Section 12.2.6.4.8.
1942
44.2k
bool Parser::TextIM() {
1943
44.2k
  switch (token_.token_type) {
1944
1.00k
    case TokenType::ERROR_TOKEN:
1945
1.00k
      open_elements_stack_.Pop();
1946
1.00k
      break;
1947
20.4k
    case TokenType::TEXT_TOKEN: {
1948
20.4k
      std::string_view data_view(token_.data);
1949
20.4k
      Node* node = open_elements_stack_.Top();
1950
20.4k
      if ((node->atom_ == Atom::TEXTAREA) && !node->FirstChild()) {
1951
        // Ignore a newline at the start of a <textarea> block.
1952
0
        if (!data_view.empty() && data_view.front() == '\r') {
1953
0
          data_view.remove_prefix(1);
1954
0
        }
1955
0
        if (!data_view.empty() && data_view.front() == '\n') {
1956
0
          data_view.remove_prefix(1);
1957
0
        }
1958
0
      }
1959
20.4k
      if (data_view.empty()) return true;
1960
20.4k
      AddText(data_view.data());
1961
20.4k
      return true;
1962
20.4k
    }
1963
22.7k
    case TokenType::END_TAG_TOKEN:
1964
22.7k
      open_elements_stack_.Pop();
1965
22.7k
      break;
1966
0
    default:
1967
0
      break;
1968
44.2k
  }
1969
23.7k
  insertion_mode_ = original_insertion_mode_;
1970
23.7k
  original_insertion_mode_ = nullptr;
1971
23.7k
  return token_.token_type == TokenType::END_TAG_TOKEN;
1972
44.2k
}  // Parser::TextIM.
1973
1974
// Section 12.2.6.4.9.
1975
3.72M
bool Parser::InTableIM() {
1976
3.72M
  switch (token_.token_type) {
1977
142k
    case TokenType::TEXT_TOKEN: {
1978
142k
      Strings::ReplaceAny(&token_.data, Strings::kNullChar, "");
1979
142k
      switch (open_elements_stack_.Top()->atom_) {
1980
28.2k
        case Atom::TABLE:
1981
29.6k
        case Atom::TBODY:
1982
33.4k
        case Atom::TFOOT:
1983
33.7k
        case Atom::THEAD:
1984
35.2k
        case Atom::TR: {
1985
          // All whitespace including \x00.
1986
35.2k
          if (Strings::IsAllWhitespaceChars(token_.data,
1987
35.2k
                                            Strings::kWhitespaceOrNull)) {
1988
423
            AddText(token_.data);
1989
423
            return true;
1990
423
          }
1991
34.8k
          break;
1992
35.2k
        }
1993
107k
        default:
1994
107k
          break;
1995
142k
      }
1996
141k
      break;
1997
142k
    }
1998
3.54M
    case TokenType::START_TAG_TOKEN: {
1999
3.54M
      switch (token_.atom) {
2000
0
        case Atom::CAPTION: {
2001
0
          ClearStackToContext(Scope::TableScope);
2002
0
          active_formatting_elements_stack_.Push(scope_marker_);
2003
0
          AddElement();
2004
0
          insertion_mode_ = std::bind(&Parser::InCaptionIM, this);
2005
0
          return true;
2006
0
        }
2007
4.16k
        case Atom::COLGROUP: {
2008
4.16k
          ClearStackToContext(Scope::TableScope);
2009
4.16k
          AddElement();
2010
4.16k
          insertion_mode_ = std::bind(&Parser::InColumnGroupIM, this);
2011
4.16k
          return true;
2012
0
        }
2013
4.16k
        case Atom::COL: {
2014
4.16k
          ParseImpliedToken(TokenType::START_TAG_TOKEN, Atom::COLGROUP,
2015
4.16k
                            AtomUtil::ToString(Atom::COLGROUP));
2016
4.16k
          return false;
2017
0
        }
2018
997k
        case Atom::TBODY:
2019
1.00M
        case Atom::TFOOT:
2020
1.00M
        case Atom::THEAD: {
2021
1.00M
          ClearStackToContext(Scope::TableScope);
2022
1.00M
          AddElement();
2023
1.00M
          insertion_mode_ = std::bind(&Parser::InTableBodyIM, this);
2024
1.00M
          return true;
2025
1.00M
        }
2026
424k
        case Atom::TD:
2027
993k
        case Atom::TH:
2028
994k
        case Atom::TR: {
2029
994k
          ParseImpliedToken(TokenType::START_TAG_TOKEN, Atom::TBODY,
2030
994k
                            AtomUtil::ToString(Atom::TBODY));
2031
994k
          return false;
2032
993k
        }
2033
134k
        case Atom::TABLE: {
2034
134k
          if (PopUntil(Scope::TableScope, Atom::TABLE)) {
2035
134k
            ResetInsertionMode();
2036
134k
            return false;
2037
134k
          }
2038
          // Ignore the token.
2039
0
          return true;
2040
134k
        }
2041
1
        case Atom::STYLE:
2042
1
        case Atom::SCRIPT:
2043
1
        case Atom::TEMPLATE: {
2044
1
          return InHeadIM();
2045
1
        }
2046
0
        case Atom::INPUT: {
2047
0
          for (auto& attr : token_.attributes) {
2048
0
            if (attr.key == "type" &&
2049
0
                Strings::EqualFold(attr.value, "hidden")) {
2050
0
              AddElement();
2051
0
              open_elements_stack_.Pop();
2052
0
              return true;
2053
0
            }
2054
0
          }
2055
0
          break;
2056
          // Otherwise drop down to the default action.
2057
0
        }
2058
503
        case Atom::FORM: {
2059
503
          if (open_elements_stack_.Contains(Atom::TEMPLATE) || form_) {
2060
            // Ignore the token.
2061
200
            return true;
2062
200
          }
2063
303
          AddElement();
2064
303
          form_ = open_elements_stack_.Pop();
2065
303
          break;
2066
503
        }
2067
0
        case Atom::SELECT: {
2068
0
          ReconstructActiveFormattingElements();
2069
0
          switch (top()->atom_) {
2070
0
            case Atom::TABLE:
2071
0
            case Atom::TBODY:
2072
0
            case Atom::TFOOT:
2073
0
            case Atom::THEAD:
2074
0
            case Atom::TR:
2075
0
              foster_parenting_ = true;
2076
0
              break;
2077
0
            default:
2078
              // Ignore remaining tags.
2079
0
              break;
2080
0
          }
2081
0
          AddElement();
2082
0
          foster_parenting_ = false;
2083
0
          frameset_ok_ = false;
2084
0
          insertion_mode_ = std::bind(&Parser::InSelectInTableIM, this);
2085
0
          return true;
2086
0
        }
2087
1.39M
        default:
2088
          // Ignore remaining tags.
2089
1.39M
          break;
2090
3.54M
      }
2091
1.39M
      break;
2092
3.54M
    }
2093
1.39M
    case TokenType::END_TAG_TOKEN: {
2094
35.6k
      switch (token_.atom) {
2095
18.0k
        case Atom::TABLE:
2096
18.0k
          if (PopUntil(Scope::TableScope, Atom::TABLE)) {
2097
18.0k
            ResetInsertionMode();
2098
18.0k
            return true;
2099
18.0k
          }
2100
          // Ignore the token.
2101
0
          return true;
2102
241
        case Atom::BODY:
2103
241
        case Atom::CAPTION:
2104
495
        case Atom::COL:
2105
495
        case Atom::COLGROUP:
2106
750
        case Atom::HTML:
2107
1.99k
        case Atom::TBODY:
2108
2.21k
        case Atom::TD:
2109
5.90k
        case Atom::TFOOT:
2110
6.10k
        case Atom::TH:
2111
6.54k
        case Atom::THEAD:
2112
6.91k
        case Atom::TR:
2113
          // Ignore the token.
2114
6.91k
          return true;
2115
0
        case Atom::TEMPLATE:
2116
0
          return InHeadIM();
2117
10.6k
        default:
2118
          // Ignore.
2119
10.6k
          break;
2120
35.6k
      }
2121
10.6k
      break;
2122
35.6k
    }
2123
10.6k
    case TokenType::COMMENT_TOKEN: {
2124
707
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
2125
707
      node->SetManufactured(token_.is_manufactured);
2126
707
      if (record_node_offsets_) {
2127
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
2128
0
      }
2129
707
      node->data_ = token_.data;
2130
707
      AddChild(node);
2131
707
      return true;
2132
35.6k
    }
2133
0
    case TokenType::DOCTYPE_TOKEN: {
2134
      // Ignore the token.
2135
0
      return true;
2136
35.6k
    }
2137
1.29k
    case TokenType::ERROR_TOKEN: {
2138
1.29k
      return InBodyIM();
2139
35.6k
    }
2140
0
    default:
2141
0
      break;
2142
3.72M
  }
2143
2144
1.55M
  foster_parenting_ = true;
2145
1.55M
  defer(foster_parenting_ = false;);
2146
1.55M
  return InBodyIM();
2147
3.72M
}  // Parser::InTableIM.
2148
2149
// Section 12.2.6.4.11.
2150
0
bool Parser::InCaptionIM() {
2151
0
  switch (token_.token_type) {
2152
0
    case TokenType::START_TAG_TOKEN: {
2153
0
      switch (token_.atom) {
2154
0
        case Atom::CAPTION:
2155
0
        case Atom::COL:
2156
0
        case Atom::COLGROUP:
2157
0
        case Atom::TBODY:
2158
0
        case Atom::TD:
2159
0
        case Atom::TFOOT:
2160
0
        case Atom::THEAD:
2161
0
        case Atom::TR: {
2162
0
          if (PopUntil(Scope::TableScope, Atom::CAPTION)) {
2163
0
            ClearActiveFormattingElements();
2164
0
            insertion_mode_ = std::bind(&Parser::InTableIM, this);
2165
0
            return false;
2166
0
          }
2167
          // Ignore the token.
2168
0
          return true;
2169
0
        }
2170
0
        case Atom::SELECT: {
2171
0
          ReconstructActiveFormattingElements();
2172
0
          AddElement();
2173
0
          frameset_ok_ = false;
2174
0
          insertion_mode_ = std::bind(&Parser::InSelectInTableIM, this);
2175
0
          return true;
2176
0
        }
2177
0
        default:
2178
0
          break;
2179
0
      }
2180
0
      break;
2181
0
    }
2182
0
    case TokenType::END_TAG_TOKEN: {
2183
0
      switch (token_.atom) {
2184
0
        case Atom::CAPTION: {
2185
0
          if (PopUntil(Scope::TableScope, Atom::CAPTION)) {
2186
0
            ClearActiveFormattingElements();
2187
0
            insertion_mode_ = std::bind(&Parser::InTableIM, this);
2188
0
          }
2189
0
          return true;
2190
0
        }
2191
0
        case Atom::TABLE: {
2192
0
          if (PopUntil(Scope::TableScope, Atom::CAPTION)) {
2193
0
            ClearActiveFormattingElements();
2194
0
            insertion_mode_ = std::bind(&Parser::InTableIM, this);
2195
0
            return false;
2196
0
          }
2197
          // Ignore the token.
2198
0
          return true;
2199
0
        }
2200
0
        case Atom::BODY:
2201
0
        case Atom::COL:
2202
0
        case Atom::COLGROUP:
2203
0
        case Atom::HTML:
2204
0
        case Atom::TBODY:
2205
0
        case Atom::TD:
2206
0
        case Atom::TFOOT:
2207
0
        case Atom::TH:
2208
0
        case Atom::THEAD:
2209
0
        case Atom::TR: {
2210
          // Ignore the token.
2211
0
          return true;
2212
0
        }
2213
0
        default:
2214
0
          break;
2215
0
      }
2216
0
      break;
2217
0
    }
2218
0
    default:
2219
0
      break;
2220
0
  }
2221
2222
0
  return InBodyIM();
2223
0
}  // Parser::InCaptionIM.
2224
2225
// Section 12.2.6.4.12.
2226
9.96k
bool Parser::InColumnGroupIM() {
2227
9.96k
  switch (token_.token_type) {
2228
1.45k
    case TokenType::TEXT_TOKEN: {
2229
1.45k
      std::string s = token_.data;
2230
1.45k
      Strings::TrimLeft(&s);
2231
1.45k
      if (s.size() < token_.data.size()) {
2232
        // Add the initial whitespace to the current node.
2233
136
        AddText(token_.data.substr(0, token_.data.size() - s.size()));
2234
136
        if (s.empty()) return true;
2235
22
        token_.data = s;
2236
22
      }
2237
1.33k
      break;
2238
1.45k
    }
2239
1.33k
    case TokenType::COMMENT_TOKEN: {
2240
167
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
2241
167
      node->SetManufactured(token_.is_manufactured);
2242
167
      if (record_node_offsets_) {
2243
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
2244
0
      }
2245
167
      node->data_ = token_.data;
2246
167
      AddChild(node);
2247
167
      return true;
2248
1.45k
    }
2249
0
    case TokenType::DOCTYPE_TOKEN: {
2250
      // Ignore the token.
2251
0
      return true;
2252
1.45k
    }
2253
7.66k
    case TokenType::START_TAG_TOKEN: {
2254
7.66k
      switch (token_.atom) {
2255
67
        case Atom::HTML: {
2256
67
          return InBodyIM();
2257
0
        }
2258
5.09k
        case Atom::COL: {
2259
5.09k
          AddElement();
2260
5.09k
          open_elements_stack_.Pop();
2261
5.09k
          AcknowledgeSelfClosingTag();
2262
5.09k
          return true;
2263
0
        }
2264
0
        case Atom::TEMPLATE: {
2265
0
          return InHeadIM();
2266
0
        }
2267
2.50k
        default:
2268
2.50k
          break;
2269
7.66k
      }
2270
2.50k
      break;
2271
7.66k
    }
2272
2.50k
    case TokenType::END_TAG_TOKEN: {
2273
589
      switch (token_.atom) {
2274
0
        case Atom::COLGROUP:
2275
0
          if (open_elements_stack_.Top()->atom_ == Atom::COLGROUP) {
2276
0
            open_elements_stack_.Pop();
2277
0
            insertion_mode_ = std::bind(&Parser::InTableIM, this);
2278
0
          }
2279
0
          return true;
2280
365
        case Atom::COL:
2281
          // Ignore the token.
2282
365
          return true;
2283
0
        case Atom::TEMPLATE:
2284
0
          return InHeadIM();
2285
224
        default:
2286
224
          break;
2287
589
      }
2288
224
      break;
2289
589
    }
2290
224
    case TokenType::ERROR_TOKEN: {
2291
94
      return InBodyIM();
2292
589
    }
2293
0
    default:
2294
0
      break;
2295
9.96k
  }
2296
2297
4.06k
  if (open_elements_stack_.Top()->atom_ != Atom::COLGROUP) {
2298
0
    return true;
2299
0
  }
2300
4.06k
  open_elements_stack_.Pop();
2301
4.06k
  insertion_mode_ = std::bind(&Parser::InTableIM, this);
2302
4.06k
  return false;
2303
4.06k
}  // Parser::InColumnGroupIM.
2304
2305
// Section 12.2.6.4.13.
2306
2.04M
bool Parser::InTableBodyIM() {
2307
2.04M
  switch (token_.token_type) {
2308
2.00M
    case TokenType::START_TAG_TOKEN: {
2309
2.00M
      switch (token_.atom) {
2310
1.00M
        case Atom::TR: {
2311
1.00M
          ClearStackToContext(Scope::TableBodyScope);
2312
1.00M
          AddElement();
2313
1.00M
          insertion_mode_ = std::bind(&Parser::InRowIM, this);
2314
1.00M
          return true;
2315
0
        }
2316
424k
        case Atom::TD:
2317
994k
        case Atom::TH: {
2318
994k
          ParseImpliedToken(TokenType::START_TAG_TOKEN, Atom::TR,
2319
994k
                            AtomUtil::ToString(Atom::TR));
2320
994k
          return false;
2321
424k
        }
2322
0
        case Atom::CAPTION:
2323
1.82k
        case Atom::COL:
2324
1.82k
        case Atom::COLGROUP:
2325
4.32k
        case Atom::TBODY:
2326
4.96k
        case Atom::TFOOT:
2327
5.91k
        case Atom::THEAD: {
2328
5.91k
          if (PopUntil(Scope::TableScope, Atom::TBODY, Atom::THEAD,
2329
5.91k
                       Atom::TFOOT)) {
2330
5.91k
            insertion_mode_ = std::bind(&Parser::InTableIM, this);
2331
5.91k
            return false;
2332
5.91k
          }
2333
          // Ignore the token.
2334
0
          return true;
2335
5.91k
        }
2336
7.96k
        default:
2337
7.96k
          break;
2338
2.00M
      }
2339
7.96k
      break;
2340
2.00M
    }
2341
27.4k
    case TokenType::END_TAG_TOKEN: {
2342
27.4k
      switch (token_.atom) {
2343
887
        case Atom::TBODY:
2344
4.82k
        case Atom::TFOOT:
2345
5.26k
        case Atom::THEAD: {
2346
5.26k
          if (ElementInScope(Scope::TableScope, token_.atom)) {
2347
5.25k
            ClearStackToContext(Scope::TableBodyScope);
2348
5.25k
            open_elements_stack_.Pop();
2349
5.25k
            insertion_mode_ = std::bind(&Parser::InTableIM, this);
2350
5.25k
          }
2351
5.26k
          return true;
2352
4.82k
        }
2353
17.9k
        case Atom::TABLE: {
2354
17.9k
          if (PopUntil(Scope::TableScope, Atom::TBODY, Atom::THEAD,
2355
17.9k
                       Atom::TFOOT)) {
2356
17.9k
            insertion_mode_ = std::bind(&Parser::InTableIM, this);
2357
17.9k
            return false;
2358
17.9k
          }
2359
          // Ignore the token.
2360
0
          return true;
2361
17.9k
        }
2362
132
        case Atom::BODY:
2363
132
        case Atom::CAPTION:
2364
198
        case Atom::COL:
2365
198
        case Atom::COLGROUP:
2366
1.73k
        case Atom::HTML:
2367
1.95k
        case Atom::TD:
2368
2.57k
        case Atom::TH:
2369
3.56k
        case Atom::TR: {
2370
          // Ignore the token.
2371
3.56k
          return true;
2372
2.57k
        }
2373
592
        default:
2374
592
          break;
2375
27.4k
      }
2376
592
      break;
2377
27.4k
    }
2378
592
    case TokenType::COMMENT_TOKEN: {
2379
559
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
2380
559
      node->SetManufactured(token_.is_manufactured);
2381
559
      if (record_node_offsets_) {
2382
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
2383
0
      }
2384
559
      node->data_ = token_.data;
2385
559
      AddChild(node);
2386
559
      return true;
2387
27.4k
    }
2388
6.20k
    default:
2389
6.20k
      break;
2390
2.04M
  }
2391
2392
14.7k
  return InTableIM();
2393
2.04M
}  // Parser::InTableBodyIM.
2394
2395
// Section 12.2.6.4.14.
2396
1.98M
bool Parser::InRowIM() {
2397
1.98M
  switch (token_.token_type) {
2398
1.95M
    case TokenType::START_TAG_TOKEN: {
2399
1.95M
      switch (token_.atom) {
2400
430k
        case Atom::TD:
2401
1.06M
        case Atom::TH: {
2402
1.06M
          ClearStackToContext(Scope::TableRowScope);
2403
1.06M
          AddElement();
2404
1.06M
          active_formatting_elements_stack_.Push(scope_marker_);
2405
1.06M
          insertion_mode_ = std::bind(&Parser::InCellIM, this);
2406
1.06M
          return true;
2407
430k
        }
2408
0
        case Atom::CAPTION:
2409
1.82k
        case Atom::COL:
2410
1.82k
        case Atom::COLGROUP:
2411
1.85k
        case Atom::TBODY:
2412
1.89k
        case Atom::TFOOT:
2413
2.31k
        case Atom::THEAD:
2414
6.44k
        case Atom::TR: {
2415
6.44k
          if (PopUntil(Scope::TableScope, Atom::TR)) {
2416
6.44k
            insertion_mode_ = std::bind(&Parser::InTableBodyIM, this);
2417
6.44k
            return false;
2418
6.44k
          }
2419
          // Ignore the token.
2420
0
          return true;
2421
0
          break;
2422
6.44k
        }
2423
883k
        default:
2424
883k
          break;
2425
1.95M
      }
2426
883k
      break;
2427
1.95M
    }
2428
883k
    case TokenType::END_TAG_TOKEN: {
2429
31.5k
      switch (token_.atom) {
2430
2.37k
        case Atom::TR: {
2431
2.37k
          if (PopUntil(Scope::TableScope, Atom::TR)) {
2432
2.37k
            insertion_mode_ = std::bind(&Parser::InTableBodyIM, this);
2433
2.37k
          }
2434
          // Ignore the token.
2435
2.37k
          return true;
2436
0
        }
2437
17.9k
        case Atom::TABLE: {
2438
17.9k
          if (PopUntil(Scope::TableScope, Atom::TR)) {
2439
17.9k
            insertion_mode_ = std::bind(&Parser::InTableBodyIM, this);
2440
17.9k
            return false;
2441
17.9k
          }
2442
          // Ignore the token.
2443
0
          return true;
2444
17.9k
        }
2445
453
        case Atom::TBODY:
2446
487
        case Atom::TFOOT:
2447
787
        case Atom::THEAD: {
2448
787
          if (ElementInScope(Scope::TableScope, token_.atom)) {
2449
686
            ParseImpliedToken(TokenType::END_TAG_TOKEN, Atom::TR,
2450
686
                              AtomUtil::ToString(Atom::TR));
2451
686
            return false;
2452
686
          }
2453
          // Ignore the token.
2454
101
          return true;
2455
787
        }
2456
238
        case Atom::BODY:
2457
238
        case Atom::CAPTION:
2458
634
        case Atom::COL:
2459
634
        case Atom::COLGROUP:
2460
891
        case Atom::HTML:
2461
1.31k
        case Atom::TD:
2462
1.63k
        case Atom::TH: {
2463
          // Ignore the token.
2464
1.63k
          return true;
2465
1.31k
        }
2466
8.76k
        default:
2467
8.76k
          break;
2468
31.5k
      }
2469
8.76k
      break;
2470
31.5k
    }
2471
8.76k
    default:
2472
7.01k
      break;
2473
1.98M
  }
2474
2475
899k
  return InTableIM();
2476
1.98M
}  // Parser::InRowIM.
2477
2478
// Section 12.2.6.4.15.
2479
3.81M
bool Parser::InCellIM() {
2480
3.81M
  switch (token_.token_type) {
2481
3.73M
    case TokenType::START_TAG_TOKEN: {
2482
3.73M
      switch (token_.atom) {
2483
0
        case Atom::CAPTION:
2484
1.51k
        case Atom::COL:
2485
1.51k
        case Atom::COLGROUP:
2486
1.53k
        case Atom::TBODY:
2487
7.30k
        case Atom::TD:
2488
7.32k
        case Atom::TFOOT:
2489
65.3k
        case Atom::TH:
2490
65.3k
        case Atom::THEAD:
2491
66.3k
        case Atom::TR: {
2492
66.3k
          if (PopUntil(Scope::TableScope, Atom::TD, Atom::TH)) {
2493
            // Close the cell and reprocess.
2494
66.3k
            ClearActiveFormattingElements();
2495
66.3k
            insertion_mode_ = std::bind(&Parser::InRowIM, this);
2496
66.3k
            return false;
2497
66.3k
          }
2498
          // Ignore the token.
2499
0
          return true;
2500
66.3k
        }
2501
0
        case Atom::SELECT: {
2502
0
          ReconstructActiveFormattingElements();
2503
0
          AddElement();
2504
0
          frameset_ok_ = false;
2505
0
          insertion_mode_ = std::bind(&Parser::InSelectInTableIM, this);
2506
0
          return true;
2507
66.3k
        }
2508
3.67M
        default:
2509
3.67M
          break;
2510
3.73M
      }
2511
3.67M
      break;
2512
3.73M
    }
2513
3.67M
    case TokenType::END_TAG_TOKEN: {
2514
24.2k
      switch (token_.atom) {
2515
579
        case Atom::TD:
2516
1.99k
        case Atom::TH: {
2517
1.99k
          if (!PopUntil(Scope::TableScope, token_.atom)) {
2518
            // Ignore the token.
2519
386
            return true;
2520
386
          }
2521
1.60k
          ClearActiveFormattingElements();
2522
1.60k
          insertion_mode_ = std::bind(&Parser::InRowIM, this);
2523
1.60k
          return true;
2524
1.99k
        }
2525
256
        case Atom::BODY:
2526
256
        case Atom::CAPTION:
2527
453
        case Atom::COL:
2528
453
        case Atom::COLGROUP:
2529
487
        case Atom::HTML: {
2530
          // Ignore the token.
2531
487
          return true;
2532
453
        }
2533
17.9k
        case Atom::TABLE:
2534
18.3k
        case Atom::TBODY:
2535
18.3k
        case Atom::TFOOT:
2536
18.5k
        case Atom::THEAD:
2537
19.4k
        case Atom::TR: {
2538
19.4k
          if (!ElementInScope(Scope::TableScope, token_.atom)) {
2539
            // Ignore the token.
2540
260
            return true;
2541
260
          }
2542
          // Close the cell and reprocess.
2543
19.2k
          if (PopUntil(Scope::TableScope, Atom::TD, Atom::TH)) {
2544
19.2k
            ClearActiveFormattingElements();
2545
19.2k
          }
2546
19.2k
          insertion_mode_ = std::bind(&Parser::InRowIM, this);
2547
19.2k
          return false;
2548
19.4k
        }
2549
2.25k
        default:
2550
2.25k
          break;
2551
24.2k
      }
2552
2.25k
      break;
2553
24.2k
    }
2554
49.5k
    default:
2555
49.5k
      break;
2556
3.81M
  }
2557
3.72M
  return InBodyIM();
2558
3.81M
}  // Parser::InCellIM.
2559
2560
// Section 12.2.6.4.16.
2561
0
bool Parser::InSelectIM() {
2562
0
  switch (token_.token_type) {
2563
0
    case TokenType::TEXT_TOKEN: {
2564
0
      Strings::ReplaceAny(&token_.data, Strings::kNullChar, "");
2565
0
      AddText(token_.data);
2566
0
      break;
2567
0
    }
2568
0
    case TokenType::START_TAG_TOKEN: {
2569
0
      switch (token_.atom) {
2570
0
        case Atom::HTML: {
2571
0
          return InBodyIM();
2572
0
        }
2573
0
        case Atom::OPTION: {
2574
0
          if (top()->atom_ == Atom::OPTION) {
2575
0
            open_elements_stack_.Pop();
2576
0
          }
2577
0
          AddElement();
2578
0
          break;
2579
0
        }
2580
0
        case Atom::OPTGROUP: {
2581
0
          if (top()->atom_ == Atom::OPTION) {
2582
0
            open_elements_stack_.Pop();
2583
0
          }
2584
0
          if (top()->atom_ == Atom::OPTGROUP) {
2585
0
            open_elements_stack_.Pop();
2586
0
          }
2587
0
          AddElement();
2588
0
          break;
2589
0
        }
2590
0
        case Atom::SELECT: {
2591
0
          if (PopUntil(Scope::SelectScope, Atom::SELECT)) {
2592
0
            ResetInsertionMode();
2593
0
          }
2594
          // Ignore the token.
2595
0
          return true;
2596
0
        }
2597
0
        case Atom::INPUT:
2598
0
        case Atom::KEYGEN:
2599
0
        case Atom::TEXTAREA: {
2600
0
          if (ElementInScope(Scope::SelectScope, Atom::SELECT)) {
2601
0
            ParseImpliedToken(TokenType::END_TAG_TOKEN, Atom::SELECT,
2602
0
                              AtomUtil::ToString(Atom::SELECT));
2603
0
            return false;
2604
0
          }
2605
          // In order to properly ignore <textarea>, we need to change the
2606
          // tokenizer mode.
2607
0
          tokenizer_->NextIsNotRawText();
2608
          // Ignore the token.
2609
0
          return true;
2610
0
        }
2611
0
        case Atom::SCRIPT:
2612
0
        case Atom::TEMPLATE: {
2613
0
          return InHeadIM();
2614
0
        }
2615
0
        case Atom::IFRAME:
2616
0
        case Atom::NOEMBED:
2617
0
        case Atom::NOFRAMES:
2618
0
        case Atom::NOSCRIPT:
2619
0
        case Atom::PLAINTEXT:
2620
0
        case Atom::STYLE:
2621
0
        case Atom::TITLE:
2622
0
        case Atom::XMP: {
2623
          // Don't let the tokenizer go into raw text mode when there are raw
2624
          // tags to be ignored. These tags should be ignored from the tokenizer
2625
          // properly.
2626
0
          tokenizer_->NextIsNotRawText();
2627
          // Ignore the token.
2628
0
          return true;
2629
0
        }
2630
0
        default:
2631
0
          break;
2632
0
      }
2633
0
      break;
2634
0
    }
2635
0
    case TokenType::END_TAG_TOKEN: {
2636
0
      switch (token_.atom) {
2637
0
        case Atom::OPTION: {
2638
0
          if (top()->atom_ == Atom::OPTION) {
2639
0
            open_elements_stack_.Pop();
2640
0
          }
2641
0
          break;
2642
0
        }
2643
0
        case Atom::OPTGROUP: {
2644
0
          int i = open_elements_stack_.size() - 1;
2645
0
          Node* node = open_elements_stack_.at(i);
2646
0
          if (node && node->atom_ == Atom::OPTION) {
2647
0
            i--;
2648
0
          }
2649
0
          node = open_elements_stack_.at(i);
2650
0
          if (node && node->atom_ == Atom::OPTGROUP) {
2651
0
            open_elements_stack_.Pop(open_elements_stack_.size() - i);
2652
0
          }
2653
0
          break;
2654
0
        }
2655
0
        case Atom::SELECT: {
2656
0
          if (!PopUntil(Scope::SelectScope, Atom::SELECT)) {
2657
            // Ignore the token.
2658
0
            return true;
2659
0
          }
2660
0
          ResetInsertionMode();
2661
0
          break;
2662
0
        }
2663
0
        case Atom::TEMPLATE: {
2664
0
          return InHeadIM();
2665
0
        }
2666
0
        default:
2667
0
          break;
2668
0
      }
2669
0
      break;
2670
0
    }
2671
0
    case TokenType::COMMENT_TOKEN: {
2672
0
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
2673
0
      node->SetManufactured(token_.is_manufactured);
2674
0
      if (record_node_offsets_) {
2675
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
2676
0
      }
2677
0
      node->data_ = token_.data;
2678
0
      AddChild(node);
2679
0
      break;
2680
0
    }
2681
0
    case TokenType::DOCTYPE_TOKEN: {
2682
      // Ignore the token.
2683
0
      return true;
2684
0
    }
2685
0
    case TokenType::ERROR_TOKEN: {
2686
0
      return InBodyIM();
2687
0
    }
2688
0
    default:
2689
0
      break;
2690
0
  }
2691
2692
0
  return true;
2693
0
}  // Parser::InSelectIM.
2694
2695
// Section 12.2.6.4.17.
2696
0
bool Parser::InSelectInTableIM() {
2697
0
  switch (token_.token_type) {
2698
0
    case TokenType::START_TAG_TOKEN:
2699
0
    case TokenType::END_TAG_TOKEN: {
2700
0
      switch (token_.atom) {
2701
0
        case Atom::CAPTION:
2702
0
        case Atom::TABLE:
2703
0
        case Atom::TBODY:
2704
0
        case Atom::TFOOT:
2705
0
        case Atom::THEAD:
2706
0
        case Atom::TR:
2707
0
        case Atom::TD:
2708
0
        case Atom::TH: {
2709
0
          if (token_.token_type == TokenType::END_TAG_TOKEN &&
2710
0
              !ElementInScope(Scope::TableScope, token_.atom)) {
2711
            // Ignore the token.
2712
0
            return true;
2713
0
          }
2714
          // This is like p.popUntil(selectScope, a.Select), but it also
2715
          // matches <math select>, not just <select>. Matching the MathML
2716
          // tag is arguably incorrect (conceptually), but it mimics what
2717
          // Chromium does.
2718
0
          for (int i = open_elements_stack_.size() - 1; i >= 0; --i) {
2719
0
            if (open_elements_stack_.at(i)->atom_ == Atom::SELECT) {
2720
0
              open_elements_stack_.Pop(open_elements_stack_.size() - i);
2721
0
              break;
2722
0
            }
2723
0
          }
2724
0
          ResetInsertionMode();
2725
0
          return false;
2726
0
        }
2727
0
        default:
2728
0
          break;
2729
0
      }
2730
0
      break;
2731
0
    }
2732
0
    default:
2733
0
      break;
2734
0
  }
2735
2736
0
  return InSelectIM();
2737
0
}  // Parser::InSelectInTableIM.
2738
2739
// Section 12.2.6.4.18.
2740
0
bool Parser::InTemplateIM() {
2741
0
  switch (token_.token_type) {
2742
0
    case TokenType::TEXT_TOKEN:
2743
0
    case TokenType::COMMENT_TOKEN:
2744
0
    case TokenType::DOCTYPE_TOKEN:
2745
0
      return InBodyIM();
2746
0
    case TokenType::START_TAG_TOKEN: {
2747
0
      switch (token_.atom) {
2748
0
        case Atom::BASE:
2749
0
        case Atom::BASEFONT:
2750
0
        case Atom::BGSOUND:
2751
0
        case Atom::LINK:
2752
0
        case Atom::META:
2753
0
        case Atom::NOFRAMES:
2754
0
        case Atom::SCRIPT:
2755
0
        case Atom::STYLE:
2756
0
        case Atom::TEMPLATE:
2757
0
        case Atom::TITLE:
2758
0
          return InHeadIM();
2759
0
        case Atom::CAPTION:
2760
0
        case Atom::COLGROUP:
2761
0
        case Atom::TBODY:
2762
0
        case Atom::TFOOT:
2763
0
        case Atom::THEAD: {
2764
0
          template_stack_.pop_back();
2765
0
          template_stack_.push_back(std::bind(&Parser::InTableIM, this));
2766
0
          insertion_mode_ = std::bind(&Parser::InTableIM, this);
2767
0
          return false;
2768
0
        }
2769
0
        case Atom::COL: {
2770
0
          template_stack_.pop_back();
2771
0
          template_stack_.push_back(std::bind(&Parser::InColumnGroupIM, this));
2772
0
          insertion_mode_ = std::bind(&Parser::InColumnGroupIM, this);
2773
0
          return false;
2774
0
        }
2775
0
        case Atom::TR: {
2776
0
          template_stack_.pop_back();
2777
0
          template_stack_.push_back(std::bind(&Parser::InTableBodyIM, this));
2778
0
          insertion_mode_ = std::bind(&Parser::InTableBodyIM, this);
2779
0
          return false;
2780
0
        }
2781
0
        case Atom::TD:
2782
0
        case Atom::TH: {
2783
0
          template_stack_.pop_back();
2784
0
          template_stack_.push_back(std::bind(&Parser::InRowIM, this));
2785
0
          insertion_mode_ = std::bind(&Parser::InRowIM, this);
2786
0
          return false;
2787
0
        }
2788
0
        default:
2789
0
          template_stack_.pop_back();
2790
0
          template_stack_.push_back(std::bind(&Parser::InBodyIM, this));
2791
0
          insertion_mode_ = std::bind(&Parser::InBodyIM, this);
2792
0
          return false;
2793
0
      }
2794
0
    }
2795
0
    case TokenType::END_TAG_TOKEN: {
2796
0
      switch (token_.atom) {
2797
0
        case Atom::TEMPLATE:
2798
0
          return InHeadIM();
2799
0
        default:
2800
          // Ignore the token.
2801
0
          return true;
2802
0
      }
2803
0
    }
2804
0
    case TokenType::ERROR_TOKEN: {
2805
0
      if (!open_elements_stack_.Contains(Atom::TEMPLATE)) {
2806
        // Ignore the token.
2807
0
        return true;
2808
0
      }
2809
      // TODO: remove this divergence from the HTML5 spec.
2810
      //
2811
      // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
2812
0
      GenerateImpliedEndTags();
2813
0
      for (int i = open_elements_stack_.size() - 1; i >= 0; --i) {
2814
0
        Node* node = open_elements_stack_.at(i);
2815
0
        if (node->name_space_.empty() && node->atom_ == Atom::TEMPLATE) {
2816
0
          open_elements_stack_.Pop(open_elements_stack_.size() - i);
2817
0
          break;
2818
0
        }
2819
0
      }
2820
0
      ClearActiveFormattingElements();
2821
0
      template_stack_.pop_back();
2822
0
      ResetInsertionMode();
2823
0
      return false;
2824
0
    }
2825
0
    default:
2826
0
      break;
2827
0
  }
2828
0
  return false;
2829
0
}  // Parser::InTemplateIM.
2830
2831
// Section 12.2.6.4.19.
2832
16.5k
bool Parser::AfterBodyIM() {
2833
16.5k
  switch (token_.token_type) {
2834
20
    case TokenType::ERROR_TOKEN:
2835
      // Stop parsing.
2836
20
      return true;
2837
3.40k
    case TokenType::TEXT_TOKEN:
2838
      // https://www.w3.org/TR/2011/WD-html5-20110113/tokenization.html#parsing-main-afterbody
2839
3.40k
      if (token_.data.find_first_not_of(Strings::kWhitespace) ==
2840
3.40k
          std::string::npos) {
2841
        // It was all whitesapce.
2842
136
        return InBodyIM();
2843
136
      }
2844
3.27k
      break;
2845
3.27k
    case TokenType::START_TAG_TOKEN:
2846
2.28k
      if (token_.atom == Atom::HTML) {
2847
0
        return InBodyIM();
2848
0
      }
2849
2.28k
      break;
2850
8.23k
    case TokenType::END_TAG_TOKEN:
2851
8.23k
      if (token_.atom == Atom::HTML) {
2852
6.08k
        if (!fragment_) {
2853
6.08k
          insertion_mode_ = std::bind(&Parser::AfterAfterBodyIM, this);
2854
6.08k
        }
2855
6.08k
        return true;
2856
6.08k
      }
2857
2.15k
      break;
2858
2.63k
    case TokenType::COMMENT_TOKEN: {
2859
      // The comment is attached to the <html> element.
2860
2.63k
      CHECK(open_elements_stack_.size() > 0 &&
2861
2.63k
            open_elements_stack_.at(0)->atom_ == Atom::HTML)
2862
2.63k
          << "html: bad parser state: <html> element not found, in the "
2863
2.63k
             "after-body insertion mode";
2864
2.63k
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
2865
2.63k
      node->SetManufactured(token_.is_manufactured);
2866
2.63k
      if (record_node_offsets_) {
2867
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
2868
0
      }
2869
2.63k
      node->data_ = token_.data;
2870
2.63k
      open_elements_stack_.at(0)->AppendChild(node);
2871
2.63k
      return true;
2872
8.23k
    }
2873
0
    default:
2874
0
      break;
2875
16.5k
  }
2876
2877
7.70k
  insertion_mode_ = std::bind(&Parser::InBodyIM, this);
2878
7.70k
  return false;
2879
16.5k
}  // Parser::AfterBodyIM.
2880
2881
// Section 12.2.6.4.20.
2882
0
bool Parser::InFramesetIM() {
2883
0
  switch (token_.token_type) {
2884
0
    case TokenType::COMMENT_TOKEN: {
2885
0
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
2886
0
      node->SetManufactured(token_.is_manufactured);
2887
0
      if (record_node_offsets_) {
2888
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
2889
0
      }
2890
0
      node->data_ = token_.data;
2891
0
      AddChild(node);
2892
0
      break;
2893
0
    }
2894
0
    case TokenType::TEXT_TOKEN: {
2895
0
      std::string whitespace_only = ExtractWhitespace(token_.data);
2896
0
      if (!whitespace_only.empty()) AddText(whitespace_only);
2897
0
      break;
2898
0
    }
2899
0
    case TokenType::START_TAG_TOKEN:
2900
0
      switch (token_.atom) {
2901
0
        case Atom::HTML:
2902
0
          return InBodyIM();
2903
0
        case Atom::FRAMESET:
2904
0
          AddElement();
2905
0
          break;
2906
0
        case Atom::FRAME:
2907
0
          AddElement();
2908
0
          open_elements_stack_.Pop();
2909
0
          AcknowledgeSelfClosingTag();
2910
0
          break;
2911
0
        case Atom::NOFRAMES:
2912
0
          return InHeadIM();
2913
0
        default:
2914
0
          break;
2915
0
      }
2916
0
      break;
2917
0
    case TokenType::END_TAG_TOKEN:
2918
0
      switch (token_.atom) {
2919
0
        case Atom::FRAMESET:
2920
0
          if (open_elements_stack_.Top()->atom_ != Atom::HTML) {
2921
0
            open_elements_stack_.Pop();
2922
0
            if (open_elements_stack_.Top()->atom_ != Atom::FRAMESET) {
2923
0
              insertion_mode_ = std::bind(&Parser::AfterFramesetIM, this);
2924
0
              return true;
2925
0
            }
2926
0
          }
2927
0
          break;
2928
0
        default:
2929
0
          break;
2930
0
      }
2931
0
      break;
2932
0
    default:
2933
      // Ignore the token.
2934
0
      break;
2935
0
  }
2936
0
  return true;
2937
0
}  // Parser::InFramesetIM.
2938
2939
// Section 12.2.6.4.21.
2940
0
bool Parser::AfterFramesetIM() {
2941
0
  switch (token_.token_type) {
2942
0
    case TokenType::COMMENT_TOKEN: {
2943
0
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
2944
0
      node->SetManufactured(token_.is_manufactured);
2945
0
      if (record_node_offsets_) {
2946
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
2947
0
      }
2948
0
      node->data_ = token_.data;
2949
0
      AddChild(node);
2950
0
      break;
2951
0
    }
2952
0
    case TokenType::TEXT_TOKEN: {
2953
0
      std::string whitespace_only = ExtractWhitespace(token_.data);
2954
0
      if (!whitespace_only.empty()) AddText(whitespace_only);
2955
0
      break;
2956
0
    }
2957
0
    case TokenType::START_TAG_TOKEN:
2958
0
      switch (token_.atom) {
2959
0
        case Atom::HTML:
2960
0
          return InBodyIM();
2961
0
        case Atom::NOFRAMES:
2962
0
          return InHeadIM();
2963
0
        default:
2964
0
          break;
2965
0
      }
2966
0
      break;
2967
0
    case TokenType::END_TAG_TOKEN:
2968
0
      switch (token_.atom) {
2969
0
        case Atom::HTML:
2970
0
          insertion_mode_ = std::bind(&Parser::AfterAfterFramesetIM, this);
2971
0
          return true;
2972
0
        default:
2973
0
          break;
2974
0
      }
2975
0
      break;
2976
0
    default:
2977
      // Ignore the token.
2978
0
      break;
2979
0
  }
2980
0
  return true;
2981
0
}  // Parser::AfterFramesetIM.
2982
2983
// Section 12.2.6.4.22.
2984
7.62k
bool Parser::AfterAfterBodyIM() {
2985
7.62k
  switch (token_.token_type) {
2986
68
    case TokenType::ERROR_TOKEN:
2987
      // Stop parsing.
2988
68
      return true;
2989
673
    case TokenType::TEXT_TOKEN: {
2990
673
      if (token_.data.find_first_not_of(Strings::kWhitespace) ==
2991
673
          std::string::npos) {
2992
179
        return InBodyIM();
2993
179
      }
2994
494
      break;
2995
673
    }
2996
1.27k
    case TokenType::START_TAG_TOKEN:
2997
1.27k
      if (token_.atom == Atom::HTML) {
2998
1.00k
        return InBodyIM();
2999
1.00k
      }
3000
275
      break;
3001
358
    case TokenType::COMMENT_TOKEN: {
3002
358
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
3003
358
      node->SetManufactured(token_.is_manufactured);
3004
358
      if (record_node_offsets_) {
3005
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
3006
0
      }
3007
358
      node->data_ = token_.data;
3008
358
      document_->root_node_->AppendChild(node);
3009
358
      return true;
3010
1.27k
    }
3011
0
    case TokenType::DOCTYPE_TOKEN:
3012
0
      return InBodyIM();
3013
5.24k
    default:
3014
5.24k
      break;
3015
7.62k
  }
3016
3017
6.01k
  insertion_mode_ = std::bind(&Parser::InBodyIM, this);
3018
6.01k
  return false;
3019
7.62k
}  // Parser::AfterAfterBodyIM.
3020
3021
// Section 12.2.6.4.23.
3022
0
bool Parser::AfterAfterFramesetIM() {
3023
0
  switch (token_.token_type) {
3024
0
    case TokenType::COMMENT_TOKEN: {
3025
0
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
3026
0
      node->SetManufactured(token_.is_manufactured);
3027
0
      if (record_node_offsets_) {
3028
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
3029
0
      }
3030
0
      node->data_ = token_.data;
3031
0
      document_->root_node_->AppendChild(node);
3032
0
      break;
3033
0
    }
3034
0
    case TokenType::TEXT_TOKEN: {
3035
0
      std::string whitespace_only = ExtractWhitespace(token_.data);
3036
0
      if (!whitespace_only.empty()) {
3037
0
        token_.data = whitespace_only;
3038
0
        return InBodyIM();
3039
0
      }
3040
0
      break;
3041
0
    }
3042
0
    case TokenType::START_TAG_TOKEN:
3043
0
      switch (token_.atom) {
3044
0
        case Atom::HTML:
3045
0
          return InBodyIM();
3046
0
        case Atom::NOFRAMES:
3047
0
          return InHeadIM();
3048
0
        default:
3049
0
          break;
3050
0
      }
3051
0
      break;
3052
0
    case TokenType::DOCTYPE_TOKEN:
3053
0
      return InBodyIM();
3054
0
    default:
3055
0
      break;
3056
0
  }
3057
0
  return true;
3058
0
}  // Parser::AfterAfterFramesetIM.
3059
3060
20.0M
// Returns the node used for foreign-content decisions: the fragment context
// node when parsing a fragment with a context node and exactly one element
// on the open-elements stack, otherwise the top of the open-elements stack.
Node* Parser::AdjustedCurrentNode() {
  if (open_elements_stack_.size() != 1 || !fragment_ || !context_node_) {
    return open_elements_stack_.Top();
  }
  return context_node_;
}
3065
3066
// Section 12.2.6.5.
3067
377k
bool Parser::ParseForeignContent() {
3068
377k
  switch (token_.token_type) {
3069
6.56k
    case TokenType::TEXT_TOKEN: {
3070
6.56k
      if (frameset_ok_) {
3071
225
        frameset_ok_ = (token_.data.find_first_not_of(
3072
225
                            Strings::kWhitespaceOrNull) == std::string::npos);
3073
225
      }
3074
      // Replaces null char with \ufffd replacement character.
3075
6.56k
      Strings::ReplaceAny(&token_.data, Strings::kNullChar,
3076
6.56k
                          Strings::kNullReplacementChar);
3077
6.56k
      AddText(token_.data);
3078
6.56k
      break;
3079
0
    }
3080
634
    case TokenType::COMMENT_TOKEN: {
3081
634
      Node* node = document_->NewNode(NodeType::COMMENT_NODE);
3082
634
      node->SetManufactured(token_.is_manufactured);
3083
634
      if (record_node_offsets_) {
3084
0
        node->line_col_in_html_src_ = token_.line_col_in_html_src;
3085
0
      }
3086
634
      node->data_ = token_.data;
3087
634
      AddChild(node);
3088
634
      break;
3089
0
    }
3090
355k
    case TokenType::START_TAG_TOKEN: {
3091
355k
      if (!fragment_) {
3092
355k
        auto breaktout_tag = std::find(std::begin(kBreakoutTags),
3093
355k
                                       std::end(kBreakoutTags), token_.atom);
3094
355k
        bool is_breakout_tag = breaktout_tag != std::end(kBreakoutTags);
3095
3096
355k
        if (token_.atom == Atom::FONT) {
3097
2.63k
          for (auto& attr : token_.attributes) {
3098
2.63k
            std::string key = attr.key;
3099
2.63k
            if (key == "color" || key == "face" || key == "size") {
3100
0
              is_breakout_tag = true;
3101
0
              break;
3102
0
            }
3103
2.63k
          }
3104
800
        }
3105
355k
        if (is_breakout_tag) {
3106
46.4k
          for (int i = open_elements_stack_.size() - 1; i >= 0; --i) {
3107
46.4k
            Node* node = open_elements_stack_.at(i);
3108
46.4k
            if (node->name_space_.empty() || HtmlIntegrationPoint(*node) ||
3109
46.4k
                MathMLTextIntegrationPoint(*node)) {
3110
995
              open_elements_stack_.Pop(open_elements_stack_.size() - i - 1);
3111
995
              break;
3112
995
            }
3113
46.4k
          }
3114
995
          return false;
3115
995
        }
3116
355k
      }
3117
3118
354k
      Node* current = AdjustedCurrentNode();
3119
354k
      if (current->name_space_ == "math") {
3120
298k
        AdjustMathMLAttributeNames(&token_.attributes);
3121
298k
      } else if (current->name_space_ == "svg") {
3122
2.00M
        for (auto [name, adjusted] : kSvgTagNameAdjustments) {
3123
2.00M
          if (name == token_.atom) {
3124
0
            token_.atom = adjusted;
3125
0
          }
3126
2.00M
        }
3127
55.7k
        AdjustSVGAttributeNames(&token_.attributes);
3128
55.7k
      } else {
3129
0
        CHECK(false) << "html: bad parser state: unexpected namespace";
3130
0
      }
3131
3132
354k
      AdjustForeignAttributes(&token_.attributes);
3133
354k
      auto& ns = current->name_space_;
3134
354k
      AddElement();
3135
354k
      top()->name_space_ = ns;
3136
354k
      if (!ns.empty()) {
3137
        // Don't let the tokenizer go into raw text mode in foreign content.
3138
        // (e.g. in an SVG <title> tag).
3139
354k
        tokenizer_->NextIsNotRawText();
3140
354k
      }
3141
354k
      if (has_self_closing_token_) {
3142
2.76k
        open_elements_stack_.Pop();
3143
2.76k
        AcknowledgeSelfClosingTag();
3144
2.76k
      }
3145
354k
      break;
3146
355k
    }
3147
15.1k
    case TokenType::END_TAG_TOKEN:
3148
67.9k
      for (int i = open_elements_stack_.size() - 1; i >= 0; --i) {
3149
67.9k
        if (open_elements_stack_.at(i)->name_space_.empty()) {
3150
14.5k
          return insertion_mode_();
3151
14.5k
        }
3152
3153
53.4k
        auto sn = open_elements_stack_.at(i);
3154
53.4k
        auto node_data = sn->atom_ != Atom::UNKNOWN
3155
53.4k
                             ? AtomUtil::ToString(sn->atom_)
3156
53.4k
                             : sn->data_;
3157
53.4k
        auto token_data = token_.atom != Atom::UNKNOWN
3158
53.4k
                              ? AtomUtil::ToString(token_.atom)
3159
53.4k
                              : token_.data;
3160
3161
53.4k
        if (Strings::EqualFold(node_data, token_data)) {
3162
680
          open_elements_stack_.Pop(open_elements_stack_.size() - i);
3163
680
          break;
3164
680
        }
3165
53.4k
      }
3166
680
      return true;
3167
0
    default:
3168
      // Ignore the token.
3169
0
      break;
3170
377k
  }
3171
361k
  return true;
3172
377k
}  // Parser::ParseForeignContent.
3173
3174
// Section 12.2.6.
3175
19.7M
bool Parser::InForeignContent() {
3176
19.7M
  if (open_elements_stack_.size() == 0) return false;
3177
3178
19.6M
  Node* node = AdjustedCurrentNode();
3179
19.6M
  if (node->name_space_.empty()) return false;
3180
383k
  Atom token_atom = token_.atom;
3181
383k
  TokenType token_type = token_.token_type;
3182
383k
  if (MathMLTextIntegrationPoint(*node)) {
3183
16.7k
    if (token_type == TokenType::START_TAG_TOKEN &&
3184
16.7k
        token_atom != Atom::MGLYPH && token_atom != Atom::MALIGNMARK) {
3185
3.55k
      return false;
3186
3.55k
    }
3187
13.1k
    if (token_type == TokenType::TEXT_TOKEN) {
3188
1.61k
      return false;
3189
1.61k
    }
3190
13.1k
  }
3191
3192
378k
  if (node->name_space_ == "math" && node->atom_ == Atom::ANNOTATION_XML &&
3193
378k
      token_type == TokenType::START_TAG_TOKEN && token_atom == Atom::SVG) {
3194
0
    return false;
3195
0
  }
3196
3197
378k
  if (HtmlIntegrationPoint(*node) &&
3198
378k
      (token_type == TokenType::START_TAG_TOKEN ||
3199
133
       token_type == TokenType::TEXT_TOKEN)) {
3200
25
    return false;
3201
25
  }
3202
3203
378k
  if (token_type == TokenType::ERROR_TOKEN) {
3204
1.07k
    return false;
3205
1.07k
  }
3206
3207
377k
  return true;
3208
378k
}  // Parser::InForeignContent.
3209
3210
// Section 12.2.6.2.
3211
6.13k
void Parser::ParseGenericRawTextElement() {
3212
6.13k
  AddElement();
3213
6.13k
  original_insertion_mode_ = insertion_mode_;
3214
6.13k
  insertion_mode_ = std::bind(&Parser::TextIM, this);
3215
6.13k
}
3216
3217
void Parser::ParseImpliedToken(TokenType token_type, Atom atom,
3218
2.04M
                               const std::string& data) {
3219
  // Copy original token.
3220
2.04M
  Token real_token = {.token_type = token_.token_type,
3221
2.04M
                      .atom = token_.atom,
3222
2.04M
                      .data = token_.data,
3223
2.04M
                      .line_col_in_html_src = token_.line_col_in_html_src,
3224
2.04M
                      .attributes = token_.attributes};
3225
2.04M
  bool self_closing = has_self_closing_token_;
3226
  // Create implied tokens.
3227
2.04M
  token_ = {.token_type = token_type,
3228
2.04M
            .atom = atom,
3229
2.04M
            .data = data,
3230
            // For reporting purposes implied tokens are assumed to be parsed at
3231
            // the current tag location.
3232
2.04M
            .line_col_in_html_src = token_.line_col_in_html_src,
3233
2.04M
            .attributes = {}};
3234
2.04M
  has_self_closing_token_ = false;
3235
3236
  // Accounting for manufactured tags.
3237
2.04M
  if (token_type == TokenType::START_TAG_TOKEN) {
3238
2.02M
    switch (atom) {
3239
11.4k
      case Atom::HTML:
3240
11.4k
        document_->metadata_.has_manufactured_html = true;
3241
11.4k
        break;
3242
11.4k
      case Atom::HEAD:
3243
11.4k
        document_->metadata_.has_manufactured_head = true;
3244
11.4k
        break;
3245
10.9k
      case Atom::BODY:
3246
10.9k
        document_->metadata_.has_manufactured_body = true;
3247
10.9k
        break;
3248
1.99M
      default:
3249
1.99M
        break;
3250
2.02M
    }
3251
2.02M
  }
3252
3253
2.04M
  ParseCurrentToken();
3254
  // Restore original token.
3255
2.04M
  token_ = {.token_type = real_token.token_type,
3256
2.04M
            .atom = real_token.atom,
3257
2.04M
            .data = real_token.data,
3258
2.04M
            .line_col_in_html_src = token_.line_col_in_html_src,
3259
2.04M
            .attributes = real_token.attributes};
3260
2.04M
  has_self_closing_token_ = self_closing;
3261
2.04M
}  // Parser::ParseImpliedToken.
3262
3263
17.3M
void Parser::ParseCurrentToken() {
3264
17.3M
  if (token_.token_type == TokenType::SELF_CLOSING_TAG_TOKEN) {
3265
10.7k
    has_self_closing_token_ = true;
3266
10.7k
    token_.token_type = TokenType::START_TAG_TOKEN;
3267
10.7k
  }
3268
3269
17.3M
  bool consumed = false;
3270
3271
37.0M
  while (!consumed) {
3272
19.7M
    if (InForeignContent()) {
3273
377k
      consumed = ParseForeignContent();
3274
19.3M
    } else {
3275
19.3M
      consumed = insertion_mode_();
3276
19.3M
    }
3277
19.7M
  }
3278
3279
17.3M
  if (has_self_closing_token_) {
3280
    // This is a parse error, but ignore it.
3281
7.72k
    has_self_closing_token_ = false;
3282
7.72k
  }
3283
17.3M
}  // Parser::ParseCurrentToken.
3284
3285
26.8k
// Appends to `node` every attribute of `token` whose key the node does not
// already have. Attributes already present on the node are left untouched,
// and when the token repeats a key only its first occurrence is added.
void Parser::CopyAttributes(Node* node, Token token) const {
  if (token.attributes.empty()) return;

  // Keys seen so far: the node's own keys plus the ones added below.
  std::set<std::string> seen_keys;
  for (const Attribute& existing : node->attributes_) {
    seen_keys.insert(existing.key);
  }

  for (const Attribute& incoming : token.attributes) {
    // insert() reports whether the key was new.
    if (seen_keys.insert(incoming.key).second) {
      node->attributes_.push_back(incoming);
    }
  }
}  // Parser::CopyAttributes.
3298
3299
16.9k
// Records the href and target attributes of a <base> element in the
// document metadata (base_url.first = href, base_url.second = target).
// Attribute names are matched case-insensitively. Does nothing when
// `base_node` is not a <base> element node.
void Parser::RecordBaseURLMetadata(Node* base_node) {
  const bool is_base_element = base_node->Type() == NodeType::ELEMENT_NODE &&
                               base_node->DataAtom() == Atom::BASE;
  if (!is_base_element) return;

  for (auto& attr : base_node->Attributes()) {
    if (Strings::EqualFold(attr.key, "href")) {
      document_->metadata_.base_url.first = attr.value;
      continue;
    }
    if (Strings::EqualFold(attr.key, "target")) {
      document_->metadata_.base_url.second = attr.value;
    }
  }
}
3311
3312
735
// Records the canonical URL in the document metadata when `link_node` is a
// <link> element with rel="canonical" (compared case-insensitively) and a
// non-empty href. Does nothing for any other node.
void Parser::RecordLinkRelCanonical(Node* link_node) {
  const bool is_link_element = link_node->Type() == NodeType::ELEMENT_NODE &&
                               link_node->DataAtom() == Atom::LINK;
  if (!is_link_element) return;

  bool is_canonical = false;
  std::string href;
  for (auto& attr : link_node->Attributes()) {
    const bool is_rel_canonical = Strings::EqualFold(attr.key, "rel") &&
                                  Strings::EqualFold(attr.value, "canonical");
    if (is_rel_canonical) {
      is_canonical = true;
    } else if (Strings::EqualFold(attr.key, "href")) {
      href = attr.value;
    }
  }

  if (!is_canonical || href.empty()) return;
  document_->metadata_.canonical_url = href;
}
3330
3331
namespace {
3332
// Returns only whitespace characters in s.
3333
// <space><space>foo<space>bar<space> returns 4 spaces.
3334
0
std::string ExtractWhitespace(const std::string& s) {
3335
0
  std::string only_whitespaces;
3336
0
  std::copy_if(
3337
0
      s.begin(), s.end(),
3338
0
      only_whitespaces.begin(),  // Unused, populated directly in predicate.
3339
0
      [&only_whitespaces](char c) -> bool {
3340
0
        if (Strings::kWhitespace.find(c) != std::string::npos) {
3341
0
          only_whitespaces.push_back(c);
3342
0
        }
3343
0
        return false;
3344
0
      });
3345
0
  return only_whitespaces;
3346
0
}
3347
3348
}  // namespace
3349
3350
}  // namespace htmlparser