Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parso/python/diff.py: 41%

502 statements  

coverage.py v7.2.2, created at 2023-03-26 07:36 +0000

"""
The diff parser is trying to be a faster version of the normal parser by trying
to reuse the nodes of a previous pass over the same file. This is also called
incremental parsing in parser literature. The difference is mostly that with
incremental parsing you get a range that needs to be reparsed. Here we
calculate that range ourselves by using difflib. After that it's essentially
incremental parsing.

The biggest issue of this approach is that we reuse nodes in a mutable way. The
initial design and idea is quite problematic for this parser, but it is also
pretty fast. Measurements showed that just copying nodes in Python is simply
quite a bit slower (especially for big files >3 kLOC). Therefore we did not
want to get rid of the mutable nodes, since this is usually not an issue.

This is by far the hardest software I ever wrote, exactly because the initial
design is crappy. When you have to account for a lot of mutable state, it
creates a ton of issues that you would otherwise not have. This file took
probably 3-6 months to write, which is insane for a parser.

There is a fuzzer that helps test this whole thing. Please use it if you
make changes here. If you run the fuzzer like::

    test/fuzz_diff_parser.py random -n 100000

you can be pretty sure that everything is still fine. I sometimes run the
fuzzer up to 24h to make sure everything is still ok.
"""
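# A minimal illustration (not part of parso) of the difflib ranges mentioned
# above, assuming two lists of lines ``old`` and ``new``::
#
#     import difflib
#     sm = difflib.SequenceMatcher(None, old, new)
#     for operation, i1, i2, j1, j2 in sm.get_opcodes():
#         # operation is 'equal', 'replace', 'insert' or 'delete';
#         # old[i1:i2] corresponds to new[j1:j2]. Only the non-equal ranges
#         # have to be reparsed; for equal ranges the old nodes can be reused.
#         ...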

import re
import difflib
from collections import namedtuple
import logging

from parso.utils import split_lines
from parso.python.parser import Parser
from parso.python.tree import EndMarker
from parso.python.tokenize import PythonToken, BOM_UTF8_STRING
from parso.python.token import PythonTokenTypes

LOG = logging.getLogger(__name__)
DEBUG_DIFF_PARSER = False

_INDENTATION_TOKENS = 'INDENT', 'ERROR_DEDENT', 'DEDENT'

NEWLINE = PythonTokenTypes.NEWLINE
DEDENT = PythonTokenTypes.DEDENT
NAME = PythonTokenTypes.NAME
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
ENDMARKER = PythonTokenTypes.ENDMARKER


def _is_indentation_error_leaf(node):
    return node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS


def _get_previous_leaf_if_indentation(leaf):
    while leaf and _is_indentation_error_leaf(leaf):
        leaf = leaf.get_previous_leaf()
    return leaf


def _get_next_leaf_if_indentation(leaf):
    while leaf and _is_indentation_error_leaf(leaf):
        leaf = leaf.get_next_leaf()
    return leaf


def _get_suite_indentation(tree_node):
    return _get_indentation(tree_node.children[1])


def _get_indentation(tree_node):
    return tree_node.start_pos[1]


def _assert_valid_graph(node):
    """
    Checks if the parent/children relationship is correct.

    This is a check that only runs during debugging/testing.
    """
    try:
        children = node.children
    except AttributeError:
        # Ignoring INDENT is necessary, because indent/dedent tokens don't
        # contain a value/prefix and only exist because of the tokenizer.
        if node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS:
            assert not node.value
            assert not node.prefix
            return

        # Calculate the content between two start positions.
        previous_leaf = _get_previous_leaf_if_indentation(node.get_previous_leaf())
        if previous_leaf is None:
            content = node.prefix
            previous_start_pos = 1, 0
        else:
            assert previous_leaf.end_pos <= node.start_pos, \
                (previous_leaf, node)

            content = previous_leaf.value + node.prefix
            previous_start_pos = previous_leaf.start_pos

        if '\n' in content or '\r' in content:
            splitted = split_lines(content)
            line = previous_start_pos[0] + len(splitted) - 1
            actual = line, len(splitted[-1])
        else:
            actual = previous_start_pos[0], previous_start_pos[1] + len(content)
            if content.startswith(BOM_UTF8_STRING) \
                    and node.get_start_pos_of_prefix() == (1, 0):
                # Remove the byte order mark
                actual = actual[0], actual[1] - 1

        assert node.start_pos == actual, (node.start_pos, actual)
    else:
        for child in children:
            assert child.parent == node, (node, child)
            _assert_valid_graph(child)


def _assert_nodes_are_equal(node1, node2):
    try:
        children1 = node1.children
    except AttributeError:
        assert not hasattr(node2, 'children'), (node1, node2)
        assert node1.value == node2.value, (node1, node2)
        assert node1.type == node2.type, (node1, node2)
        assert node1.prefix == node2.prefix, (node1, node2)
        assert node1.start_pos == node2.start_pos, (node1, node2)
        return
    else:
        try:
            children2 = node2.children
        except AttributeError:
            assert False, (node1, node2)
    for n1, n2 in zip(children1, children2):
        _assert_nodes_are_equal(n1, n2)
    assert len(children1) == len(children2), '\n' + repr(children1) + '\n' + repr(children2)


def _get_debug_error_message(module, old_lines, new_lines):
    current_lines = split_lines(module.get_code(), keepends=True)
    current_diff = difflib.unified_diff(new_lines, current_lines)
    old_new_diff = difflib.unified_diff(old_lines, new_lines)
    import parso
    return (
        "There's an issue with the diff parser. Please "
        "report (parso v%s) - Old/New:\n%s\nActual Diff (May be empty):\n%s"
        % (parso.__version__, ''.join(old_new_diff), ''.join(current_diff))
    )


def _get_last_line(node_or_leaf):
    last_leaf = node_or_leaf.get_last_leaf()
    if _ends_with_newline(last_leaf):
        return last_leaf.start_pos[0]
    else:
        n = last_leaf.get_next_leaf()
        if n.type == 'endmarker' and '\n' in n.prefix:
            # This is a very special case and has to do with error recovery in
            # Parso. The problem is basically that there's sometimes no newline
            # leaf at the end (it's required in the grammar but not actually
            # needed before the endmarker; CPython just adds a newline to make
            # source code pass the parser; to account for that, Parso's error
            # recovery allows small_stmt instead of simple_stmt).
            return last_leaf.end_pos[0] + 1
        return last_leaf.end_pos[0]


def _skip_dedent_error_leaves(leaf):
    while leaf is not None and leaf.type == 'error_leaf' and leaf.token_type == 'DEDENT':
        leaf = leaf.get_previous_leaf()
    return leaf


def _ends_with_newline(leaf, suffix=''):
    leaf = _skip_dedent_error_leaves(leaf)

    if leaf.type == 'error_leaf':
        typ = leaf.token_type.lower()
    else:
        typ = leaf.type

    return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r')


def _flows_finished(pgen_grammar, stack):
    """
    if, while, for and try might not be finished, because another part might
    still be parsed.
    """
    for stack_node in stack:
        if stack_node.nonterminal in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'):
            return False
    return True


def _func_or_class_has_suite(node):
    if node.type == 'decorated':
        node = node.children[-1]
    if node.type in ('async_funcdef', 'async_stmt'):
        node = node.children[-1]
    return node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite'


def _suite_or_file_input_is_valid(pgen_grammar, stack):
    if not _flows_finished(pgen_grammar, stack):
        return False

    for stack_node in reversed(stack):
        if stack_node.nonterminal == 'decorator':
            # A decorator is only valid with the upcoming function.
            return False

        if stack_node.nonterminal == 'suite':
            # If only a newline is in the suite, the suite is not valid yet.
            return len(stack_node.nodes) > 1
    # Not reaching a suite means that we're dealing with file_input levels
    # where there's no need for a valid statement in it. It can also be empty.
    return True


def _is_flow_node(node):
    if node.type == 'async_stmt':
        node = node.children[1]
    try:
        value = node.children[0].value
    except AttributeError:
        return False
    return value in ('if', 'for', 'while', 'try', 'with')


class _PositionUpdatingFinished(Exception):
    pass


def _update_positions(nodes, line_offset, last_leaf):
    for node in nodes:
        try:
            children = node.children
        except AttributeError:
            # Is a leaf
            node.line += line_offset
            if node is last_leaf:
                raise _PositionUpdatingFinished
        else:
            _update_positions(children, line_offset, last_leaf)


class DiffParser:
    """
    An advanced form of parsing a file faster. Unfortunately comes with huge
    side effects. It changes the given module.
    """
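    # Roughly how this class is used (a sketch, not an exact copy of the code
    # in parso that drives it; ``pgen_grammar``, ``tokenizer`` and ``module``
    # come from a previously parsed version of the same file)::
    #
    #     old_lines = split_lines(old_code, keepends=True)
    #     new_lines = split_lines(new_code, keepends=True)
    #     module = DiffParser(pgen_grammar, tokenizer, module).update(
    #         old_lines=old_lines, new_lines=new_lines)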

    def __init__(self, pgen_grammar, tokenizer, module):
        self._pgen_grammar = pgen_grammar
        self._tokenizer = tokenizer
        self._module = module

    def _reset(self):
        self._copy_count = 0
        self._parser_count = 0

        self._nodes_tree = _NodesTree(self._module)

    def update(self, old_lines, new_lines):
        '''
        The algorithm works as follows:

        Equal:
            - Assure that the start is a newline, otherwise parse until we get
              one.
            - Copy from parsed_until_line + 1 to max(i2 + 1)
            - Make sure that the indentation is correct (e.g. add DEDENT)
            - Add old and change positions
        Insert:
            - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
              much more.

        Returns the new module node.
        '''
        LOG.debug('diff parser start')
        # Reset the used names cache so they get regenerated.
        self._module._used_names = None

        self._parser_lines_new = new_lines

        self._reset()

        line_length = len(new_lines)
        sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
        opcodes = sm.get_opcodes()
        LOG.debug('line_lengths old: %s; new: %s' % (len(old_lines), line_length))

        for operation, i1, i2, j1, j2 in opcodes:
            LOG.debug('-> code[%s] old[%s:%s] new[%s:%s]',
                      operation, i1 + 1, i2, j1 + 1, j2)

            if j2 == line_length and new_lines[-1] == '':
                # The empty part after the last newline is not relevant.
                j2 -= 1

            if operation == 'equal':
                line_offset = j1 - i1
                self._copy_from_old_parser(line_offset, i1 + 1, i2, j2)
            elif operation == 'replace':
                self._parse(until_line=j2)
            elif operation == 'insert':
                self._parse(until_line=j2)
            else:
                assert operation == 'delete'

        # With this action all changes will finally be applied and we have a
        # changed module.
        self._nodes_tree.close()

        if DEBUG_DIFF_PARSER:
            # If there is reasonable suspicion that the diff parser is not
            # behaving well, this should be enabled.
            try:
                code = ''.join(new_lines)
                assert self._module.get_code() == code
                _assert_valid_graph(self._module)
                without_diff_parser_module = Parser(
                    self._pgen_grammar,
                    error_recovery=True
                ).parse(self._tokenizer(new_lines))
                _assert_nodes_are_equal(self._module, without_diff_parser_module)
            except AssertionError:
                print(_get_debug_error_message(self._module, old_lines, new_lines))
                raise

        last_pos = self._module.end_pos[0]
        if last_pos != line_length:
            raise Exception(
                ('(%s != %s) ' % (last_pos, line_length))
                + _get_debug_error_message(self._module, old_lines, new_lines)
            )
        LOG.debug('diff parser end')
        return self._module

    def _enabled_debugging(self, old_lines, lines_new):
        if self._module.get_code() != ''.join(lines_new):
            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))

    def _copy_from_old_parser(self, line_offset, start_line_old, until_line_old, until_line_new):
        last_until_line = -1
        while until_line_new > self._nodes_tree.parsed_until_line:
            parsed_until_line_old = self._nodes_tree.parsed_until_line - line_offset
            line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
            if line_stmt is None:
                # Parse 1 line at least. We don't need more, because we just
                # want to get into a state where the old parser has statements
                # again that can be copied (e.g. not lines within parentheses).
                self._parse(self._nodes_tree.parsed_until_line + 1)
            else:
                p_children = line_stmt.parent.children
                index = p_children.index(line_stmt)

                if start_line_old == 1 \
                        and p_children[0].get_first_leaf().prefix.startswith(BOM_UTF8_STRING):
                    # If there's a BOM in the beginning, just reparse. It's too
                    # complicated to account for it otherwise.
                    copied_nodes = []
                else:
                    from_ = self._nodes_tree.parsed_until_line + 1
                    copied_nodes = self._nodes_tree.copy_nodes(
                        p_children[index:],
                        until_line_old,
                        line_offset
                    )
                # Match all the nodes that are in the wanted range.
                if copied_nodes:
                    self._copy_count += 1

                    to = self._nodes_tree.parsed_until_line

                    LOG.debug('copy old[%s:%s] new[%s:%s]',
                              copied_nodes[0].start_pos[0],
                              copied_nodes[-1].end_pos[0] - 1, from_, to)
                else:
                    # We have copied as much as possible (but definitely not too
                    # much). Therefore we just parse a bit more.
                    self._parse(self._nodes_tree.parsed_until_line + 1)
            # Since there are potential bugs that might loop here endlessly, we
            # just stop here.
            assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
            last_until_line = self._nodes_tree.parsed_until_line

    def _get_old_line_stmt(self, old_line):
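        # Find the statement in the old tree (a direct child of a suite or of
        # file_input) that starts on ``old_line``; return None if that line
        # does not start such a statement and therefore has to be reparsed.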

        leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)

        if _ends_with_newline(leaf):
            leaf = leaf.get_next_leaf()
        if leaf.get_start_pos_of_prefix()[0] == old_line:
            node = leaf
            while node.parent.type not in ('file_input', 'suite'):
                node = node.parent

            # Make sure that if only the `else:` line of an if statement is
            # copied, the whole statement is not copied.
            if node.start_pos[0] >= old_line:
                return node
        # Must be on the same line. Otherwise we need to parse that bit.
        return None

    def _parse(self, until_line):
        """
        Parses at least until the given line, but might just parse more until a
        valid state is reached.
        """
        last_until_line = 0
        while until_line > self._nodes_tree.parsed_until_line:
            node = self._try_parse_part(until_line)
            nodes = node.children

            self._nodes_tree.add_parsed_nodes(nodes, self._keyword_token_indents)
            if self._replace_tos_indent is not None:
                self._nodes_tree.indents[-1] = self._replace_tos_indent

            LOG.debug(
                'parse_part from %s to %s (to %s in part parser)',
                nodes[0].get_start_pos_of_prefix()[0],
                self._nodes_tree.parsed_until_line,
                node.end_pos[0] - 1
            )
            # Since the tokenizer sometimes has bugs, we cannot be sure that
            # this loop terminates. Therefore assert that there's always a
            # change.
            assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
            last_until_line = self._nodes_tree.parsed_until_line

    def _try_parse_part(self, until_line):
        """
        Sets up a normal parser that uses a specialized tokenizer to only parse
        until a certain position (or a bit longer if the statement hasn't
        ended).
        """
        self._parser_count += 1
        # TODO speed up, shouldn't copy the whole list all the time.
        # memoryview?
        parsed_until_line = self._nodes_tree.parsed_until_line
        lines_after = self._parser_lines_new[parsed_until_line:]
        tokens = self._diff_tokenize(
            lines_after,
            until_line,
            line_offset=parsed_until_line
        )
        self._active_parser = Parser(
            self._pgen_grammar,
            error_recovery=True
        )
        return self._active_parser.parse(tokens=tokens)

    def _diff_tokenize(self, lines, until_line, line_offset=0):
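        # Wrap the tokenizer and cut the token stream short: once a position
        # past ``until_line`` is reached and the parser is in a state where it
        # may validly end (dedented back to the initial indentation, or a
        # valid suite/file_input), emit an ENDMARKER instead of continuing.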

        was_newline = False
        indents = self._nodes_tree.indents
        initial_indentation_count = len(indents)

        tokens = self._tokenizer(
            lines,
            start_pos=(line_offset + 1, 0),
            indents=indents,
            is_first_token=line_offset == 0,
        )
        stack = self._active_parser.stack
        self._replace_tos_indent = None
        self._keyword_token_indents = {}
        # print('start', line_offset + 1, indents)
        for token in tokens:
            # print(token, indents)
            typ = token.type
            if typ == DEDENT:
                if len(indents) < initial_indentation_count:
                    # We are done here, only thing that can come now is an
                    # endmarker or another dedented code block.
                    while True:
                        typ, string, start_pos, prefix = token = next(tokens)
                        if typ in (DEDENT, ERROR_DEDENT):
                            if typ == ERROR_DEDENT:
                                # We want to force an error dedent in the next
                                # parser/pass. To make this possible we just
                                # increase the location by one.
                                self._replace_tos_indent = start_pos[1] + 1
                                pass
                        else:
                            break

                    if '\n' in prefix or '\r' in prefix:
                        prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
                    else:
                        assert start_pos[1] >= len(prefix), repr(prefix)
                        if start_pos[1] - len(prefix) == 0:
                            prefix = ''
                    yield PythonToken(
                        ENDMARKER, '',
                        start_pos,
                        prefix
                    )
                    break
            elif typ == NEWLINE and token.start_pos[0] >= until_line:
                was_newline = True
            elif was_newline:
                was_newline = False
                if len(indents) == initial_indentation_count:
                    # Check if the parser is actually in a valid suite state.
                    if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
                        yield PythonToken(ENDMARKER, '', token.start_pos, '')
                        break

            if typ == NAME and token.string in ('class', 'def'):
                self._keyword_token_indents[token.start_pos] = list(indents)

            yield token


class _NodesTreeNode:
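    # Wraps one node of the parser tree and collects groups of children (plus
    # line offsets) that are attached to ``tree_node`` once ``finish()`` is
    # called at the end of a diff parsing run.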

    _ChildrenGroup = namedtuple(
        '_ChildrenGroup',
        'prefix children line_offset last_line_offset_leaf')

    def __init__(self, tree_node, parent=None, indentation=0):
        self.tree_node = tree_node
        self._children_groups = []
        self.parent = parent
        self._node_children = []
        self.indentation = indentation

    def finish(self):
        children = []
        for prefix, children_part, line_offset, last_line_offset_leaf in self._children_groups:
            first_leaf = _get_next_leaf_if_indentation(
                children_part[0].get_first_leaf()
            )

            first_leaf.prefix = prefix + first_leaf.prefix
            if line_offset != 0:
                try:
                    _update_positions(
                        children_part, line_offset, last_line_offset_leaf)
                except _PositionUpdatingFinished:
                    pass
            children += children_part
        self.tree_node.children = children
        # Reset the parents
        for node in children:
            node.parent = self.tree_node

        for node_child in self._node_children:
            node_child.finish()

    def add_child_node(self, child_node):
        self._node_children.append(child_node)

    def add_tree_nodes(self, prefix, children, line_offset=0,
                       last_line_offset_leaf=None):
        if last_line_offset_leaf is None:
            last_line_offset_leaf = children[-1].get_last_leaf()
        group = self._ChildrenGroup(
            prefix, children, line_offset, last_line_offset_leaf
        )
        self._children_groups.append(group)

    def get_last_line(self, suffix):
        line = 0
        if self._children_groups:
            children_group = self._children_groups[-1]
            last_leaf = _get_previous_leaf_if_indentation(
                children_group.last_line_offset_leaf
            )

            line = last_leaf.end_pos[0] + children_group.line_offset

            # Newlines end on the next line, which means that they would cover
            # the next line. That line is not fully parsed at this point.
            if _ends_with_newline(last_leaf, suffix):
                line -= 1
        line += len(split_lines(suffix)) - 1

        if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'):
            # This is the end of a file (that doesn't end with a newline).
            line += 1

        if self._node_children:
            return max(line, self._node_children[-1].get_last_line(suffix))
        return line

    def __repr__(self):
        return '<%s: %s>' % (self.__class__.__name__, self.tree_node)


class _NodesTree:
    def __init__(self, module):
        self._base_node = _NodesTreeNode(module)
        self._working_stack = [self._base_node]
        self._module = module
        self._prefix_remainder = ''
        self.prefix = ''
        self.indents = [0]

    @property
    def parsed_until_line(self):
        return self._working_stack[-1].get_last_line(self.prefix)

    def _update_insertion_node(self, indentation):
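        # Pop finished (more indented) nodes from the working stack until we
        # reach the node that new nodes with this indentation belong to, and
        # return it.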

        for node in reversed(list(self._working_stack)):
            if node.indentation < indentation or node is self._working_stack[0]:
                return node
            self._working_stack.pop()

    def add_parsed_nodes(self, tree_nodes, keyword_token_indents):
        old_prefix = self.prefix
        tree_nodes = self._remove_endmarker(tree_nodes)
        if not tree_nodes:
            self.prefix = old_prefix + self.prefix
            return

        assert tree_nodes[0].type != 'newline'

        node = self._update_insertion_node(tree_nodes[0].start_pos[1])
        assert node.tree_node.type in ('suite', 'file_input')
        node.add_tree_nodes(old_prefix, tree_nodes)
        # tos = Top of stack
        self._update_parsed_node_tos(tree_nodes[-1], keyword_token_indents)

    def _update_parsed_node_tos(self, tree_node, keyword_token_indents):
        if tree_node.type == 'suite':
            def_leaf = tree_node.parent.children[0]
            new_tos = _NodesTreeNode(
                tree_node,
                indentation=keyword_token_indents[def_leaf.start_pos][-1],
            )
            new_tos.add_tree_nodes('', list(tree_node.children))

            self._working_stack[-1].add_child_node(new_tos)
            self._working_stack.append(new_tos)

            self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents)
        elif _func_or_class_has_suite(tree_node):
            self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents)

    def _remove_endmarker(self, tree_nodes):
        """
        Helps clean up the tree nodes that get inserted.
        """
        last_leaf = tree_nodes[-1].get_last_leaf()
        is_endmarker = last_leaf.type == 'endmarker'
        self._prefix_remainder = ''
        if is_endmarker:
            prefix = last_leaf.prefix
            separation = max(prefix.rfind('\n'), prefix.rfind('\r'))
            if separation > -1:
                # Remove the whitespace part of the prefix after a newline.
                # That is not relevant if parentheses were opened. Always parse
                # until the end of a line.
                last_leaf.prefix, self._prefix_remainder = \
                    last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]

        self.prefix = ''

        if is_endmarker:
            self.prefix = last_leaf.prefix

            tree_nodes = tree_nodes[:-1]
        return tree_nodes

    def _get_matching_indent_nodes(self, tree_nodes, is_new_suite):
        # There might be a random dedent where we have to stop copying.
        # Invalid indents are ok, because the parser handled that
        # properly before. An invalid dedent can happen, because a few
        # lines above there was an invalid indent.
        node_iterator = iter(tree_nodes)
        if is_new_suite:
            yield next(node_iterator)

        first_node = next(node_iterator)
        indent = _get_indentation(first_node)
        if not is_new_suite and indent not in self.indents:
            return
        yield first_node

        for n in node_iterator:
            if _get_indentation(n) != indent:
                return
            yield n

    def copy_nodes(self, tree_nodes, until_line, line_offset):
        """
        Copies tree nodes from the old parser tree.

        Returns the list of copied tree nodes.
        """
        if tree_nodes[0].type in ('error_leaf', 'error_node'):
            # Avoid copying errors in the beginning. Can lead to a lot of
            # issues.
            return []

        indentation = _get_indentation(tree_nodes[0])
        old_working_stack = list(self._working_stack)
        old_prefix = self.prefix
        old_indents = self.indents
        self.indents = [i for i in self.indents if i <= indentation]

        self._update_insertion_node(indentation)

        new_nodes, self._working_stack, self.prefix, added_indents = self._copy_nodes(
            list(self._working_stack),
            tree_nodes,
            until_line,
            line_offset,
            self.prefix,
        )
        if new_nodes:
            self.indents += added_indents
        else:
            self._working_stack = old_working_stack
            self.prefix = old_prefix
            self.indents = old_indents
        return new_nodes

    def _copy_nodes(self, working_stack, nodes, until_line, line_offset,
                    prefix='', is_nested=False):
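        # Collect as many of ``nodes`` as can be reused up to ``until_line``,
        # dropping trailing error nodes and flow statements, and recursing into
        # a trailing function/class suite so that it can be copied partially.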

        new_nodes = []
        added_indents = []

        nodes = list(self._get_matching_indent_nodes(
            nodes,
            is_new_suite=is_nested,
        ))

        new_prefix = ''
        for node in nodes:
            if node.start_pos[0] > until_line:
                break

            if node.type == 'endmarker':
                break

            if node.type == 'error_leaf' and node.token_type in ('DEDENT', 'ERROR_DEDENT'):
                break
            # TODO this check might take a bit of time for large files. We
            # might want to change this to do more intelligent guessing or
            # binary search.
            if _get_last_line(node) > until_line:
                # We can split up functions and classes later.
                if _func_or_class_has_suite(node):
                    new_nodes.append(node)
                break
            try:
                c = node.children
            except AttributeError:
                pass
            else:
                # This case basically appears with error recovery of one line
                # suites like `def foo(): bar.-`. In this case we might not
                # include a newline in the statement and we need to take care
                # of that.
                n = node
                if n.type == 'decorated':
                    n = n.children[-1]
                if n.type in ('async_funcdef', 'async_stmt'):
                    n = n.children[-1]
                if n.type in ('classdef', 'funcdef'):
                    suite_node = n.children[-1]
                else:
                    suite_node = c[-1]

                if suite_node.type in ('error_leaf', 'error_node'):
                    break

            new_nodes.append(node)

        # Pop error nodes at the end from the list
        if new_nodes:
            while new_nodes:
                last_node = new_nodes[-1]
                if (last_node.type in ('error_leaf', 'error_node')
                        or _is_flow_node(new_nodes[-1])):
                    # Error leafs/nodes don't have a defined start/end. Error
                    # nodes might not end with a newline (e.g. if there's an
                    # open `(`). Therefore ignore all of them unless they are
                    # followed by a valid parser state.
                    # If we copy flows at the end, they might be continued
                    # after the copy limit (in the new parser).
                    # In this while loop we try to remove until we find a newline.
                    new_prefix = ''
                    new_nodes.pop()
                    while new_nodes:
                        last_node = new_nodes[-1]
                        if last_node.get_last_leaf().type == 'newline':
                            break
                        new_nodes.pop()
                    continue
                if len(new_nodes) > 1 and new_nodes[-2].type == 'error_node':
                    # The problem here is that Parso error recovery sometimes
                    # influences nodes before this node.
                    # Since the new last node is an error node this will get
                    # cleaned up in the next while iteration.
                    new_nodes.pop()
                    continue
                break

        if not new_nodes:
            return [], working_stack, prefix, added_indents

        tos = working_stack[-1]
        last_node = new_nodes[-1]
        had_valid_suite_last = False
        # Pop incomplete suites from the list
        if _func_or_class_has_suite(last_node):
            suite = last_node
            while suite.type != 'suite':
                suite = suite.children[-1]

            indent = _get_suite_indentation(suite)
            added_indents.append(indent)

            suite_tos = _NodesTreeNode(suite, indentation=_get_indentation(last_node))
            # Don't need to pass line_offset here, it's already done by the
            # parent.
            suite_nodes, new_working_stack, new_prefix, ai = self._copy_nodes(
                working_stack + [suite_tos], suite.children, until_line, line_offset,
                is_nested=True,
            )
            added_indents += ai
            if len(suite_nodes) < 2:
                # A suite only with newline is not valid.
                new_nodes.pop()
                new_prefix = ''
            else:
                assert new_nodes
                tos.add_child_node(suite_tos)
                working_stack = new_working_stack
                had_valid_suite_last = True

        if new_nodes:
            if not _ends_with_newline(new_nodes[-1].get_last_leaf()) and not had_valid_suite_last:
                p = new_nodes[-1].get_next_leaf().prefix
                # We are not allowed to remove the newline at the end of the
                # line, otherwise it's going to be missing. This happens e.g.
                # if an open bracket before this point moves newlines into
                # prefixes.
                new_prefix = split_lines(p, keepends=True)[0]

            if had_valid_suite_last:
                last = new_nodes[-1]
                if last.type == 'decorated':
                    last = last.children[-1]
                if last.type in ('async_funcdef', 'async_stmt'):
                    last = last.children[-1]
                last_line_offset_leaf = last.children[-2].get_last_leaf()
                assert last_line_offset_leaf == ':'
            else:
                last_line_offset_leaf = new_nodes[-1].get_last_leaf()
            tos.add_tree_nodes(
                prefix, new_nodes, line_offset, last_line_offset_leaf,
            )
            prefix = new_prefix
            self._prefix_remainder = ''

        return new_nodes, working_stack, prefix, added_indents

    def close(self):
        self._base_node.finish()

        # Add an endmarker.
        try:
            last_leaf = self._module.get_last_leaf()
        except IndexError:
            end_pos = [1, 0]
        else:
            last_leaf = _skip_dedent_error_leaves(last_leaf)
            end_pos = list(last_leaf.end_pos)
        lines = split_lines(self.prefix)
        assert len(lines) > 0
        if len(lines) == 1:
            if lines[0].startswith(BOM_UTF8_STRING) and end_pos == [1, 0]:
                end_pos[1] -= 1
            end_pos[1] += len(lines[0])
        else:
            end_pos[0] += len(lines) - 1
            end_pos[1] = len(lines[-1])

        endmarker = EndMarker('', tuple(end_pos), self.prefix + self._prefix_remainder)
        endmarker.parent = self._module
        self._module.children.append(endmarker)