Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/black/trans.py: 13%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2String transformers that can split and merge strings.
3"""
5import re
6from abc import ABC, abstractmethod
7from collections import defaultdict
8from collections.abc import Callable, Collection, Iterable, Iterator, Sequence
9from dataclasses import dataclass
10from typing import Any, ClassVar, Final, Literal, Optional, TypeVar, Union
12from mypy_extensions import trait
14from black.comments import contains_pragma_comment
15from black.lines import Line, append_leaves
16from black.mode import Feature, Mode
17from black.nodes import (
18 CLOSING_BRACKETS,
19 OPENING_BRACKETS,
20 STANDALONE_COMMENT,
21 is_empty_lpar,
22 is_empty_par,
23 is_empty_rpar,
24 is_part_of_annotation,
25 parent_type,
26 replace_child,
27 syms,
28)
29from black.rusty import Err, Ok, Result
30from black.strings import (
31 assert_is_leaf_string,
32 count_chars_in_width,
33 get_string_prefix,
34 has_triple_quotes,
35 normalize_string_quotes,
36 str_width,
37)
38from blib2to3.pgen2 import token
39from blib2to3.pytree import Leaf, Node
class CannotTransform(Exception):
    """Base class for errors raised by Transformers.

    Raised directly (or wrapped in an ``Err`` result via ``TErr``) whenever a
    transformer determines that it cannot be applied to the given line.
    """
# types
T = TypeVar("T")  # generic payload type carried by a TResult
LN = Union[Leaf, Node]  # any node of the blib2to3 parse tree
Transformer = Callable[[Line, Collection[Feature], Mode], Iterator[Line]]
Index = int  # index into a Line's list of leaves
NodeType = int  # blib2to3 node/token type code
ParserState = int  # state identifier used by the string DFA parser
StringID = int  # id() of a string object (see CustomSplitMapMixin._get_key)
TResult = Result[T, CannotTransform]  # (T)ransform Result
TMatchResult = TResult[list[Index]]  # a successful match carries leaf indices

# Characters after which a string may be split without harming readability
# (East Asian ideographic comma, full stop, and fullwidth comma).
SPLIT_SAFE_CHARS = frozenset(["\u3001", "\u3002", "\uff0c"])  # East Asian stops
def TErr(err_msg: str) -> Err[CannotTransform]:
    """(T)ransform Err

    Convenience helper for working with the TResult type: wraps @err_msg in a
    CannotTransform exception and returns it as an ``Err``.
    """
    return Err(CannotTransform(err_msg))
def hug_power_op(
    line: Line, features: Collection[Feature], mode: Mode
) -> Iterator[Line]:
    """A transformer which normalizes spacing around power operators.

    Removes the spaces around a ``**`` operator (e.g. ``a ** b`` -> ``a**b``)
    when both operands are "simple" (plain names/numbers, possibly with a
    dotted lookup or a preceding unary operator).

    Raises:
        CannotTransform: if the line contains no ``**`` operator.
    """

    # Performance optimization to avoid unnecessary Leaf clones and other ops.
    for leaf in line.leaves:
        if leaf.type == token.DOUBLESTAR:
            break
    else:
        raise CannotTransform("No doublestar token was found in the line.")

    def is_simple_lookup(index: int, kind: Literal[1, -1]) -> bool:
        # `kind` selects the scan direction: -1 looks backwards (the base of
        # the power expression), 1 looks forwards (the exponent).
        # Brackets and parentheses indicate calls, subscripts, etc. ...
        # basically stuff that doesn't count as "simple". Only a NAME lookup
        # or dotted lookup (eg. NAME.NAME) is OK.
        if kind == -1:
            return handle_is_simple_look_up_prev(line, index, {token.RPAR, token.RSQB})
        else:
            return handle_is_simple_lookup_forward(
                line, index, {token.LPAR, token.LSQB}
            )

    def is_simple_operand(index: int, kind: Literal[1, -1]) -> bool:
        # An operand is considered "simple" if it's a NAME, a numeric CONSTANT, a
        # simple lookup (see above), with or without a preceding unary operator.
        start = line.leaves[index]
        if start.type in {token.NAME, token.NUMBER}:
            return is_simple_lookup(index, kind)

        if start.type in {token.PLUS, token.MINUS, token.TILDE}:
            if line.leaves[index + 1].type in {token.NAME, token.NUMBER}:
                # kind is always one as bases with a preceding unary op will be checked
                # for simplicity starting from the next token (so it'll hit the check
                # above).
                return is_simple_lookup(index + 1, kind=1)

        return False

    new_line = line.clone()
    should_hug = False
    for idx, leaf in enumerate(line.leaves):
        new_leaf = leaf.clone()
        if should_hug:
            # This leaf follows a hugged `**`: drop the space before it.
            new_leaf.prefix = ""
            should_hug = False

        should_hug = (
            (0 < idx < len(line.leaves) - 1)
            and leaf.type == token.DOUBLESTAR
            and is_simple_operand(idx - 1, kind=-1)
            and line.leaves[idx - 1].value != "lambda"
            and is_simple_operand(idx + 1, kind=1)
        )
        if should_hug:
            # Drop the space before the `**` itself (the space after it is
            # removed on the next iteration, see above).
            new_leaf.prefix = ""

        # We have to be careful to make a new line properly:
        # - bracket related metadata must be maintained (handled by Line.append)
        # - comments need to copied over, updating the leaf IDs they're attached to
        new_line.append(new_leaf, preformatted=True)
        for comment_leaf in line.comments_after(leaf):
            new_line.append(comment_leaf, preformatted=True)

    yield new_line
def handle_is_simple_look_up_prev(line: Line, index: int, disallowed: set[int]) -> bool:
    """
    Determine is_simple_lookup for the leaves that precede the doublestar
    token.  We walk backwards from @index, collecting the chained expression
    leaf by leaf, so that any bracket or parenthesis can be attributed to the
    single expression it actually belongs to.
    """
    leaves = line.leaves
    chain: list[Leaf] = []
    seen_disallowed = False

    while 0 <= index < len(leaves):
        leaf = leaves[index]
        chain.append(leaf)
        if leaf.type in disallowed:
            seen_disallowed = True
        if not is_expression_chained(chain):
            # The chain ended before reaching the start of the line: it is
            # simple only if no disallowed bracket was part of it.
            return not seen_disallowed
        index -= 1

    return True
def handle_is_simple_lookup_forward(
    line: Line, index: int, disallowed: set[int]
) -> bool:
    """
    Determine is_simple_lookup for the leaves that follow the doublestar
    token.  The forward direction is much simpler than the backward one and
    does not need to reason about chained expressions.
    """
    while 0 <= index < len(line.leaves):
        current = line.leaves[index]
        if current.type in disallowed:
            return False

        # A simple lookup continues only through dots and names, where the
        # name is not the 'for' keyword of a comprehension.
        is_lookup_part = current.type == token.DOT or (
            current.type == token.NAME and current.value != "for"
        )
        if not is_lookup_part:
            # Anything else ends the expression we're checking; since only the
            # disallowed tokens are semantically attached to this lookup,
            # reaching here means the lookup is simple.
            return True

        index += 1

    return True
def is_expression_chained(chained_leaves: list[Leaf]) -> bool:
    """
    Report whether the collected (reversed) leaf sequence still forms one
    chained expression, e.g. foo.lookup, foo().lookup, (foo.lookup()).
    """
    if len(chained_leaves) < 2:
        # A single leaf is trivially chained.
        return True

    newest = chained_leaves[-1]
    previous = chained_leaves[-2]

    if previous.type == token.NAME:
        # A name extends leftwards only through a dotted lookup.
        return newest.type == token.DOT
    if previous.type in {token.RPAR, token.RSQB}:
        # A closing bracket may be preceded by another closing bracket.
        return newest.type in {token.RSQB, token.RPAR}
    if previous.type in {token.LPAR, token.LSQB}:
        # An opening bracket may follow a callee name or another opener.
        return newest.type in {token.NAME, token.LPAR, token.LSQB}
    return False
class StringTransformer(ABC):
    """
    An implementation of the Transformer protocol that relies on its
    subclasses overriding the template methods `do_match(...)` and
    `do_transform(...)`.

    This Transformer works exclusively on strings (for example, by merging
    or splitting them).

    The following sections can be found among the docstrings of each concrete
    StringTransformer subclass.

    Requirements:
        Which requirements must be met of the given Line for this
        StringTransformer to be applied?

    Transformations:
        If the given Line meets all of the above requirements, which string
        transformations can you expect to be applied to it by this
        StringTransformer?

    Collaborations:
        What contractual agreements does this StringTransformer have with other
        StringTransfomers? Such collaborations should be eliminated/minimized
        as much as possible.
    """

    # NOTE(review): gives instances a function-like __name__; instances are
    # callable (see __call__) and used where Transformer callables are expected.
    __name__: Final = "StringTransformer"

    # Ideally this would be a dataclass, but unfortunately mypyc breaks when used with
    # `abc.ABC`.
    def __init__(self, line_length: int, normalize_strings: bool) -> None:
        self.line_length = line_length
        self.normalize_strings = normalize_strings

    @abstractmethod
    def do_match(self, line: Line) -> TMatchResult:
        """
        Returns:
            * Ok(string_indices) such that for each index, `line.leaves[index]`
              is our target string if a match was able to be made. For
              transformers that don't result in more lines (e.g. StringMerger,
              StringParenStripper), multiple matches and transforms are done at
              once to reduce the complexity.
            OR
            * Err(CannotTransform), if no match could be made.
        """

    @abstractmethod
    def do_transform(
        self, line: Line, string_indices: list[int]
    ) -> Iterator[TResult[Line]]:
        """
        Yields:
            * Ok(new_line) where new_line is the new transformed line.
            OR
            * Err(CannotTransform) if the transformation failed for some reason. The
              `do_match(...)` template method should usually be used to reject
              the form of the given Line, but in some cases it is difficult to
              know whether or not a Line meets the StringTransformer's
              requirements until the transformation is already midway.

        Side Effects:
            This method should NOT mutate @line directly, but it MAY mutate the
            Line's underlying Node structure. (WARNING: If the underlying Node
            structure IS altered, then this method should NOT be allowed to
            yield an CannotTransform after that point.)
        """

    def __call__(
        self, line: Line, _features: Collection[Feature], _mode: Mode
    ) -> Iterator[Line]:
        """
        StringTransformer instances have a call signature that mirrors that of
        the Transformer type.

        Raises:
            CannotTransform(...) if the concrete StringTransformer class is unable
            to transform @line.
        """
        # Optimization to avoid calling `self.do_match(...)` when the line does
        # not contain any string.
        if not any(leaf.type == token.STRING for leaf in line.leaves):
            raise CannotTransform("There are no strings in this line.")

        match_result = self.do_match(line)

        if isinstance(match_result, Err):
            cant_transform = match_result.err()
            raise CannotTransform(
                f"The string transformer {self.__class__.__name__} does not recognize"
                " this line as one that it can transform."
            ) from cant_transform

        string_indices = match_result.ok()

        # Feed each successful result back in as the input of the next
        # transform step, so all transformed lines get yielded in order.
        for line_result in self.do_transform(line, string_indices):
            if isinstance(line_result, Err):
                cant_transform = line_result.err()
                raise CannotTransform(
                    "StringTransformer failed while attempting to transform string."
                ) from cant_transform
            line = line_result.ok()
            yield line
@dataclass
class CustomSplit:
    """A custom (i.e. manual) string split.

    A single CustomSplit instance represents a single substring.

    Examples:
        Consider the following string:
        ```
        "Hi there friend."
        " This is a custom"
        f" string {split}."
        ```

        This string will correspond to the following three CustomSplit instances:
        ```
        CustomSplit(False, 16)
        CustomSplit(False, 17)
        CustomSplit(True, 16)
        ```
    """

    # True iff this substring carries a string prefix (e.g. the 'f' above).
    has_prefix: bool
    # Offset (within the merged string) at which this substring breaks.
    break_idx: int


# Key type used by CustomSplitMapMixin: (id of the string object, its value).
CustomSplitMapKey = tuple[StringID, str]
@trait
class CustomSplitMapMixin:
    """
    Mixin that remembers, per merged string, the sequence of CustomSplits
    that produced it, so the string can later be re-split at those exact
    points iff none of the resulting substrings go over the configured max
    line length.
    """

    _CUSTOM_SPLIT_MAP: ClassVar[dict[CustomSplitMapKey, tuple[CustomSplit, ...]]] = (
        defaultdict(tuple)
    )

    @staticmethod
    def _get_key(string: str) -> CustomSplitMapKey:
        """
        Returns:
            A unique identifier that is used internally to map @string to a
            group of custom splits.
        """
        return (id(string), string)

    def add_custom_splits(
        self, string: str, custom_splits: Iterable[CustomSplit]
    ) -> None:
        """Record @custom_splits as the custom splits belonging to @string."""
        self._CUSTOM_SPLIT_MAP[self._get_key(string)] = tuple(custom_splits)

    def pop_custom_splits(self, string: str) -> list[CustomSplit]:
        """Remove and return the custom splits mapped to @string.

        Returns:
            The list of custom splits associated with @string, or [] if none
            exist.  The mapping itself is deleted as a side effect.
        """
        key = self._get_key(string)
        # _CUSTOM_SPLIT_MAP is a defaultdict(tuple), so a missing key simply
        # yields an empty tuple here before being removed again.
        splits = self._CUSTOM_SPLIT_MAP[key]
        del self._CUSTOM_SPLIT_MAP[key]
        return list(splits)

    def has_custom_splits(self, string: str) -> bool:
        """Return True iff @string is associated with a set of custom splits."""
        return self._get_key(string) in self._CUSTOM_SPLIT_MAP
class StringMerger(StringTransformer, CustomSplitMapMixin):
    """StringTransformer that merges strings together.

    Requirements:
        (A) The line contains adjacent strings such that ALL of the validation checks
        listed in StringMerger._validate_msg(...)'s docstring pass.
        OR
        (B) The line contains a string which uses line continuation backslashes.

    Transformations:
        Depending on which of the two requirements above were met, either:

        (A) The string group associated with the target string is merged.
        OR
        (B) All line-continuation backslashes are removed from the target string.

    Collaborations:
        StringMerger provides custom split information to StringSplitter.
    """

    def do_match(self, line: Line) -> TMatchResult:
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        # Each collected index marks the FIRST leaf of a matched string group
        # (or a single string containing a backslash line continuation).
        string_indices = []
        idx = 0
        while is_valid_index(idx):
            leaf = LL[idx]
            if (
                leaf.type == token.STRING
                and is_valid_index(idx + 1)
                and LL[idx + 1].type == token.STRING
            ):
                # Let's check if the string group contains an inline comment
                # If we have a comment inline, we don't merge the strings
                contains_comment = False
                i = idx
                while is_valid_index(i):
                    if LL[i].type != token.STRING:
                        break
                    if line.comments_after(LL[i]):
                        contains_comment = True
                        break
                    i += 1

                if not contains_comment and not is_part_of_annotation(leaf):
                    string_indices.append(idx)

                # Advance to the next non-STRING leaf.
                idx += 2
                while is_valid_index(idx) and LL[idx].type == token.STRING:
                    idx += 1

            elif leaf.type == token.STRING and "\\\n" in leaf.value:
                string_indices.append(idx)
                # Advance to the next non-STRING leaf.
                idx += 1
                while is_valid_index(idx) and LL[idx].type == token.STRING:
                    idx += 1

            else:
                idx += 1

        if string_indices:
            return Ok(string_indices)
        else:
            return TErr("This line has no strings that need merging.")

    def do_transform(
        self, line: Line, string_indices: list[int]
    ) -> Iterator[TResult[Line]]:
        new_line = line

        # First, strip backslash line continuations; then merge adjacent
        # string groups.  Only fail if BOTH sub-transforms failed.
        rblc_result = self._remove_backslash_line_continuation_chars(
            new_line, string_indices
        )
        if isinstance(rblc_result, Ok):
            new_line = rblc_result.ok()

        msg_result = self._merge_string_group(new_line, string_indices)
        if isinstance(msg_result, Ok):
            new_line = msg_result.ok()

        if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
            msg_cant_transform = msg_result.err()
            rblc_cant_transform = rblc_result.err()
            cant_transform = CannotTransform(
                "StringMerger failed to merge any strings in this line."
            )

            # Chain the errors together using `__cause__`.
            msg_cant_transform.__cause__ = rblc_cant_transform
            cant_transform.__cause__ = msg_cant_transform

            yield Err(cant_transform)
        else:
            yield Ok(new_line)

    @staticmethod
    def _remove_backslash_line_continuation_chars(
        line: Line, string_indices: list[int]
    ) -> TResult[Line]:
        """
        Merge strings that were split across multiple lines using
        line-continuation backslashes.

        Returns:
            Ok(new_line), if @line contains backslash line-continuation
            characters.
            OR
            Err(CannotTransform), otherwise.
        """
        LL = line.leaves

        indices_to_transform = []
        for string_idx in string_indices:
            string_leaf = LL[string_idx]
            # Triple-quoted strings may legitimately contain "\<newline>", so
            # only single-quoted strings are candidates.
            if (
                string_leaf.type == token.STRING
                and "\\\n" in string_leaf.value
                and not has_triple_quotes(string_leaf.value)
            ):
                indices_to_transform.append(string_idx)

        if not indices_to_transform:
            return TErr(
                "Found no string leaves that contain backslash line continuation"
                " characters."
            )

        new_line = line.clone()
        new_line.comments = line.comments.copy()
        append_leaves(new_line, line, LL)

        for string_idx in indices_to_transform:
            new_string_leaf = new_line.leaves[string_idx]
            new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")

        return Ok(new_line)

    def _merge_string_group(
        self, line: Line, string_indices: list[int]
    ) -> TResult[Line]:
        """
        Merges string groups (i.e. set of adjacent strings).

        Each index from `string_indices` designates one string group's first
        leaf in `line.leaves`.

        Returns:
            Ok(new_line), if ALL of the validation checks found in
            _validate_msg(...) pass.
            OR
            Err(CannotTransform), otherwise.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        # A dict of {string_idx: tuple[num_of_strings, string_leaf]}.
        merged_string_idx_dict: dict[int, tuple[int, Leaf]] = {}
        for string_idx in string_indices:
            vresult = self._validate_msg(line, string_idx)
            if isinstance(vresult, Err):
                # Skip groups that fail validation; other groups on the same
                # line may still be mergeable.
                continue
            merged_string_idx_dict[string_idx] = self._merge_one_string_group(
                LL, string_idx, is_valid_index
            )

        if not merged_string_idx_dict:
            return TErr("No string group is merged")

        # Build the final line ('new_line') that this method will later return.
        new_line = line.clone()
        previous_merged_string_idx = -1
        previous_merged_num_of_strings = -1
        for i, leaf in enumerate(LL):
            if i in merged_string_idx_dict:
                previous_merged_string_idx = i
                previous_merged_num_of_strings, string_leaf = merged_string_idx_dict[i]
                new_line.append(string_leaf)

            # Skip the original leaves that were consumed by the merge, but
            # keep any comments attached to them.
            if (
                previous_merged_string_idx
                <= i
                < previous_merged_string_idx + previous_merged_num_of_strings
            ):
                for comment_leaf in line.comments_after(leaf):
                    new_line.append(comment_leaf, preformatted=True)
                continue

            append_leaves(new_line, line, [leaf])

        return Ok(new_line)

    def _merge_one_string_group(
        self, LL: list[Leaf], string_idx: int, is_valid_index: Callable[[int], bool]
    ) -> tuple[int, Leaf]:
        """
        Merges one string group where the first string in the group is
        `LL[string_idx]`.

        Returns:
            A tuple of `(num_of_strings, leaf)` where `num_of_strings` is the
            number of strings merged and `leaf` is the newly merged string
            to be replaced in the new line.
        """
        # If the string group is wrapped inside an Atom node, we must make sure
        # to later replace that Atom with our new (merged) string leaf.
        atom_node = LL[string_idx].parent

        # We will place BREAK_MARK in between every two substrings that we
        # merge. We will then later go through our final result and use the
        # various instances of BREAK_MARK we find to add the right values to
        # the custom split map.
        BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"

        QUOTE = LL[string_idx].value[-1]

        def make_naked(string: str, string_prefix: str) -> str:
            """Strip @string (i.e. make it a "naked" string)

            Pre-conditions:
                * assert_is_leaf_string(@string)

            Returns:
                A string that is identical to @string except that
                @string_prefix has been stripped, the surrounding QUOTE
                characters have been removed, and any remaining QUOTE
                characters have been escaped.
            """
            assert_is_leaf_string(string)
            if "f" in string_prefix:
                f_expressions = [
                    string[span[0] + 1 : span[1] - 1]  # +-1 to get rid of curly braces
                    for span in iter_fexpr_spans(string)
                ]
                debug_expressions_contain_visible_quotes = any(
                    re.search(r".*[\'\"].*(?<![!:=])={1}(?!=)(?![^\s:])", expression)
                    for expression in f_expressions
                )
                if not debug_expressions_contain_visible_quotes:
                    # We don't want to toggle visible quotes in debug f-strings, as
                    # that would modify the AST
                    string = _toggle_fexpr_quotes(string, QUOTE)
                    # After quotes toggling, quotes in expressions won't be escaped
                    # because quotes can't be reused in f-strings. So we can simply
                    # let the escaping logic below run without knowing f-string
                    # expressions.

            # Escape any remaining QUOTE characters preceded by an even number
            # of backslashes (i.e. unescaped quotes).
            RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
            naked_string = string[len(string_prefix) + 1 : -1]
            naked_string = re.sub(
                "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
            )
            return naked_string

        # Holds the CustomSplit objects that will later be added to the custom
        # split map.
        custom_splits = []

        # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
        prefix_tracker = []

        # Sets the 'prefix' variable. This is the prefix that the final merged
        # string will have.
        next_str_idx = string_idx
        prefix = ""
        while (
            not prefix
            and is_valid_index(next_str_idx)
            and LL[next_str_idx].type == token.STRING
        ):
            prefix = get_string_prefix(LL[next_str_idx].value).lower()
            next_str_idx += 1

        # The next loop merges the string group. The final string will be
        # contained in 'S'.
        #
        # The following convenience variables are used:
        #
        #   S: string
        #   NS: naked string
        #   SS: next string
        #   NSS: naked next string
        S = ""
        NS = ""
        num_of_strings = 0
        next_str_idx = string_idx
        while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
            num_of_strings += 1

            SS = LL[next_str_idx].value
            next_prefix = get_string_prefix(SS).lower()

            # If this is an f-string group but this substring is not prefixed
            # with 'f'...
            if "f" in prefix and "f" not in next_prefix:
                # Then we must escape any braces contained in this substring.
                SS = re.sub(r"(\{|\})", r"\1\1", SS)

            NSS = make_naked(SS, next_prefix)

            has_prefix = bool(next_prefix)
            prefix_tracker.append(has_prefix)

            S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
            NS = make_naked(S, prefix)

            next_str_idx += 1

        # Take a note on the index of the non-STRING leaf.
        non_string_idx = next_str_idx

        S_leaf = Leaf(token.STRING, S)
        if self.normalize_strings:
            S_leaf.value = normalize_string_quotes(S_leaf.value)

        # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
        temp_string = S_leaf.value[len(prefix) + 1 : -1]
        for has_prefix in prefix_tracker:
            mark_idx = temp_string.find(BREAK_MARK)
            assert (
                mark_idx >= 0
            ), "Logic error while filling the custom string breakpoint cache."

            temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
            breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
            custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))

        string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))

        if atom_node is not None:
            # If not all children of the atom node are merged (this can happen
            # when there is a standalone comment in the middle) ...
            if non_string_idx - string_idx < len(atom_node.children):
                # We need to replace the old STRING leaves with the new string leaf.
                first_child_idx = LL[string_idx].remove()
                for idx in range(string_idx + 1, non_string_idx):
                    LL[idx].remove()
                if first_child_idx is not None:
                    atom_node.insert_child(first_child_idx, string_leaf)
            else:
                # Else replace the atom node with the new string leaf.
                replace_child(atom_node, string_leaf)

        self.add_custom_splits(string_leaf.value, custom_splits)
        return num_of_strings, string_leaf

    @staticmethod
    def _validate_msg(line: Line, string_idx: int) -> TResult[None]:
        """Validate (M)erge (S)tring (G)roup

        Transform-time string validation logic for _merge_string_group(...).

        Returns:
            * Ok(None), if ALL validation checks (listed below) pass.
            OR
            * Err(CannotTransform), if any of the following are true:
                - The target string group contains an "inner" stand-alone
                  comment (i.e. a stand-alone comment between two string
                  leaves of the group).
                - The target string is not in a string group (i.e. it has no
                  adjacent strings).
                - The string group has more than one inline comment.
                - The string group has an inline comment that appears to be a pragma.
                - The set of all string prefixes in the string group is of
                  length greater than one and is not equal to {"", "f"}.
                - The string group consists of raw strings.
                - The string group would merge f-strings with different quote types
                  and internal quotes.
                - The string group is stringified type annotations. We don't want to
                  process stringified type annotations since pyright doesn't support
                  them spanning multiple string values. (NOTE: mypy, pytype, pyre do
                  support them, so we can change if pyright also gains support in the
                  future. See https://github.com/microsoft/pyright/issues/4359.)
        """
        # We first check for "inner" stand-alone comments (i.e. stand-alone
        # comments that have a string leaf before them AND after them).
        for inc in [1, -1]:
            i = string_idx
            found_sa_comment = False
            is_valid_index = is_valid_index_factory(line.leaves)
            while is_valid_index(i) and line.leaves[i].type in [
                token.STRING,
                STANDALONE_COMMENT,
            ]:
                if line.leaves[i].type == STANDALONE_COMMENT:
                    found_sa_comment = True
                elif found_sa_comment:
                    # A string leaf on the far side of a stand-alone comment:
                    # the comment sits inside the group.
                    return TErr(
                        "StringMerger does NOT merge string groups which contain "
                        "stand-alone comments."
                    )

                i += inc

        QUOTE = line.leaves[string_idx].value[-1]

        num_of_inline_string_comments = 0
        set_of_prefixes = set()
        num_of_strings = 0
        for leaf in line.leaves[string_idx:]:
            if leaf.type != token.STRING:
                # If the string group is trailed by a comma, we count the
                # comments trailing the comma to be one of the string group's
                # comments.
                if leaf.type == token.COMMA and id(leaf) in line.comments:
                    num_of_inline_string_comments += 1
                break

            if has_triple_quotes(leaf.value):
                return TErr("StringMerger does NOT merge multiline strings.")

            num_of_strings += 1
            prefix = get_string_prefix(leaf.value).lower()
            if "r" in prefix:
                return TErr("StringMerger does NOT merge raw strings.")

            set_of_prefixes.add(prefix)

            if (
                "f" in prefix
                and leaf.value[-1] != QUOTE
                and (
                    "'" in leaf.value[len(prefix) + 1 : -1]
                    or '"' in leaf.value[len(prefix) + 1 : -1]
                )
            ):
                return TErr(
                    "StringMerger does NOT merge f-strings with different quote types"
                    " and internal quotes."
                )

            if id(leaf) in line.comments:
                num_of_inline_string_comments += 1
                if contains_pragma_comment(line.comments[id(leaf)]):
                    return TErr("Cannot merge strings which have pragma comments.")

        if num_of_strings < 2:
            return TErr(
                f"Not enough strings to merge (num_of_strings={num_of_strings})."
            )

        if num_of_inline_string_comments > 1:
            return TErr(
                f"Too many inline string comments ({num_of_inline_string_comments})."
            )

        if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
            return TErr(f"Too many different prefixes ({set_of_prefixes}).")

        return Ok(None)
class StringParenStripper(StringTransformer):
    """StringTransformer that strips surrounding parentheses from strings.

    Requirements:
        The line contains a string which is surrounded by parentheses and:
        - The target string is NOT the only argument to a function call.
        - The target string is NOT a "pointless" string.
        - The target string is NOT a dictionary value.
        - If the target string contains a PERCENT, the brackets are not
          preceded or followed by an operator with higher precedence than
          PERCENT.

    Transformations:
        The parentheses mentioned in the 'Requirements' section are stripped.

    Collaborations:
        StringParenStripper has its own inherent usefulness, but it is also
        relied on to clean up the parentheses created by StringParenWrapper (in
        the event that they are no longer needed).
    """

    def do_match(self, line: Line) -> TMatchResult:
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        string_indices = []

        idx = -1
        while True:
            idx += 1
            if idx >= len(LL):
                break
            leaf = LL[idx]

            # Should be a string...
            if leaf.type != token.STRING:
                continue

            # If this is a "pointless" string...
            if (
                leaf.parent
                and leaf.parent.parent
                and leaf.parent.parent.type == syms.simple_stmt
            ):
                continue

            # Should be preceded by a non-empty LPAR...
            if (
                not is_valid_index(idx - 1)
                or LL[idx - 1].type != token.LPAR
                or is_empty_lpar(LL[idx - 1])
            ):
                continue

            # That LPAR should NOT be preceded by a colon (which could be a
            # dictionary value), function name, or a closing bracket (which
            # could be a function returning a function or a list/dictionary
            # containing a function)...
            if is_valid_index(idx - 2) and (
                LL[idx - 2].type == token.COLON
                or LL[idx - 2].type == token.NAME
                or LL[idx - 2].type in CLOSING_BRACKETS
            ):
                continue

            string_idx = idx

            # Skip the string trailer, if one exists.
            string_parser = StringParser()
            next_idx = string_parser.parse(LL, string_idx)

            # if the leaves in the parsed string include a PERCENT, we need to
            # make sure the initial LPAR is NOT preceded by an operator with
            # higher or equal precedence to PERCENT
            if is_valid_index(idx - 2):
                # mypy can't quite follow unless we name this
                before_lpar = LL[idx - 2]
                if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
                    (
                        before_lpar.type
                        in {
                            token.STAR,
                            token.AT,
                            token.SLASH,
                            token.DOUBLESLASH,
                            token.PERCENT,
                            token.TILDE,
                            token.DOUBLESTAR,
                            token.AWAIT,
                            token.LSQB,
                            token.LPAR,
                        }
                    )
                    or (
                        # only unary PLUS/MINUS
                        before_lpar.parent
                        and before_lpar.parent.type == syms.factor
                        and (before_lpar.type in {token.PLUS, token.MINUS})
                    )
                ):
                    continue

            # Should be followed by a non-empty RPAR...
            if (
                is_valid_index(next_idx)
                and LL[next_idx].type == token.RPAR
                and not is_empty_rpar(LL[next_idx])
            ):
                # That RPAR should NOT be followed by anything with higher
                # precedence than PERCENT
                if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
                    token.DOUBLESTAR,
                    token.LSQB,
                    token.LPAR,
                    token.DOT,
                }:
                    continue

                string_indices.append(string_idx)
                # Skip the rest of this adjacent string group so we don't
                # match it twice.
                idx = string_idx
                while idx < len(LL) - 1 and LL[idx + 1].type == token.STRING:
                    idx += 1

        if string_indices:
            return Ok(string_indices)
        return TErr("This line has no strings wrapped in parens.")

    def do_transform(
        self, line: Line, string_indices: list[int]
    ) -> Iterator[TResult[Line]]:
        LL = line.leaves

        # Flat list of alternating string_idx/rpar_idx pairs to be stripped.
        string_and_rpar_indices: list[int] = []
        for string_idx in string_indices:
            string_parser = StringParser()
            rpar_idx = string_parser.parse(LL, string_idx)

            should_transform = True
            for leaf in (LL[string_idx - 1], LL[rpar_idx]):
                if line.comments_after(leaf):
                    # Should not strip parentheses which have comments attached
                    # to them.
                    should_transform = False
                    break
            if should_transform:
                string_and_rpar_indices.extend((string_idx, rpar_idx))

        if string_and_rpar_indices:
            yield Ok(self._transform_to_new_line(line, string_and_rpar_indices))
        else:
            yield Err(
                CannotTransform("All string groups have comments attached to them.")
            )

    def _transform_to_new_line(
        self, line: Line, string_and_rpar_indices: list[int]
    ) -> Line:
        """Build a copy of @line with the parens at the given indices removed."""
        LL = line.leaves

        new_line = line.clone()
        new_line.comments = line.comments.copy()

        previous_idx = -1
        # We need to sort the indices, since string_idx and its matching
        # rpar_idx may not come in order, e.g. in
        # `("outer" % ("inner".join(items)))`, the "inner" string's
        # string_idx is smaller than "outer" string's rpar_idx.
        for idx in sorted(string_and_rpar_indices):
            leaf = LL[idx]
            lpar_or_rpar_idx = idx - 1 if leaf.type == token.STRING else idx
            append_leaves(new_line, line, LL[previous_idx + 1 : lpar_or_rpar_idx])
            if leaf.type == token.STRING:
                string_leaf = Leaf(token.STRING, LL[idx].value)
                LL[lpar_or_rpar_idx].remove()  # Remove lpar.
                replace_child(LL[idx], string_leaf)
                new_line.append(string_leaf)
                # replace comments
                old_comments = new_line.comments.pop(id(LL[idx]), [])
                new_line.comments.setdefault(id(string_leaf), []).extend(old_comments)
            else:
                LL[lpar_or_rpar_idx].remove()  # This is a rpar.

            previous_idx = idx

        # Append the leaves after the last idx:
        append_leaves(new_line, line, LL[idx + 1 :])

        return new_line
class BaseStringSplitter(StringTransformer):
    """
    Abstract class for StringTransformers which transform a Line's strings by splitting
    them or placing them on their own lines where necessary to avoid going over
    the configured line length.

    Requirements:
        * The target string value is responsible for the line going over the
          line length limit. It follows that after all of black's other line
          split methods have been exhausted, this line (or one of the resulting
          lines after all line splits are performed) would still be over the
          line_length limit unless we split this string.
          AND
        * The target string is NOT a "pointless" string (i.e. a string that has
          no parent or siblings).
          AND
        * The target string is not followed by an inline comment that appears
          to be a pragma.
          AND
        * The target string is not a multiline (i.e. triple-quote) string.
    """

    # Operator tokens that may legally prefix/join an "atom" string.
    STRING_OPERATORS: Final = [
        token.EQEQUAL,
        token.GREATER,
        token.GREATEREQUAL,
        token.LESS,
        token.LESSEQUAL,
        token.NOTEQUAL,
        token.PERCENT,
        token.PLUS,
        token.STAR,
    ]

    @abstractmethod
    def do_splitter_match(self, line: Line) -> TMatchResult:
        """
        BaseStringSplitter asks its clients to override this method instead of
        `StringTransformer.do_match(...)`.

        Follows the same protocol as `StringTransformer.do_match(...)`.

        Refer to `help(StringTransformer.do_match)` for more information.
        """

    def do_match(self, line: Line) -> TMatchResult:
        # Template method: delegate matching to the subclass, then validate
        # that the matched string actually satisfies the class requirements.
        match_result = self.do_splitter_match(line)
        if isinstance(match_result, Err):
            return match_result

        string_indices = match_result.ok()
        assert len(string_indices) == 1, (
            f"{self.__class__.__name__} should only find one match at a time, found"
            f" {len(string_indices)}"
        )
        string_idx = string_indices[0]
        vresult = self._validate(line, string_idx)
        if isinstance(vresult, Err):
            return vresult

        return match_result

    def _validate(self, line: Line, string_idx: int) -> TResult[None]:
        """
        Checks that @line meets all of the requirements listed in this classes'
        docstring. Refer to `help(BaseStringSplitter)` for a detailed
        description of those requirements.

        Returns:
            * Ok(None), if ALL of the requirements are met.
            OR
            * Err(CannotTransform), if ANY of the requirements are NOT met.
        """
        LL = line.leaves

        string_leaf = LL[string_idx]

        max_string_length = self._get_max_string_length(line, string_idx)
        if len(string_leaf.value) <= max_string_length:
            return TErr(
                "The string itself is not what is causing this line to be too long."
            )

        # A "pointless" string is a bare expression statement (STRING followed
        # only by NEWLINE) or a leaf with no parent at all.
        if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
            token.STRING,
            token.NEWLINE,
        ]:
            return TErr(
                f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
                " no parent)."
            )

        if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
            line.comments[id(line.leaves[string_idx])]
        ):
            return TErr(
                "Line appears to end with an inline pragma comment. Splitting the line"
                " could modify the pragma's behavior."
            )

        if has_triple_quotes(string_leaf.value):
            return TErr("We cannot split multiline strings.")

        return Ok(None)

    def _get_max_string_length(self, line: Line, string_idx: int) -> int:
        """
        Calculates the max string length used when attempting to determine
        whether or not the target string is responsible for causing the line to
        go over the line length limit.

        WARNING: This method is tightly coupled to both StringSplitter and
        (especially) StringParenWrapper. There is probably a better way to
        accomplish what is being done here.

        Returns:
            max_string_length: such that `line.leaves[string_idx].value >
            max_string_length` implies that the target string IS responsible
            for causing this line to exceed the line length limit.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        # We use the shorthand "WMA4" in comments to abbreviate "We must
        # account for". When giving examples, we use STRING to mean some/any
        # valid string.
        #
        # Finally, we use the following convenience variables:
        #
        #   P:  The leaf that is before the target string leaf.
        #   N:  The leaf that is after the target string leaf.
        #   NN: The leaf that is after N.

        # WMA4 the whitespace at the beginning of the line.
        offset = line.depth * 4

        if is_valid_index(string_idx - 1):
            p_idx = string_idx - 1
            if (
                LL[string_idx - 1].type == token.LPAR
                and LL[string_idx - 1].value == ""
                and string_idx >= 2
            ):
                # If the previous leaf is an empty LPAR placeholder, we should skip it.
                p_idx -= 1

            P = LL[p_idx]
            if P.type in self.STRING_OPERATORS:
                # WMA4 a space and a string operator (e.g. `+ STRING` or `== STRING`).
                offset += len(str(P)) + 1

            if P.type == token.COMMA:
                # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
                offset += 3

            if P.type in [token.COLON, token.EQUAL, token.PLUSEQUAL, token.NAME]:
                # This conditional branch is meant to handle dictionary keys,
                # variable assignments, 'return STRING' statement lines, and
                # 'else STRING' ternary expression lines.

                # WMA4 a single space.
                offset += 1

                # WMA4 the lengths of any leaves that came before that space,
                # but after any closing bracket before that space.
                for leaf in reversed(LL[: p_idx + 1]):
                    offset += len(str(leaf))
                    if leaf.type in CLOSING_BRACKETS:
                        break

        if is_valid_index(string_idx + 1):
            N = LL[string_idx + 1]
            if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
                # If the next leaf is an empty RPAR placeholder, we should skip it.
                N = LL[string_idx + 2]

            if N.type == token.COMMA:
                # WMA4 a single comma at the end of the string (e.g `STRING,`).
                offset += 1

            if is_valid_index(string_idx + 2):
                NN = LL[string_idx + 2]

                if N.type == token.DOT and NN.type == token.NAME:
                    # This conditional branch is meant to handle method calls invoked
                    # off of a string literal up to and including the LPAR character.

                    # WMA4 the '.' character.
                    offset += 1

                    if (
                        is_valid_index(string_idx + 3)
                        and LL[string_idx + 3].type == token.LPAR
                    ):
                        # WMA4 the left parenthesis character.
                        offset += 1

                    # WMA4 the length of the method's name.
                    offset += len(NN.value)

        has_comments = False
        for comment_leaf in line.comments_after(LL[string_idx]):
            if not has_comments:
                has_comments = True
                # WMA4 two spaces before the '#' character.
                offset += 2

            # WMA4 the length of the inline comment.
            offset += len(comment_leaf.value)

        max_string_length = count_chars_in_width(str(line), self.line_length - offset)
        return max_string_length

    @staticmethod
    def _prefer_paren_wrap_match(LL: list[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the "prefer paren wrap" statement
            requirements listed in the 'Requirements' section of the StringParenWrapper
            class's docstring.
                OR
            None, otherwise.
        """
        # The line must start with a string.
        if LL[0].type != token.STRING:
            return None

        matching_nodes = [
            syms.listmaker,
            syms.dictsetmaker,
            syms.testlist_gexp,
        ]
        # If the string is an immediate child of a list/set/tuple literal...
        if (
            parent_type(LL[0]) in matching_nodes
            or parent_type(LL[0].parent) in matching_nodes
        ):
            # And the string is surrounded by commas (or is the first/last child)...
            prev_sibling = LL[0].prev_sibling
            next_sibling = LL[0].next_sibling
            if (
                not prev_sibling
                and not next_sibling
                and parent_type(LL[0]) == syms.atom
            ):
                # If it's an atom string, we need to check the parent atom's siblings.
                parent = LL[0].parent
                assert parent is not None  # For type checkers.
                prev_sibling = parent.prev_sibling
                next_sibling = parent.next_sibling
            if (not prev_sibling or prev_sibling.type == token.COMMA) and (
                not next_sibling or next_sibling.type == token.COMMA
            ):
                return 0

        return None
def iter_fexpr_spans(s: str) -> Iterator[tuple[int, int]]:
    """
    Yield the half-open span (left inclusive, right exclusive) of every
    outermost `{...}` expression in the f-string @s.

    The input is assumed to be a valid f-string, but invalid input will not
    crash this function -- unmatched braces are simply skipped over.
    """
    open_positions: list[int] = []  # stack of indices of unmatched '{'
    pos = 0
    length = len(s)
    while pos < length:
        char = s[pos]
        if char == "{":
            # Outside any expression, "{{" is an escaped literal brace.
            if not open_positions and s.startswith("{{", pos):
                pos += 2
            else:
                open_positions.append(pos)
                pos += 1
        elif char == "}":
            if open_positions:
                start = open_positions.pop()
                # Closing the outermost brace ends the expression: emit it.
                if not open_positions:
                    yield (start, pos + 1)
            pos += 1
        elif open_positions:
            # Inside an expression: fast-forward over nested string literals
            # (backslashes are not legal in f-string expressions, so no
            # escape handling is needed here).
            quote = None
            if s[pos : pos + 3] in ("'''", '"""'):
                quote = s[pos : pos + 3]
            elif char in ("'", '"'):
                quote = char
            if quote is None:
                pos += 1
            else:
                pos += len(quote)
                while pos < length and s[pos : pos + len(quote)] != quote:
                    pos += 1
                pos += len(quote)
        else:
            pos += 1
def fstring_contains_expr(s: str) -> bool:
    """Return True iff @s contains at least one `{...}` f-expression."""
    for _span in iter_fexpr_spans(s):
        return True
    return False
def _toggle_fexpr_quotes(fstring: str, old_quote: str) -> str:
    """
    Swap every occurrence of @old_quote inside f-string expressions for the
    opposite quote character.

    f-string expressions can't contain backslashes, so the quotes must be
    toggled if the f-string itself will end up using the same quote. Toggling
    without escaping is safe because a quote reused inside an expression would
    fail to parse in the first place.

    NOTE: If PEP 701 is accepted, the above statement will no longer be true.
    Though if quotes can be reused, we can simply reuse them without updates or
    escaping, once Black figures out how to parse the new grammar.
    """
    new_quote = "'" if old_quote == '"' else '"'
    chunks: list[str] = []
    cursor = 0
    for begin, end in iter_fexpr_spans(fstring):
        # Literal text up to the expression is kept verbatim; quotes are only
        # toggled within the expression span itself.
        chunks.extend((
            fstring[cursor:begin],
            fstring[begin:end].replace(old_quote, new_quote),
        ))
        cursor = end
    chunks.append(fstring[cursor:])
    return "".join(chunks)
class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
    """
    StringTransformer that splits "atom" strings (i.e. strings which exist on
    lines by themselves).

    Requirements:
        * The line consists ONLY of a single string (possibly prefixed by a
          string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
          a trailing comma.
          AND
        * All of the requirements listed in BaseStringSplitter's docstring.

    Transformations:
        The string mentioned in the 'Requirements' section is split into as
        many substrings as necessary to adhere to the configured line length.

        In the final set of substrings, no substring should be smaller than
        MIN_SUBSTR_SIZE characters.

        The string will ONLY be split on spaces (i.e. each new substring should
        start with a space). Note that the string will NOT be split on a space
        which is escaped with a backslash.

        If the string is an f-string, it will NOT be split in the middle of an
        f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
        else bar()} is an f-expression).

        If the string that is being split has an associated set of custom split
        records and those custom splits will NOT result in any line going over
        the configured line length, those custom splits are used. Otherwise the
        string is split as late as possible (from left-to-right) while still
        adhering to the transformation rules listed above.

    Collaborations:
        StringSplitter relies on StringMerger to construct the appropriate
        CustomSplit objects and add them to the custom split map.
    """

    # Minimum number of characters allowed in any substring produced by a split.
    MIN_SUBSTR_SIZE: Final = 6

    def do_splitter_match(self, line: Line) -> TMatchResult:
        """Match a line consisting only of an optional operator, one string,
        an optional trailer, and an optional trailing comma."""
        LL = line.leaves

        if self._prefer_paren_wrap_match(LL) is not None:
            return TErr("Line needs to be wrapped in parens first.")

        is_valid_index = is_valid_index_factory(LL)

        idx = 0

        # The first two leaves MAY be the 'not in' keywords...
        if (
            is_valid_index(idx)
            and is_valid_index(idx + 1)
            and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
            and str(LL[idx]) + str(LL[idx + 1]) == "not in"
        ):
            idx += 2
        # Else the first leaf MAY be a string operator symbol or the 'in' keyword...
        elif is_valid_index(idx) and (
            LL[idx].type in self.STRING_OPERATORS
            or LL[idx].type == token.NAME
            and str(LL[idx]) == "in"
        ):
            idx += 1

        # The next/first leaf MAY be an empty LPAR...
        if is_valid_index(idx) and is_empty_lpar(LL[idx]):
            idx += 1

        # The next/first leaf MUST be a string...
        if not is_valid_index(idx) or LL[idx].type != token.STRING:
            return TErr("Line does not start with a string.")

        string_idx = idx

        # Skip the string trailer, if one exists.
        string_parser = StringParser()
        idx = string_parser.parse(LL, string_idx)

        # That string MAY be followed by an empty RPAR...
        if is_valid_index(idx) and is_empty_rpar(LL[idx]):
            idx += 1

        # That string / empty RPAR leaf MAY be followed by a comma...
        if is_valid_index(idx) and LL[idx].type == token.COMMA:
            idx += 1

        # But no more leaves are allowed...
        if is_valid_index(idx):
            return TErr("This line does not end with a string.")

        return Ok([string_idx])

    def do_transform(
        self, line: Line, string_indices: list[int]
    ) -> Iterator[TResult[Line]]:
        """Split the matched string, yielding one Ok(Line) per substring line."""
        LL = line.leaves
        assert len(string_indices) == 1, (
            f"{self.__class__.__name__} should only find one match at a time, found"
            f" {len(string_indices)}"
        )
        string_idx = string_indices[0]

        # The closing quote character of the original string; reused to
        # terminate every substring we carve off.
        QUOTE = LL[string_idx].value[-1]

        is_valid_index = is_valid_index_factory(LL)
        insert_str_child = insert_str_child_factory(LL[string_idx])

        prefix = get_string_prefix(LL[string_idx].value).lower()

        # We MAY choose to drop the 'f' prefix from substrings that don't
        # contain any f-expressions, but ONLY if the original f-string
        # contains at least one f-expression. Otherwise, we will alter the AST
        # of the program.
        drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
            LL[string_idx].value
        )

        first_string_line = True

        string_op_leaves = self._get_string_operator_leaves(LL)
        string_op_leaves_length = (
            sum(len(str(prefix_leaf)) for prefix_leaf in string_op_leaves) + 1
            if string_op_leaves
            else 0
        )

        def maybe_append_string_operators(new_line: Line) -> None:
            """
            Side Effects:
                If @line starts with a string operator and this is the first
                line we are constructing, this function appends the string
                operator to @new_line and replaces the old string operator leaf
                in the node structure. Otherwise this function does nothing.
            """
            maybe_prefix_leaves = string_op_leaves if first_string_line else []
            for i, prefix_leaf in enumerate(maybe_prefix_leaves):
                replace_child(LL[i], prefix_leaf)
                new_line.append(prefix_leaf)

        ends_with_comma = (
            is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
        )

        def max_last_string_column() -> int:
            """
            Returns:
                The max allowed width of the string value used for the last
                line we will construct. Note that this value means the width
                rather than the number of characters (e.g., many East Asian
                characters expand to two columns).
            """
            result = self.line_length
            result -= line.depth * 4
            result -= 1 if ends_with_comma else 0
            result -= string_op_leaves_length
            return result

        # --- Calculate Max Break Width (for string value)
        # We start with the line length limit
        max_break_width = self.line_length
        # The last index of a string of length N is N-1.
        max_break_width -= 1
        # Leading whitespace is not present in the string value (e.g. Leaf.value).
        max_break_width -= line.depth * 4
        if max_break_width < 0:
            yield TErr(
                f"Unable to split {LL[string_idx].value} at such high of a line depth:"
                f" {line.depth}"
            )
            return

        # Check if StringMerger registered any custom splits.
        custom_splits = self.pop_custom_splits(LL[string_idx].value)
        # We use them ONLY if none of them would produce lines that exceed the
        # line limit.
        use_custom_breakpoints = bool(
            custom_splits
            and all(csplit.break_idx <= max_break_width for csplit in custom_splits)
        )

        # Temporary storage for the remaining chunk of the string line that
        # can't fit onto the line currently being constructed.
        rest_value = LL[string_idx].value

        def more_splits_should_be_made() -> bool:
            """
            Returns:
                True iff `rest_value` (the remaining string value from the last
                split), should be split again.
            """
            if use_custom_breakpoints:
                return len(custom_splits) > 1
            else:
                return str_width(rest_value) > max_last_string_column()

        string_line_results: list[Ok[Line]] = []
        while more_splits_should_be_made():
            if use_custom_breakpoints:
                # Custom User Split (manual)
                csplit = custom_splits.pop(0)
                break_idx = csplit.break_idx
            else:
                # Algorithmic Split (automatic)
                max_bidx = (
                    count_chars_in_width(rest_value, max_break_width)
                    - string_op_leaves_length
                )
                maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
                if maybe_break_idx is None:
                    # If we are unable to algorithmically determine a good split
                    # and this string has custom splits registered to it, we
                    # fall back to using them--which means we have to start
                    # over from the beginning.
                    if custom_splits:
                        rest_value = LL[string_idx].value
                        string_line_results = []
                        first_string_line = True
                        use_custom_breakpoints = True
                        continue

                    # Otherwise, we stop splitting here.
                    break

                break_idx = maybe_break_idx

            # --- Construct `next_value`
            next_value = rest_value[:break_idx] + QUOTE

            # HACK: The following 'if' statement is a hack to fix the custom
            # breakpoint index in the case of either: (a) substrings that were
            # f-strings but will have the 'f' prefix removed OR (b) substrings
            # that were not f-strings but will now become f-strings because of
            # redundant use of the 'f' prefix (i.e. none of the substrings
            # contain f-expressions but one or more of them had the 'f' prefix
            # anyway; in which case, we will prepend 'f' to _all_ substrings).
            #
            # There is probably a better way to accomplish what is being done
            # here...
            #
            # If this substring is an f-string, we _could_ remove the 'f'
            # prefix, and the current custom split did NOT originally use a
            # prefix...
            if (
                use_custom_breakpoints
                and not csplit.has_prefix
                and (
                    # `next_value == prefix + QUOTE` happens when the custom
                    # split is an empty string.
                    next_value == prefix + QUOTE
                    or next_value != self._normalize_f_string(next_value, prefix)
                )
            ):
                # Then `csplit.break_idx` will be off by one after removing
                # the 'f' prefix.
                break_idx += 1
                next_value = rest_value[:break_idx] + QUOTE

            if drop_pointless_f_prefix:
                next_value = self._normalize_f_string(next_value, prefix)

            # --- Construct `next_leaf`
            next_leaf = Leaf(token.STRING, next_value)
            insert_str_child(next_leaf)
            self._maybe_normalize_string_quotes(next_leaf)

            # --- Construct `next_line`
            next_line = line.clone()
            maybe_append_string_operators(next_line)
            next_line.append(next_leaf)
            string_line_results.append(Ok(next_line))

            rest_value = prefix + QUOTE + rest_value[break_idx:]
            first_string_line = False

        yield from string_line_results

        if drop_pointless_f_prefix:
            rest_value = self._normalize_f_string(rest_value, prefix)

        rest_leaf = Leaf(token.STRING, rest_value)
        insert_str_child(rest_leaf)

        # NOTE: I could not find a test case that verifies that the following
        # line is actually necessary, but it seems to be. Otherwise we risk
        # not normalizing the last substring, right?
        self._maybe_normalize_string_quotes(rest_leaf)

        last_line = line.clone()
        maybe_append_string_operators(last_line)

        # If there are any leaves to the right of the target string...
        if is_valid_index(string_idx + 1):
            # We use `temp_value` here to determine how long the last line
            # would be if we were to append all the leaves to the right of the
            # target string to the last string line.
            temp_value = rest_value
            for leaf in LL[string_idx + 1 :]:
                temp_value += str(leaf)
                if leaf.type == token.LPAR:
                    break

            # Try to fit them all on the same line with the last substring...
            if (
                str_width(temp_value) <= max_last_string_column()
                or LL[string_idx + 1].type == token.COMMA
            ):
                last_line.append(rest_leaf)
                append_leaves(last_line, line, LL[string_idx + 1 :])
                yield Ok(last_line)
            # Otherwise, place the last substring on one line and everything
            # else on a line below that...
            else:
                last_line.append(rest_leaf)
                yield Ok(last_line)

                non_string_line = line.clone()
                append_leaves(non_string_line, line, LL[string_idx + 1 :])
                yield Ok(non_string_line)
        # Else the target string was the last leaf...
        else:
            last_line.append(rest_leaf)
            last_line.comments = line.comments.copy()
            yield Ok(last_line)

    def _iter_nameescape_slices(self, string: str) -> Iterator[tuple[Index, Index]]:
        r"""
        Yields:
            All ranges of @string which, if @string were to be split there,
            would result in the splitting of an \N{...} expression (which is NOT
            allowed).
        """
        # True - the previous backslash was unescaped
        # False - the previous backslash was escaped *or* there was no backslash
        previous_was_unescaped_backslash = False
        it = iter(enumerate(string))
        for idx, c in it:
            if c == "\\":
                previous_was_unescaped_backslash = not previous_was_unescaped_backslash
                continue
            if not previous_was_unescaped_backslash or c != "N":
                previous_was_unescaped_backslash = False
                continue
            previous_was_unescaped_backslash = False

            begin = idx - 1  # the position of backslash before \N{...}
            # Consume from the shared iterator until the closing brace; the
            # for/else raises if the expression never closes.
            for idx, c in it:
                if c == "}":
                    end = idx
                    break
            else:
                # malformed nameescape expression?
                # should have been detected by AST parsing earlier...
                raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
            yield begin, end

    def _iter_fexpr_slices(self, string: str) -> Iterator[tuple[Index, Index]]:
        """
        Yields:
            All ranges of @string which, if @string were to be split there,
            would result in the splitting of an f-expression (which is NOT
            allowed).
        """
        if "f" not in get_string_prefix(string).lower():
            return
        yield from iter_fexpr_spans(string)

    def _get_illegal_split_indices(self, string: str) -> set[Index]:
        """Return every index of @string at which a split must not occur
        (inside f-expressions or \\N{...} escapes)."""
        illegal_indices: set[Index] = set()
        iterators = [
            self._iter_fexpr_slices(string),
            self._iter_nameescape_slices(string),
        ]
        for it in iterators:
            for begin, end in it:
                illegal_indices.update(range(begin, end))
        return illegal_indices

    def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
        """
        This method contains the algorithm that StringSplitter uses to
        determine which character to split each string at.

        Args:
            @string: The substring that we are attempting to split.
            @max_break_idx: The ideal break index. We will return this value if it
            meets all the necessary conditions. In the likely event that it
            doesn't we will try to find the closest index BELOW @max_break_idx
            that does. If that fails, we will expand our search by also
            considering all valid indices ABOVE @max_break_idx.

        Pre-Conditions:
            * assert_is_leaf_string(@string)
            * 0 <= @max_break_idx < len(@string)

        Returns:
            break_idx, if an index is able to be found that meets all of the
            conditions listed in the 'Transformations' section of this classes'
            docstring.
                OR
            None, otherwise.
        """
        is_valid_index = is_valid_index_factory(string)

        assert is_valid_index(max_break_idx)
        assert_is_leaf_string(string)

        _illegal_split_indices = self._get_illegal_split_indices(string)

        def breaks_unsplittable_expression(i: Index) -> bool:
            """
            Returns:
                True iff returning @i would result in the splitting of an
                unsplittable expression (which is NOT allowed).
            """
            return i in _illegal_split_indices

        def passes_all_checks(i: Index) -> bool:
            """
            Returns:
                True iff ALL of the conditions listed in the 'Transformations'
                section of this classes' docstring would be met by returning @i.
            """
            is_space = string[i] == " "
            is_split_safe = is_valid_index(i - 1) and string[i - 1] in SPLIT_SAFE_CHARS

            # Walk back over consecutive backslashes; an odd count means the
            # character at @i is escaped and must not start a new substring.
            is_not_escaped = True
            j = i - 1
            while is_valid_index(j) and string[j] == "\\":
                is_not_escaped = not is_not_escaped
                j -= 1

            is_big_enough = (
                len(string[i:]) >= self.MIN_SUBSTR_SIZE
                and len(string[:i]) >= self.MIN_SUBSTR_SIZE
            )
            return (
                (is_space or is_split_safe)
                and is_not_escaped
                and is_big_enough
                and not breaks_unsplittable_expression(i)
            )

        # First, we check all indices BELOW @max_break_idx.
        break_idx = max_break_idx
        while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
            break_idx -= 1

        if not passes_all_checks(break_idx):
            # If that fails, we check all indices ABOVE @max_break_idx.
            #
            # If we are able to find a valid index here, the next line is going
            # to be longer than the specified line length, but it's probably
            # better than doing nothing at all.
            break_idx = max_break_idx + 1
            while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
                break_idx += 1

            if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
                return None

        return break_idx

    def _maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
        """Normalize @leaf's quotes in place, if this mode normalizes strings."""
        if self.normalize_strings:
            leaf.value = normalize_string_quotes(leaf.value)

    def _normalize_f_string(self, string: str, prefix: str) -> str:
        """
        Pre-Conditions:
            * assert_is_leaf_string(@string)

        Returns:
            * If @string is an f-string that contains no f-expressions, we
            return a string identical to @string except that the 'f' prefix
            has been stripped and all double braces (i.e. '{{' or '}}') have
            been normalized (i.e. turned into '{' or '}').
                OR
            * Otherwise, we return @string.
        """
        assert_is_leaf_string(string)

        if "f" in prefix and not fstring_contains_expr(string):
            new_prefix = prefix.replace("f", "")

            temp = string[len(prefix) :]
            temp = re.sub(r"\{\{", "{", temp)
            temp = re.sub(r"\}\}", "}", temp)
            new_string = temp

            return f"{new_prefix}{new_string}"
        else:
            return string

    def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> list[Leaf]:
        """Return fresh leaves for the operator/keyword tokens (if any) that
        prefix the target string at the start of @leaves."""
        LL = list(leaves)

        string_op_leaves = []
        i = 0
        while LL[i].type in self.STRING_OPERATORS + [token.NAME]:
            prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip())
            string_op_leaves.append(prefix_leaf)
            i += 1
        return string_op_leaves
1891class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
1892 """
1893 StringTransformer that wraps strings in parens and then splits at the LPAR.
1895 Requirements:
1896 All of the requirements listed in BaseStringSplitter's docstring in
1897 addition to the requirements listed below:
1899 * The line is a return/yield statement, which returns/yields a string.
1900 OR
1901 * The line is part of a ternary expression (e.g. `x = y if cond else
1902 z`) such that the line starts with `else <string>`, where <string> is
1903 some string.
1904 OR
1905 * The line is an assert statement, which ends with a string.
1906 OR
1907 * The line is an assignment statement (e.g. `x = <string>` or `x +=
1908 <string>`) such that the variable is being assigned the value of some
1909 string.
1910 OR
1911 * The line is a dictionary key assignment where some valid key is being
1912 assigned the value of some string.
1913 OR
1914 * The line is an lambda expression and the value is a string.
1915 OR
1916 * The line starts with an "atom" string that prefers to be wrapped in
1917 parens. It's preferred to be wrapped when it's is an immediate child of
1918 a list/set/tuple literal, AND the string is surrounded by commas (or is
1919 the first/last child).
1921 Transformations:
1922 The chosen string is wrapped in parentheses and then split at the LPAR.
1924 We then have one line which ends with an LPAR and another line that
1925 starts with the chosen string. The latter line is then split again at
1926 the RPAR. This results in the RPAR (and possibly a trailing comma)
1927 being placed on its own line.
1929 NOTE: If any leaves exist to the right of the chosen string (except
1930 for a trailing comma, which would be placed after the RPAR), those
1931 leaves are placed inside the parentheses. In effect, the chosen
1932 string is not necessarily being "wrapped" by parentheses. We can,
1933 however, count on the LPAR being placed directly before the chosen
1934 string.
1936 In other words, StringParenWrapper creates "atom" strings. These
1937 can then be split again by StringSplitter, if necessary.
1939 Collaborations:
1940 In the event that a string line split by StringParenWrapper is
1941 changed such that it no longer needs to be given its own line,
1942 StringParenWrapper relies on StringParenStripper to clean up the
1943 parentheses it created.
1945 For "atom" strings that prefers to be wrapped in parens, it requires
1946 StringSplitter to hold the split until the string is wrapped in parens.
1947 """
1949 def do_splitter_match(self, line: Line) -> TMatchResult:
1950 LL = line.leaves
1952 if line.leaves[-1].type in OPENING_BRACKETS:
1953 return TErr(
1954 "Cannot wrap parens around a line that ends in an opening bracket."
1955 )
1957 string_idx = (
1958 self._return_match(LL)
1959 or self._else_match(LL)
1960 or self._assert_match(LL)
1961 or self._assign_match(LL)
1962 or self._dict_or_lambda_match(LL)
1963 or self._prefer_paren_wrap_match(LL)
1964 )
1966 if string_idx is not None:
1967 string_value = line.leaves[string_idx].value
1968 # If the string has neither spaces nor East Asian stops...
1969 if not any(
1970 char == " " or char in SPLIT_SAFE_CHARS for char in string_value
1971 ):
1972 # And will still violate the line length limit when split...
1973 max_string_width = self.line_length - ((line.depth + 1) * 4)
1974 if str_width(string_value) > max_string_width:
1975 # And has no associated custom splits...
1976 if not self.has_custom_splits(string_value):
1977 # Then we should NOT put this string on its own line.
1978 return TErr(
1979 "We do not wrap long strings in parentheses when the"
1980 " resultant line would still be over the specified line"
1981 " length and can't be split further by StringSplitter."
1982 )
1983 return Ok([string_idx])
1985 return TErr("This line does not contain any non-atomic strings.")
1987 @staticmethod
1988 def _return_match(LL: list[Leaf]) -> Optional[int]:
1989 """
1990 Returns:
1991 string_idx such that @LL[string_idx] is equal to our target (i.e.
1992 matched) string, if this line matches the return/yield statement
1993 requirements listed in the 'Requirements' section of this classes'
1994 docstring.
1995 OR
1996 None, otherwise.
1997 """
1998 # If this line is a part of a return/yield statement and the first leaf
1999 # contains either the "return" or "yield" keywords...
2000 if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
2001 0
2002 ].value in ["return", "yield"]:
2003 is_valid_index = is_valid_index_factory(LL)
2005 idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
2006 # The next visible leaf MUST contain a string...
2007 if is_valid_index(idx) and LL[idx].type == token.STRING:
2008 return idx
2010 return None
2012 @staticmethod
2013 def _else_match(LL: list[Leaf]) -> Optional[int]:
2014 """
2015 Returns:
2016 string_idx such that @LL[string_idx] is equal to our target (i.e.
2017 matched) string, if this line matches the ternary expression
2018 requirements listed in the 'Requirements' section of this classes'
2019 docstring.
2020 OR
2021 None, otherwise.
2022 """
2023 # If this line is a part of a ternary expression and the first leaf
2024 # contains the "else" keyword...
2025 if (
2026 parent_type(LL[0]) == syms.test
2027 and LL[0].type == token.NAME
2028 and LL[0].value == "else"
2029 ):
2030 is_valid_index = is_valid_index_factory(LL)
2032 idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
2033 # The next visible leaf MUST contain a string...
2034 if is_valid_index(idx) and LL[idx].type == token.STRING:
2035 return idx
2037 return None
2039 @staticmethod
2040 def _assert_match(LL: list[Leaf]) -> Optional[int]:
2041 """
2042 Returns:
2043 string_idx such that @LL[string_idx] is equal to our target (i.e.
2044 matched) string, if this line matches the assert statement
2045 requirements listed in the 'Requirements' section of this classes'
2046 docstring.
2047 OR
2048 None, otherwise.
2049 """
2050 # If this line is a part of an assert statement and the first leaf
2051 # contains the "assert" keyword...
2052 if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
2053 is_valid_index = is_valid_index_factory(LL)
2055 for i, leaf in enumerate(LL):
2056 # We MUST find a comma...
2057 if leaf.type == token.COMMA:
2058 idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
2060 # That comma MUST be followed by a string...
2061 if is_valid_index(idx) and LL[idx].type == token.STRING:
2062 string_idx = idx
2064 # Skip the string trailer, if one exists.
2065 string_parser = StringParser()
2066 idx = string_parser.parse(LL, string_idx)
2068 # But no more leaves are allowed...
2069 if not is_valid_index(idx):
2070 return string_idx
2072 return None
2074 @staticmethod
2075 def _assign_match(LL: list[Leaf]) -> Optional[int]:
2076 """
2077 Returns:
2078 string_idx such that @LL[string_idx] is equal to our target (i.e.
2079 matched) string, if this line matches the assignment statement
2080 requirements listed in the 'Requirements' section of this classes'
2081 docstring.
2082 OR
2083 None, otherwise.
2084 """
2085 # If this line is a part of an expression statement or is a function
2086 # argument AND the first leaf contains a variable name...
2087 if (
2088 parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
2089 and LL[0].type == token.NAME
2090 ):
2091 is_valid_index = is_valid_index_factory(LL)
2093 for i, leaf in enumerate(LL):
2094 # We MUST find either an '=' or '+=' symbol...
2095 if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
2096 idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
2098 # That symbol MUST be followed by a string...
2099 if is_valid_index(idx) and LL[idx].type == token.STRING:
2100 string_idx = idx
2102 # Skip the string trailer, if one exists.
2103 string_parser = StringParser()
2104 idx = string_parser.parse(LL, string_idx)
2106 # The next leaf MAY be a comma iff this line is a part
2107 # of a function argument...
2108 if (
2109 parent_type(LL[0]) == syms.argument
2110 and is_valid_index(idx)
2111 and LL[idx].type == token.COMMA
2112 ):
2113 idx += 1
2115 # But no more leaves are allowed...
2116 if not is_valid_index(idx):
2117 return string_idx
2119 return None
2121 @staticmethod
2122 def _dict_or_lambda_match(LL: list[Leaf]) -> Optional[int]:
2123 """
2124 Returns:
2125 string_idx such that @LL[string_idx] is equal to our target (i.e.
2126 matched) string, if this line matches the dictionary key assignment
2127 statement or lambda expression requirements listed in the
2128 'Requirements' section of this classes' docstring.
2129 OR
2130 None, otherwise.
2131 """
2132 # If this line is a part of a dictionary key assignment or lambda expression...
2133 parent_types = [parent_type(LL[0]), parent_type(LL[0].parent)]
2134 if syms.dictsetmaker in parent_types or syms.lambdef in parent_types:
2135 is_valid_index = is_valid_index_factory(LL)
2137 for i, leaf in enumerate(LL):
2138 # We MUST find a colon, it can either be dict's or lambda's colon...
2139 if leaf.type == token.COLON and i < len(LL) - 1:
2140 idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
2142 # That colon MUST be followed by a string...
2143 if is_valid_index(idx) and LL[idx].type == token.STRING:
2144 string_idx = idx
2146 # Skip the string trailer, if one exists.
2147 string_parser = StringParser()
2148 idx = string_parser.parse(LL, string_idx)
2150 # That string MAY be followed by a comma...
2151 if is_valid_index(idx) and LL[idx].type == token.COMMA:
2152 idx += 1
2154 # But no more leaves are allowed...
2155 if not is_valid_index(idx):
2156 return string_idx
2158 return None
    def do_transform(
        self, line: Line, string_indices: list[int]
    ) -> Iterator[TResult[Line]]:
        """Wrap the matched string in parentheses, yielding three lines.

        Yields, in order:
            1. The first line: everything left of the string, ending in `(`.
            2. The string line: the string itself (plus any leaves that were to
               its right), one level deeper.
            3. The last line: the closing `)`, followed by the trailing comma
               if the original line ended with one.

        Each yielded value is wrapped in `Ok`; this generator never yields
        `Err` itself.
        """
        LL = line.leaves
        assert len(string_indices) == 1, (
            f"{self.__class__.__name__} should only find one match at a time, found"
            f" {len(string_indices)}"
        )
        string_idx = string_indices[0]

        is_valid_index = is_valid_index_factory(LL)
        # Orphans the string leaf from the CST and lets us splice new leaves
        # into its former position, in order.
        insert_str_child = insert_str_child_factory(LL[string_idx])

        comma_idx = -1
        ends_with_comma = False
        if LL[comma_idx].type == token.COMMA:
            ends_with_comma = True

        leaves_to_steal_comments_from = [LL[string_idx]]
        if ends_with_comma:
            leaves_to_steal_comments_from.append(LL[comma_idx])

        # --- First Line
        first_line = line.clone()
        left_leaves = LL[:string_idx]

        # We have to remember to account for (possibly invisible) LPAR and RPAR
        # leaves that already wrapped the target string. If these leaves do
        # exist, we will replace them with our own LPAR and RPAR leaves.
        old_parens_exist = False
        if left_leaves and left_leaves[-1].type == token.LPAR:
            old_parens_exist = True
            leaves_to_steal_comments_from.append(left_leaves[-1])
            left_leaves.pop()

        append_leaves(first_line, line, left_leaves)

        lpar_leaf = Leaf(token.LPAR, "(")
        if old_parens_exist:
            # Reuse the old LPAR's position in the tree.
            replace_child(LL[string_idx - 1], lpar_leaf)
        else:
            insert_str_child(lpar_leaf)
        first_line.append(lpar_leaf)

        # We throw inline comments that were originally to the right of the
        # target string to the top line. They will now be shown to the right of
        # the LPAR.
        for leaf in leaves_to_steal_comments_from:
            for comment_leaf in line.comments_after(leaf):
                first_line.append(comment_leaf, preformatted=True)

        yield Ok(first_line)

        # --- Middle (String) Line
        # We only need to yield one (possibly too long) string line, since the
        # `StringSplitter` will break it down further if necessary.
        string_value = LL[string_idx].value
        string_line = Line(
            mode=line.mode,
            depth=line.depth + 1,
            inside_brackets=True,
            should_split_rhs=line.should_split_rhs,
            magic_trailing_comma=line.magic_trailing_comma,
        )
        string_leaf = Leaf(token.STRING, string_value)
        insert_str_child(string_leaf)
        string_line.append(string_leaf)

        old_rpar_leaf = None
        if is_valid_index(string_idx + 1):
            right_leaves = LL[string_idx + 1 :]
            if ends_with_comma:
                # The comma is re-emitted on the last line instead.
                right_leaves.pop()

            if old_parens_exist:
                assert right_leaves and right_leaves[-1].type == token.RPAR, (
                    "Apparently, old parentheses do NOT exist?!"
                    f" (left_leaves={left_leaves}, right_leaves={right_leaves})"
                )
                old_rpar_leaf = right_leaves.pop()
            elif right_leaves and right_leaves[-1].type == token.RPAR:
                # Special case for lambda expressions as dict's value, e.g.:
                #     my_dict = {
                #        "key": lambda x: f"formatted: {x}",
                #     }
                # After wrapping the dict's value with parentheses, the string is
                # followed by a RPAR but its opening bracket is lambda's, not
                # the string's:
                #     "key": (lambda x: f"formatted: {x}"),
                opening_bracket = right_leaves[-1].opening_bracket
                if opening_bracket is not None and opening_bracket in left_leaves:
                    index = left_leaves.index(opening_bracket)
                    if (
                        0 < index < len(left_leaves) - 1
                        and left_leaves[index - 1].type == token.COLON
                        and left_leaves[index + 1].value == "lambda"
                    ):
                        right_leaves.pop()

            append_leaves(string_line, line, right_leaves)

        yield Ok(string_line)

        # --- Last Line
        last_line = line.clone()
        # Share the tracker so the last line knows about the LPAR opened above.
        last_line.bracket_tracker = first_line.bracket_tracker

        new_rpar_leaf = Leaf(token.RPAR, ")")
        if old_rpar_leaf is not None:
            replace_child(old_rpar_leaf, new_rpar_leaf)
        else:
            insert_str_child(new_rpar_leaf)
        last_line.append(new_rpar_leaf)

        # If the target string ended with a comma, we place this comma to the
        # right of the RPAR on the last line.
        if ends_with_comma:
            comma_leaf = Leaf(token.COMMA, ",")
            replace_child(LL[comma_idx], comma_leaf)
            last_line.append(comma_leaf)

        yield Ok(last_line)
class StringParser:
    """
    A state machine that aids in parsing a string's "trailer", which can be
    either non-existent, an old-style formatting sequence (e.g. `% varX` or `%
    (varX, varY)`), or a method-call / attribute access (e.g. `.format(varX,
    varY)`).

    NOTE: A new StringParser object MUST be instantiated for each string
    trailer we need to parse.

    Examples:
        We shall assume that `line` equals the `Line` object that corresponds
        to the following line of python code:
        ```
        x = "Some {}.".format("String") + some_other_string
        ```

        Furthermore, we will assume that `string_idx` is some index such that:
        ```
        assert line.leaves[string_idx].value == "Some {}."
        ```

        The following code snippet then holds:
        ```
        string_parser = StringParser()
        idx = string_parser.parse(line.leaves, string_idx)
        assert line.leaves[idx].type == token.PLUS
        ```
    """

    # Sentinel token type used for "any other token" transitions.
    DEFAULT_TOKEN: Final = 20210605

    # String Parser States
    START: Final = 1
    DOT: Final = 2
    NAME: Final = 3
    PERCENT: Final = 4
    SINGLE_FMT_ARG: Final = 5
    LPAR: Final = 6
    RPAR: Final = 7
    DONE: Final = 8

    # Transition table: (current state, token type) -> next state.
    _goto: Final[dict[tuple[ParserState, NodeType], ParserState]] = {
        # A string trailer may start with '.' OR '%'.
        (START, token.DOT): DOT,
        (START, token.PERCENT): PERCENT,
        (START, DEFAULT_TOKEN): DONE,
        # A '.' MUST be followed by an attribute or method name.
        (DOT, token.NAME): NAME,
        # A method name MUST be followed by an '(', whereas an attribute name
        # is the last symbol in the string trailer.
        (NAME, token.LPAR): LPAR,
        (NAME, DEFAULT_TOKEN): DONE,
        # A '%' symbol can be followed by an '(' or a single argument (e.g. a
        # string or variable name).
        (PERCENT, token.LPAR): LPAR,
        (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
        # If a '%' symbol is followed by a single argument, that argument is
        # the last leaf in the string trailer.
        (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
        # If present, a ')' symbol is the last symbol in a string trailer.
        # (NOTE: LPARS and nested RPARS are not included in this lookup table,
        # since they are treated as a special case by the parsing logic in this
        # class's implementation.)
        (RPAR, DEFAULT_TOKEN): DONE,
    }

    def __init__(self) -> None:
        self._state = self.START
        self._unmatched_lpars = 0

    def parse(self, leaves: list[Leaf], string_idx: int) -> int:
        """
        Pre-conditions:
            * @leaves[@string_idx].type == token.STRING

        Returns:
            The index directly after the last leaf which is a part of the string
            trailer, if a "trailer" exists.
            OR
            @string_idx + 1, if no string "trailer" exists.
        """
        assert leaves[string_idx].type == token.STRING

        # Feed the machine one leaf at a time, starting right after the string,
        # until it reports that the trailer has ended (or we run out of leaves).
        idx = string_idx + 1
        while idx < len(leaves) and self._next_state(leaves[idx]):
            idx += 1
        return idx

    def _next_state(self, leaf: Leaf) -> bool:
        """
        Pre-conditions:
            * On the first call to this function, @leaf MUST be the leaf that
              was directly after the string leaf in question (e.g. if our target
              string is `line.leaves[i]` then the first call to this method must
              be `line.leaves[i + 1]`).
            * On the next call to this function, the leaf parameter passed in
              MUST be the leaf directly following @leaf.

        Returns:
            True iff @leaf is a part of the string's trailer.
        """
        # Empty (invisible) LPAR/RPAR leaves never affect the state.
        if is_empty_par(leaf):
            return True

        next_token = leaf.type
        if next_token == token.LPAR:
            self._unmatched_lpars += 1

        current_state = self._state
        if current_state == self.LPAR:
            # Special case: once inside parentheses, consume everything until
            # the matching RPAR balances the count back to zero.
            if next_token == token.RPAR:
                self._unmatched_lpars -= 1
                if self._unmatched_lpars == 0:
                    self._state = self.RPAR
        elif (current_state, next_token) in self._goto:
            # An exact (state, token) transition exists in the lookup table.
            self._state = self._goto[current_state, next_token]
        elif (current_state, self.DEFAULT_TOKEN) in self._goto:
            # Fall back to the state's default transition, if it has one.
            self._state = self._goto[current_state, self.DEFAULT_TOKEN]
        else:
            # No transition at all means the table is incomplete — a bug here,
            # not in the input.
            raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")

        return self._state != self.DONE
def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
    """
    Factory for a convenience function that is used to orphan @string_leaf
    and then insert multiple new leaves into the same part of the node
    structure that @string_leaf had originally occupied.

    Examples:
        Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
        string_leaf.parent`. Assume the node `N` has the following
        original structure:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(STRING, '"foo"'),
            ]
        )

        We then run the code snippet shown below.
        ```
        insert_str_child = insert_str_child_factory(string_leaf)

        lpar = Leaf(token.LPAR, '(')
        insert_str_child(lpar)

        bar = Leaf(token.STRING, '"bar"')
        insert_str_child(bar)

        rpar = Leaf(token.RPAR, ')')
        insert_str_child(rpar)
        ```

        After which point, it follows that `string_leaf.parent is None` and
        the node `N` now has the following structure:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(LPAR, '('),
                Leaf(STRING, '"bar"'),
                Leaf(RPAR, ')'),
            ]
        )
    """
    # Detach the string from the tree, remembering where it used to live.
    parent = string_leaf.parent
    insertion_idx = string_leaf.remove()

    def insert_str_child(child: LN) -> None:
        # Each insertion lands right after the previous one.
        nonlocal insertion_idx

        assert parent is not None
        assert insertion_idx is not None

        parent.insert_child(insertion_idx, child)
        insertion_idx += 1

    return insert_str_child
def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
    """
    Build a predicate that reports whether an index is in bounds for @seq.

    Examples:
        ```
        my_list = [1, 2, 3]

        is_valid_index = is_valid_index_factory(my_list)

        assert is_valid_index(0)
        assert is_valid_index(2)

        assert not is_valid_index(3)
        assert not is_valid_index(-1)
        ```
    """

    def is_valid_index(idx: int) -> bool:
        """
        Returns:
            True iff @idx is non-negative AND seq[@idx] does NOT raise an
            IndexError.
        """
        if idx < 0:
            return False
        # len() is looked up on each call, so the predicate stays correct even
        # if @seq grows or shrinks after the factory was invoked.
        return idx < len(seq)

    return is_valid_index