Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/black/trans.py: 13%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2String transformers that can split and merge strings.
3"""
5import re
6from abc import ABC, abstractmethod
7from collections import defaultdict
8from collections.abc import Callable, Collection, Iterable, Iterator, Sequence
9from dataclasses import dataclass
10from typing import Any, ClassVar, Final, Literal, Optional, TypeVar, Union
12from mypy_extensions import trait
14from black.comments import contains_pragma_comment
15from black.lines import Line, append_leaves
16from black.mode import Feature, Mode
17from black.nodes import (
18 CLOSING_BRACKETS,
19 OPENING_BRACKETS,
20 STANDALONE_COMMENT,
21 is_empty_lpar,
22 is_empty_par,
23 is_empty_rpar,
24 is_part_of_annotation,
25 parent_type,
26 replace_child,
27 syms,
28)
29from black.rusty import Err, Ok, Result
30from black.strings import (
31 assert_is_leaf_string,
32 count_chars_in_width,
33 get_string_prefix,
34 has_triple_quotes,
35 normalize_string_quotes,
36 str_width,
37)
38from blib2to3.pgen2 import token
39from blib2to3.pytree import Leaf, Node
class CannotTransform(Exception):
    """Base class for errors raised by Transformers.

    Raised directly (or wrapped in an ``Err`` result via ``TErr``) whenever a
    transformer determines that it cannot be applied to the given line.
    """
# types
T = TypeVar("T")  # generic payload type carried by a TResult
LN = Union[Leaf, Node]  # any node of the blib2to3 parse tree
Transformer = Callable[[Line, Collection[Feature], Mode], Iterator[Line]]
Index = int  # index into a Line's list of leaves
NodeType = int  # blib2to3 node/token type code
ParserState = int  # state identifier used by the string DFA parser
StringID = int  # id() of a string object (see CustomSplitMapMixin._get_key)
TResult = Result[T, CannotTransform]  # (T)ransform Result
TMatchResult = TResult[list[Index]]  # a successful match carries leaf indices

# Characters after which a string may be split without harming readability
# (East Asian ideographic comma, full stop, and fullwidth comma).
SPLIT_SAFE_CHARS = frozenset(["\u3001", "\u3002", "\uff0c"])  # East Asian stops
def TErr(err_msg: str) -> Err[CannotTransform]:
    """(T)ransform Err

    Convenience helper for working with the TResult type: wraps @err_msg in a
    CannotTransform exception and returns it as an ``Err``.
    """
    return Err(CannotTransform(err_msg))
def hug_power_op(
    line: Line, features: Collection[Feature], mode: Mode
) -> Iterator[Line]:
    """A transformer which normalizes spacing around power operators.

    Removes the spaces around a ``**`` operator (e.g. ``a ** b`` -> ``a**b``)
    when both operands are "simple" (plain names/numbers, possibly with a
    dotted lookup or a preceding unary operator).

    Raises:
        CannotTransform: if the line contains no ``**`` operator.
    """

    # Performance optimization to avoid unnecessary Leaf clones and other ops.
    for leaf in line.leaves:
        if leaf.type == token.DOUBLESTAR:
            break
    else:
        raise CannotTransform("No doublestar token was found in the line.")

    def is_simple_lookup(index: int, kind: Literal[1, -1]) -> bool:
        # `kind` selects the scan direction: -1 looks backwards (the base of
        # the power expression), 1 looks forwards (the exponent).
        # Brackets and parentheses indicate calls, subscripts, etc. ...
        # basically stuff that doesn't count as "simple". Only a NAME lookup
        # or dotted lookup (eg. NAME.NAME) is OK.
        if kind == -1:
            return handle_is_simple_look_up_prev(line, index, {token.RPAR, token.RSQB})
        else:
            return handle_is_simple_lookup_forward(
                line, index, {token.LPAR, token.LSQB}
            )

    def is_simple_operand(index: int, kind: Literal[1, -1]) -> bool:
        # An operand is considered "simple" if it's a NAME, a numeric CONSTANT, a
        # simple lookup (see above), with or without a preceding unary operator.
        start = line.leaves[index]
        if start.type in {token.NAME, token.NUMBER}:
            return is_simple_lookup(index, kind)

        if start.type in {token.PLUS, token.MINUS, token.TILDE}:
            if line.leaves[index + 1].type in {token.NAME, token.NUMBER}:
                # kind is always one as bases with a preceding unary op will be checked
                # for simplicity starting from the next token (so it'll hit the check
                # above).
                return is_simple_lookup(index + 1, kind=1)

        return False

    new_line = line.clone()
    should_hug = False
    for idx, leaf in enumerate(line.leaves):
        new_leaf = leaf.clone()
        if should_hug:
            # This leaf follows a hugged `**`: drop the space before it.
            new_leaf.prefix = ""
            should_hug = False

        should_hug = (
            (0 < idx < len(line.leaves) - 1)
            and leaf.type == token.DOUBLESTAR
            and is_simple_operand(idx - 1, kind=-1)
            and line.leaves[idx - 1].value != "lambda"
            and is_simple_operand(idx + 1, kind=1)
        )
        if should_hug:
            # Drop the space before the `**` itself (the space after it is
            # removed on the next iteration, see above).
            new_leaf.prefix = ""

        # We have to be careful to make a new line properly:
        # - bracket related metadata must be maintained (handled by Line.append)
        # - comments need to copied over, updating the leaf IDs they're attached to
        new_line.append(new_leaf, preformatted=True)
        for comment_leaf in line.comments_after(leaf):
            new_line.append(comment_leaf, preformatted=True)

    yield new_line
def handle_is_simple_look_up_prev(line: Line, index: int, disallowed: set[int]) -> bool:
    """
    Determine is_simple_lookup for the leaves that precede the doublestar
    token.  We walk backwards from @index, collecting the chained expression
    leaf by leaf, so that any bracket or parenthesis can be attributed to the
    single expression it actually belongs to.
    """
    leaves = line.leaves
    chain: list[Leaf] = []
    seen_disallowed = False

    while 0 <= index < len(leaves):
        leaf = leaves[index]
        chain.append(leaf)
        if leaf.type in disallowed:
            seen_disallowed = True
        if not is_expression_chained(chain):
            # The chain ended before reaching the start of the line: it is
            # simple only if no disallowed bracket was part of it.
            return not seen_disallowed
        index -= 1

    return True
def handle_is_simple_lookup_forward(
    line: Line, index: int, disallowed: set[int]
) -> bool:
    """
    Determine is_simple_lookup for the leaves that follow the doublestar
    token.  The forward direction is much simpler than the backward one and
    does not need to reason about chained expressions.
    """
    while 0 <= index < len(line.leaves):
        current = line.leaves[index]
        if current.type in disallowed:
            return False

        # A simple lookup continues only through dots and names, where the
        # name is not the 'for' keyword of a comprehension.
        is_lookup_part = current.type == token.DOT or (
            current.type == token.NAME and current.value != "for"
        )
        if not is_lookup_part:
            # Anything else ends the expression we're checking; since only the
            # disallowed tokens are semantically attached to this lookup,
            # reaching here means the lookup is simple.
            return True

        index += 1

    return True
def is_expression_chained(chained_leaves: list[Leaf]) -> bool:
    """
    Report whether the collected (reversed) leaf sequence still forms one
    chained expression, e.g. foo.lookup, foo().lookup, (foo.lookup()).
    """
    if len(chained_leaves) < 2:
        # A single leaf is trivially chained.
        return True

    newest = chained_leaves[-1]
    previous = chained_leaves[-2]

    if previous.type == token.NAME:
        # A name extends leftwards only through a dotted lookup.
        return newest.type == token.DOT
    if previous.type in {token.RPAR, token.RSQB}:
        # A closing bracket may be preceded by another closing bracket.
        return newest.type in {token.RSQB, token.RPAR}
    if previous.type in {token.LPAR, token.LSQB}:
        # An opening bracket may follow a callee name or another opener.
        return newest.type in {token.NAME, token.LPAR, token.LSQB}
    return False
class StringTransformer(ABC):
    """
    An implementation of the Transformer protocol that relies on its
    subclasses overriding the template methods `do_match(...)` and
    `do_transform(...)`.

    This Transformer works exclusively on strings (for example, by merging
    or splitting them).

    The following sections can be found among the docstrings of each concrete
    StringTransformer subclass.

    Requirements:
        Which requirements must be met of the given Line for this
        StringTransformer to be applied?

    Transformations:
        If the given Line meets all of the above requirements, which string
        transformations can you expect to be applied to it by this
        StringTransformer?

    Collaborations:
        What contractual agreements does this StringTransformer have with other
        StringTransfomers? Such collaborations should be eliminated/minimized
        as much as possible.
    """

    # NOTE(review): gives instances a function-like __name__; instances are
    # callable (see __call__) and used where Transformer callables are expected.
    __name__: Final = "StringTransformer"

    # Ideally this would be a dataclass, but unfortunately mypyc breaks when used with
    # `abc.ABC`.
    def __init__(self, line_length: int, normalize_strings: bool) -> None:
        self.line_length = line_length
        self.normalize_strings = normalize_strings

    @abstractmethod
    def do_match(self, line: Line) -> TMatchResult:
        """
        Returns:
            * Ok(string_indices) such that for each index, `line.leaves[index]`
              is our target string if a match was able to be made. For
              transformers that don't result in more lines (e.g. StringMerger,
              StringParenStripper), multiple matches and transforms are done at
              once to reduce the complexity.
            OR
            * Err(CannotTransform), if no match could be made.
        """

    @abstractmethod
    def do_transform(
        self, line: Line, string_indices: list[int]
    ) -> Iterator[TResult[Line]]:
        """
        Yields:
            * Ok(new_line) where new_line is the new transformed line.
            OR
            * Err(CannotTransform) if the transformation failed for some reason. The
              `do_match(...)` template method should usually be used to reject
              the form of the given Line, but in some cases it is difficult to
              know whether or not a Line meets the StringTransformer's
              requirements until the transformation is already midway.

        Side Effects:
            This method should NOT mutate @line directly, but it MAY mutate the
            Line's underlying Node structure. (WARNING: If the underlying Node
            structure IS altered, then this method should NOT be allowed to
            yield an CannotTransform after that point.)
        """

    def __call__(
        self, line: Line, _features: Collection[Feature], _mode: Mode
    ) -> Iterator[Line]:
        """
        StringTransformer instances have a call signature that mirrors that of
        the Transformer type.

        Raises:
            CannotTransform(...) if the concrete StringTransformer class is unable
            to transform @line.
        """
        # Optimization to avoid calling `self.do_match(...)` when the line does
        # not contain any string.
        if not any(leaf.type == token.STRING for leaf in line.leaves):
            raise CannotTransform("There are no strings in this line.")

        match_result = self.do_match(line)

        if isinstance(match_result, Err):
            cant_transform = match_result.err()
            raise CannotTransform(
                f"The string transformer {self.__class__.__name__} does not recognize"
                " this line as one that it can transform."
            ) from cant_transform

        string_indices = match_result.ok()

        # Feed each successful result back in as the input of the next
        # transform step, so all transformed lines get yielded in order.
        for line_result in self.do_transform(line, string_indices):
            if isinstance(line_result, Err):
                cant_transform = line_result.err()
                raise CannotTransform(
                    "StringTransformer failed while attempting to transform string."
                ) from cant_transform
            line = line_result.ok()
            yield line
@dataclass
class CustomSplit:
    """A custom (i.e. manual) string split.

    A single CustomSplit instance represents a single substring.

    Examples:
        Consider the following string:
        ```
        "Hi there friend."
        " This is a custom"
        f" string {split}."
        ```

        This string will correspond to the following three CustomSplit instances:
        ```
        CustomSplit(False, 16)
        CustomSplit(False, 17)
        CustomSplit(True, 16)
        ```
    """

    # True iff this substring carries a string prefix (e.g. the 'f' above).
    has_prefix: bool
    # Offset (within the merged string) at which this substring breaks.
    break_idx: int


# Key type used by CustomSplitMapMixin: (id of the string object, its value).
CustomSplitMapKey = tuple[StringID, str]
@trait
class CustomSplitMapMixin:
    """
    Mixin that remembers, per merged string, the sequence of CustomSplits
    that produced it, so the string can later be re-split at those exact
    points iff none of the resulting substrings go over the configured max
    line length.
    """

    _CUSTOM_SPLIT_MAP: ClassVar[dict[CustomSplitMapKey, tuple[CustomSplit, ...]]] = (
        defaultdict(tuple)
    )

    @staticmethod
    def _get_key(string: str) -> CustomSplitMapKey:
        """
        Returns:
            A unique identifier that is used internally to map @string to a
            group of custom splits.
        """
        return (id(string), string)

    def add_custom_splits(
        self, string: str, custom_splits: Iterable[CustomSplit]
    ) -> None:
        """Record @custom_splits as the custom splits belonging to @string."""
        self._CUSTOM_SPLIT_MAP[self._get_key(string)] = tuple(custom_splits)

    def pop_custom_splits(self, string: str) -> list[CustomSplit]:
        """Remove and return the custom splits mapped to @string.

        Returns:
            The list of custom splits associated with @string, or [] if none
            exist.  The mapping itself is deleted as a side effect.
        """
        key = self._get_key(string)
        # _CUSTOM_SPLIT_MAP is a defaultdict(tuple), so a missing key simply
        # yields an empty tuple here before being removed again.
        splits = self._CUSTOM_SPLIT_MAP[key]
        del self._CUSTOM_SPLIT_MAP[key]
        return list(splits)

    def has_custom_splits(self, string: str) -> bool:
        """Return True iff @string is associated with a set of custom splits."""
        return self._get_key(string) in self._CUSTOM_SPLIT_MAP
class StringMerger(StringTransformer, CustomSplitMapMixin):
    """StringTransformer that merges strings together.

    Requirements:
        (A) The line contains adjacent strings such that ALL of the validation checks
        listed in StringMerger._validate_msg(...)'s docstring pass.
        OR
        (B) The line contains a string which uses line continuation backslashes.

    Transformations:
        Depending on which of the two requirements above were met, either:

        (A) The string group associated with the target string is merged.
        OR
        (B) All line-continuation backslashes are removed from the target string.

    Collaborations:
        StringMerger provides custom split information to StringSplitter.
    """

    def do_match(self, line: Line) -> TMatchResult:
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        # Each collected index marks the FIRST leaf of a matched string group
        # (or a single string containing a backslash line continuation).
        string_indices = []
        idx = 0
        while is_valid_index(idx):
            leaf = LL[idx]
            if (
                leaf.type == token.STRING
                and is_valid_index(idx + 1)
                and LL[idx + 1].type == token.STRING
            ):
                # Let's check if the string group contains an inline comment
                # If we have a comment inline, we don't merge the strings
                contains_comment = False
                i = idx
                while is_valid_index(i):
                    if LL[i].type != token.STRING:
                        break
                    if line.comments_after(LL[i]):
                        contains_comment = True
                        break
                    i += 1

                if not contains_comment and not is_part_of_annotation(leaf):
                    string_indices.append(idx)

                # Advance to the next non-STRING leaf.
                idx += 2
                while is_valid_index(idx) and LL[idx].type == token.STRING:
                    idx += 1

            elif leaf.type == token.STRING and "\\\n" in leaf.value:
                string_indices.append(idx)
                # Advance to the next non-STRING leaf.
                idx += 1
                while is_valid_index(idx) and LL[idx].type == token.STRING:
                    idx += 1

            else:
                idx += 1

        if string_indices:
            return Ok(string_indices)
        else:
            return TErr("This line has no strings that need merging.")

    def do_transform(
        self, line: Line, string_indices: list[int]
    ) -> Iterator[TResult[Line]]:
        new_line = line

        # First, strip backslash line continuations; then merge adjacent
        # string groups.  Only fail if BOTH sub-transforms failed.
        rblc_result = self._remove_backslash_line_continuation_chars(
            new_line, string_indices
        )
        if isinstance(rblc_result, Ok):
            new_line = rblc_result.ok()

        msg_result = self._merge_string_group(new_line, string_indices)
        if isinstance(msg_result, Ok):
            new_line = msg_result.ok()

        if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
            msg_cant_transform = msg_result.err()
            rblc_cant_transform = rblc_result.err()
            cant_transform = CannotTransform(
                "StringMerger failed to merge any strings in this line."
            )

            # Chain the errors together using `__cause__`.
            msg_cant_transform.__cause__ = rblc_cant_transform
            cant_transform.__cause__ = msg_cant_transform

            yield Err(cant_transform)
        else:
            yield Ok(new_line)

    @staticmethod
    def _remove_backslash_line_continuation_chars(
        line: Line, string_indices: list[int]
    ) -> TResult[Line]:
        """
        Merge strings that were split across multiple lines using
        line-continuation backslashes.

        Returns:
            Ok(new_line), if @line contains backslash line-continuation
            characters.
            OR
            Err(CannotTransform), otherwise.
        """
        LL = line.leaves

        indices_to_transform = []
        for string_idx in string_indices:
            string_leaf = LL[string_idx]
            # Triple-quoted strings may legitimately contain "\<newline>", so
            # only single-quoted strings are candidates.
            if (
                string_leaf.type == token.STRING
                and "\\\n" in string_leaf.value
                and not has_triple_quotes(string_leaf.value)
            ):
                indices_to_transform.append(string_idx)

        if not indices_to_transform:
            return TErr(
                "Found no string leaves that contain backslash line continuation"
                " characters."
            )

        new_line = line.clone()
        new_line.comments = line.comments.copy()
        append_leaves(new_line, line, LL)

        for string_idx in indices_to_transform:
            new_string_leaf = new_line.leaves[string_idx]
            new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")

        return Ok(new_line)

    def _merge_string_group(
        self, line: Line, string_indices: list[int]
    ) -> TResult[Line]:
        """
        Merges string groups (i.e. set of adjacent strings).

        Each index from `string_indices` designates one string group's first
        leaf in `line.leaves`.

        Returns:
            Ok(new_line), if ALL of the validation checks found in
            _validate_msg(...) pass.
            OR
            Err(CannotTransform), otherwise.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        # A dict of {string_idx: tuple[num_of_strings, string_leaf]}.
        merged_string_idx_dict: dict[int, tuple[int, Leaf]] = {}
        for string_idx in string_indices:
            vresult = self._validate_msg(line, string_idx)
            if isinstance(vresult, Err):
                # Skip groups that fail validation; other groups on the same
                # line may still be mergeable.
                continue
            merged_string_idx_dict[string_idx] = self._merge_one_string_group(
                LL, string_idx, is_valid_index
            )

        if not merged_string_idx_dict:
            return TErr("No string group is merged")

        # Build the final line ('new_line') that this method will later return.
        new_line = line.clone()
        previous_merged_string_idx = -1
        previous_merged_num_of_strings = -1
        for i, leaf in enumerate(LL):
            if i in merged_string_idx_dict:
                previous_merged_string_idx = i
                previous_merged_num_of_strings, string_leaf = merged_string_idx_dict[i]
                new_line.append(string_leaf)

            # Skip the original leaves that were consumed by the merge, but
            # keep any comments attached to them.
            if (
                previous_merged_string_idx
                <= i
                < previous_merged_string_idx + previous_merged_num_of_strings
            ):
                for comment_leaf in line.comments_after(leaf):
                    new_line.append(comment_leaf, preformatted=True)
                continue

            append_leaves(new_line, line, [leaf])

        return Ok(new_line)

    def _merge_one_string_group(
        self, LL: list[Leaf], string_idx: int, is_valid_index: Callable[[int], bool]
    ) -> tuple[int, Leaf]:
        """
        Merges one string group where the first string in the group is
        `LL[string_idx]`.

        Returns:
            A tuple of `(num_of_strings, leaf)` where `num_of_strings` is the
            number of strings merged and `leaf` is the newly merged string
            to be replaced in the new line.
        """
        # If the string group is wrapped inside an Atom node, we must make sure
        # to later replace that Atom with our new (merged) string leaf.
        atom_node = LL[string_idx].parent

        # We will place BREAK_MARK in between every two substrings that we
        # merge. We will then later go through our final result and use the
        # various instances of BREAK_MARK we find to add the right values to
        # the custom split map.
        BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"

        QUOTE = LL[string_idx].value[-1]

        def make_naked(string: str, string_prefix: str) -> str:
            """Strip @string (i.e. make it a "naked" string)

            Pre-conditions:
                * assert_is_leaf_string(@string)

            Returns:
                A string that is identical to @string except that
                @string_prefix has been stripped, the surrounding QUOTE
                characters have been removed, and any remaining QUOTE
                characters have been escaped.
            """
            assert_is_leaf_string(string)
            if "f" in string_prefix:
                f_expressions = [
                    string[span[0] + 1 : span[1] - 1]  # +-1 to get rid of curly braces
                    for span in iter_fexpr_spans(string)
                ]
                debug_expressions_contain_visible_quotes = any(
                    re.search(r".*[\'\"].*(?<![!:=])={1}(?!=)(?![^\s:])", expression)
                    for expression in f_expressions
                )
                if not debug_expressions_contain_visible_quotes:
                    # We don't want to toggle visible quotes in debug f-strings, as
                    # that would modify the AST
                    string = _toggle_fexpr_quotes(string, QUOTE)
                    # After quotes toggling, quotes in expressions won't be escaped
                    # because quotes can't be reused in f-strings. So we can simply
                    # let the escaping logic below run without knowing f-string
                    # expressions.

            # Escape any remaining QUOTE characters preceded by an even number
            # of backslashes (i.e. unescaped quotes).
            RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
            naked_string = string[len(string_prefix) + 1 : -1]
            naked_string = re.sub(
                "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
            )
            return naked_string

        # Holds the CustomSplit objects that will later be added to the custom
        # split map.
        custom_splits = []

        # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
        prefix_tracker = []

        # Sets the 'prefix' variable. This is the prefix that the final merged
        # string will have.
        next_str_idx = string_idx
        prefix = ""
        while (
            not prefix
            and is_valid_index(next_str_idx)
            and LL[next_str_idx].type == token.STRING
        ):
            prefix = get_string_prefix(LL[next_str_idx].value).lower()
            next_str_idx += 1

        # The next loop merges the string group. The final string will be
        # contained in 'S'.
        #
        # The following convenience variables are used:
        #
        #   S: string
        #   NS: naked string
        #   SS: next string
        #   NSS: naked next string
        S = ""
        NS = ""
        num_of_strings = 0
        next_str_idx = string_idx
        while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
            num_of_strings += 1

            SS = LL[next_str_idx].value
            next_prefix = get_string_prefix(SS).lower()

            # If this is an f-string group but this substring is not prefixed
            # with 'f'...
            if "f" in prefix and "f" not in next_prefix:
                # Then we must escape any braces contained in this substring.
                SS = re.sub(r"(\{|\})", r"\1\1", SS)

            NSS = make_naked(SS, next_prefix)

            has_prefix = bool(next_prefix)
            prefix_tracker.append(has_prefix)

            S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
            NS = make_naked(S, prefix)

            next_str_idx += 1

        # Take a note on the index of the non-STRING leaf.
        non_string_idx = next_str_idx

        S_leaf = Leaf(token.STRING, S)
        if self.normalize_strings:
            S_leaf.value = normalize_string_quotes(S_leaf.value)

        # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
        temp_string = S_leaf.value[len(prefix) + 1 : -1]
        for has_prefix in prefix_tracker:
            mark_idx = temp_string.find(BREAK_MARK)
            assert (
                mark_idx >= 0
            ), "Logic error while filling the custom string breakpoint cache."

            temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
            breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
            custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))

        string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))

        if atom_node is not None:
            # If not all children of the atom node are merged (this can happen
            # when there is a standalone comment in the middle) ...
            if non_string_idx - string_idx < len(atom_node.children):
                # We need to replace the old STRING leaves with the new string leaf.
                first_child_idx = LL[string_idx].remove()
                for idx in range(string_idx + 1, non_string_idx):
                    LL[idx].remove()
                if first_child_idx is not None:
                    atom_node.insert_child(first_child_idx, string_leaf)
            else:
                # Else replace the atom node with the new string leaf.
                replace_child(atom_node, string_leaf)

        self.add_custom_splits(string_leaf.value, custom_splits)
        return num_of_strings, string_leaf

    @staticmethod
    def _validate_msg(line: Line, string_idx: int) -> TResult[None]:
        """Validate (M)erge (S)tring (G)roup

        Transform-time string validation logic for _merge_string_group(...).

        Returns:
            * Ok(None), if ALL validation checks (listed below) pass.
            OR
            * Err(CannotTransform), if any of the following are true:
                - The target string group contains an "inner" stand-alone
                  comment (i.e. a stand-alone comment between two string
                  leaves of the group).
                - The target string is not in a string group (i.e. it has no
                  adjacent strings).
                - The string group has more than one inline comment.
                - The string group has an inline comment that appears to be a pragma.
                - The set of all string prefixes in the string group is of
                  length greater than one and is not equal to {"", "f"}.
                - The string group consists of raw strings.
                - The string group would merge f-strings with different quote types
                  and internal quotes.
                - The string group is stringified type annotations. We don't want to
                  process stringified type annotations since pyright doesn't support
                  them spanning multiple string values. (NOTE: mypy, pytype, pyre do
                  support them, so we can change if pyright also gains support in the
                  future. See https://github.com/microsoft/pyright/issues/4359.)
        """
        # We first check for "inner" stand-alone comments (i.e. stand-alone
        # comments that have a string leaf before them AND after them).
        for inc in [1, -1]:
            i = string_idx
            found_sa_comment = False
            is_valid_index = is_valid_index_factory(line.leaves)
            while is_valid_index(i) and line.leaves[i].type in [
                token.STRING,
                STANDALONE_COMMENT,
            ]:
                if line.leaves[i].type == STANDALONE_COMMENT:
                    found_sa_comment = True
                elif found_sa_comment:
                    # A string leaf on the far side of a stand-alone comment:
                    # the comment sits inside the group.
                    return TErr(
                        "StringMerger does NOT merge string groups which contain "
                        "stand-alone comments."
                    )

                i += inc

        QUOTE = line.leaves[string_idx].value[-1]

        num_of_inline_string_comments = 0
        set_of_prefixes = set()
        num_of_strings = 0
        for leaf in line.leaves[string_idx:]:
            if leaf.type != token.STRING:
                # If the string group is trailed by a comma, we count the
                # comments trailing the comma to be one of the string group's
                # comments.
                if leaf.type == token.COMMA and id(leaf) in line.comments:
                    num_of_inline_string_comments += 1
                break

            if has_triple_quotes(leaf.value):
                return TErr("StringMerger does NOT merge multiline strings.")

            num_of_strings += 1
            prefix = get_string_prefix(leaf.value).lower()
            if "r" in prefix:
                return TErr("StringMerger does NOT merge raw strings.")

            set_of_prefixes.add(prefix)

            if (
                "f" in prefix
                and leaf.value[-1] != QUOTE
                and (
                    "'" in leaf.value[len(prefix) + 1 : -1]
                    or '"' in leaf.value[len(prefix) + 1 : -1]
                )
            ):
                return TErr(
                    "StringMerger does NOT merge f-strings with different quote types"
                    " and internal quotes."
                )

            if id(leaf) in line.comments:
                num_of_inline_string_comments += 1
                if contains_pragma_comment(line.comments[id(leaf)]):
                    return TErr("Cannot merge strings which have pragma comments.")

        if num_of_strings < 2:
            return TErr(
                f"Not enough strings to merge (num_of_strings={num_of_strings})."
            )

        if num_of_inline_string_comments > 1:
            return TErr(
                f"Too many inline string comments ({num_of_inline_string_comments})."
            )

        if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
            return TErr(f"Too many different prefixes ({set_of_prefixes}).")

        return Ok(None)
class StringParenStripper(StringTransformer):
    """StringTransformer that strips surrounding parentheses from strings.

    Requirements:
        The line contains a string which is surrounded by parentheses and:
        - The target string is NOT the only argument to a function call.
        - The target string is NOT a "pointless" string.
        - The target string is NOT a dictionary value.
        - If the target string contains a PERCENT, the brackets are not
          preceded or followed by an operator with higher precedence than
          PERCENT.

    Transformations:
        The parentheses mentioned in the 'Requirements' section are stripped.

    Collaborations:
        StringParenStripper has its own inherent usefulness, but it is also
        relied on to clean up the parentheses created by StringParenWrapper (in
        the event that they are no longer needed).
    """

    def do_match(self, line: Line) -> TMatchResult:
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        string_indices = []

        idx = -1
        while True:
            idx += 1
            if idx >= len(LL):
                break
            leaf = LL[idx]

            # Should be a string...
            if leaf.type != token.STRING:
                continue

            # If this is a "pointless" string...
            if (
                leaf.parent
                and leaf.parent.parent
                and leaf.parent.parent.type == syms.simple_stmt
            ):
                continue

            # Should be preceded by a non-empty LPAR...
            if (
                not is_valid_index(idx - 1)
                or LL[idx - 1].type != token.LPAR
                or is_empty_lpar(LL[idx - 1])
            ):
                continue

            # That LPAR should NOT be preceded by a colon (which could be a
            # dictionary value), function name, or a closing bracket (which
            # could be a function returning a function or a list/dictionary
            # containing a function)...
            if is_valid_index(idx - 2) and (
                LL[idx - 2].type == token.COLON
                or LL[idx - 2].type == token.NAME
                or LL[idx - 2].type in CLOSING_BRACKETS
            ):
                continue

            string_idx = idx

            # Skip the string trailer, if one exists.
            string_parser = StringParser()
            next_idx = string_parser.parse(LL, string_idx)

            # if the leaves in the parsed string include a PERCENT, we need to
            # make sure the initial LPAR is NOT preceded by an operator with
            # higher or equal precedence to PERCENT
            if is_valid_index(idx - 2):
                # mypy can't quite follow unless we name this
                before_lpar = LL[idx - 2]
                if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
                    (
                        before_lpar.type
                        in {
                            token.STAR,
                            token.AT,
                            token.SLASH,
                            token.DOUBLESLASH,
                            token.PERCENT,
                            token.TILDE,
                            token.DOUBLESTAR,
                            token.AWAIT,
                            token.LSQB,
                            token.LPAR,
                        }
                    )
                    or (
                        # only unary PLUS/MINUS
                        before_lpar.parent
                        and before_lpar.parent.type == syms.factor
                        and (before_lpar.type in {token.PLUS, token.MINUS})
                    )
                ):
                    continue

            # Should be followed by a non-empty RPAR...
            if (
                is_valid_index(next_idx)
                and LL[next_idx].type == token.RPAR
                and not is_empty_rpar(LL[next_idx])
            ):
                # That RPAR should NOT be followed by anything with higher
                # precedence than PERCENT
                if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
                    token.DOUBLESTAR,
                    token.LSQB,
                    token.LPAR,
                    token.DOT,
                }:
                    continue

                string_indices.append(string_idx)
                # Skip the rest of this adjacent string group so we don't
                # match it twice.
                idx = string_idx
                while idx < len(LL) - 1 and LL[idx + 1].type == token.STRING:
                    idx += 1

        if string_indices:
            return Ok(string_indices)
        return TErr("This line has no strings wrapped in parens.")

    def do_transform(
        self, line: Line, string_indices: list[int]
    ) -> Iterator[TResult[Line]]:
        LL = line.leaves

        # Flat list of alternating string_idx/rpar_idx pairs to be stripped.
        string_and_rpar_indices: list[int] = []
        for string_idx in string_indices:
            string_parser = StringParser()
            rpar_idx = string_parser.parse(LL, string_idx)

            should_transform = True
            for leaf in (LL[string_idx - 1], LL[rpar_idx]):
                if line.comments_after(leaf):
                    # Should not strip parentheses which have comments attached
                    # to them.
                    should_transform = False
                    break
            if should_transform:
                string_and_rpar_indices.extend((string_idx, rpar_idx))

        if string_and_rpar_indices:
            yield Ok(self._transform_to_new_line(line, string_and_rpar_indices))
        else:
            yield Err(
                CannotTransform("All string groups have comments attached to them.")
            )

    def _transform_to_new_line(
        self, line: Line, string_and_rpar_indices: list[int]
    ) -> Line:
        """Build a copy of @line with the parens at the given indices removed."""
        LL = line.leaves

        new_line = line.clone()
        new_line.comments = line.comments.copy()

        previous_idx = -1
        # We need to sort the indices, since string_idx and its matching
        # rpar_idx may not come in order, e.g. in
        # `("outer" % ("inner".join(items)))`, the "inner" string's
        # string_idx is smaller than "outer" string's rpar_idx.
        for idx in sorted(string_and_rpar_indices):
            leaf = LL[idx]
            lpar_or_rpar_idx = idx - 1 if leaf.type == token.STRING else idx
            append_leaves(new_line, line, LL[previous_idx + 1 : lpar_or_rpar_idx])
            if leaf.type == token.STRING:
                string_leaf = Leaf(token.STRING, LL[idx].value)
                LL[lpar_or_rpar_idx].remove()  # Remove lpar.
                replace_child(LL[idx], string_leaf)
                new_line.append(string_leaf)
                # replace comments
                old_comments = new_line.comments.pop(id(LL[idx]), [])
                new_line.comments.setdefault(id(string_leaf), []).extend(old_comments)
            else:
                LL[lpar_or_rpar_idx].remove()  # This is a rpar.

            previous_idx = idx

        # Append the leaves after the last idx:
        append_leaves(new_line, line, LL[idx + 1 :])

        return new_line
class BaseStringSplitter(StringTransformer):
    """
    Abstract class for StringTransformers which transform a Line's strings by splitting
    them or placing them on their own lines where necessary to avoid going over
    the configured line length.

    Requirements:
        * The target string value is responsible for the line going over the
          line length limit. It follows that after all of black's other line
          split methods have been exhausted, this line (or one of the resulting
          lines after all line splits are performed) would still be over the
          line_length limit unless we split this string.
          AND
        * The target string is NOT a "pointless" string (i.e. a string that has
          no parent or siblings).
          AND
        * The target string is not followed by an inline comment that appears
          to be a pragma.
          AND
        * The target string is not a multiline (i.e. triple-quote) string.
    """

    # Operator tokens that may legally prefix/join an "atom" string.
    STRING_OPERATORS: Final = [
        token.EQEQUAL,
        token.GREATER,
        token.GREATEREQUAL,
        token.LESS,
        token.LESSEQUAL,
        token.NOTEQUAL,
        token.PERCENT,
        token.PLUS,
        token.STAR,
    ]

    @abstractmethod
    def do_splitter_match(self, line: Line) -> TMatchResult:
        """
        BaseStringSplitter asks its clients to override this method instead of
        `StringTransformer.do_match(...)`.

        Follows the same protocol as `StringTransformer.do_match(...)`.

        Refer to `help(StringTransformer.do_match)` for more information.
        """

    def do_match(self, line: Line) -> TMatchResult:
        # Template method: delegate matching to the subclass, then validate
        # that the matched string actually satisfies the class requirements.
        match_result = self.do_splitter_match(line)
        if isinstance(match_result, Err):
            return match_result

        string_indices = match_result.ok()
        assert len(string_indices) == 1, (
            f"{self.__class__.__name__} should only find one match at a time, found"
            f" {len(string_indices)}"
        )
        string_idx = string_indices[0]
        vresult = self._validate(line, string_idx)
        if isinstance(vresult, Err):
            return vresult

        return match_result

    def _validate(self, line: Line, string_idx: int) -> TResult[None]:
        """
        Checks that @line meets all of the requirements listed in this classes'
        docstring. Refer to `help(BaseStringSplitter)` for a detailed
        description of those requirements.

        Returns:
            * Ok(None), if ALL of the requirements are met.
            OR
            * Err(CannotTransform), if ANY of the requirements are NOT met.
        """
        LL = line.leaves

        string_leaf = LL[string_idx]

        max_string_length = self._get_max_string_length(line, string_idx)
        if len(string_leaf.value) <= max_string_length:
            return TErr(
                "The string itself is not what is causing this line to be too long."
            )

        # A "pointless" string is a bare expression statement (STRING followed
        # only by NEWLINE) or a leaf with no parent at all.
        if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
            token.STRING,
            token.NEWLINE,
        ]:
            return TErr(
                f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
                " no parent)."
            )

        if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
            line.comments[id(line.leaves[string_idx])]
        ):
            return TErr(
                "Line appears to end with an inline pragma comment. Splitting the line"
                " could modify the pragma's behavior."
            )

        if has_triple_quotes(string_leaf.value):
            return TErr("We cannot split multiline strings.")

        return Ok(None)

    def _get_max_string_length(self, line: Line, string_idx: int) -> int:
        """
        Calculates the max string length used when attempting to determine
        whether or not the target string is responsible for causing the line to
        go over the line length limit.

        WARNING: This method is tightly coupled to both StringSplitter and
        (especially) StringParenWrapper. There is probably a better way to
        accomplish what is being done here.

        Returns:
            max_string_length: such that `line.leaves[string_idx].value >
            max_string_length` implies that the target string IS responsible
            for causing this line to exceed the line length limit.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        # We use the shorthand "WMA4" in comments to abbreviate "We must
        # account for". When giving examples, we use STRING to mean some/any
        # valid string.
        #
        # Finally, we use the following convenience variables:
        #
        #   P:  The leaf that is before the target string leaf.
        #   N:  The leaf that is after the target string leaf.
        #   NN: The leaf that is after N.

        # WMA4 the whitespace at the beginning of the line.
        offset = line.depth * 4

        if is_valid_index(string_idx - 1):
            p_idx = string_idx - 1
            if (
                LL[string_idx - 1].type == token.LPAR
                and LL[string_idx - 1].value == ""
                and string_idx >= 2
            ):
                # If the previous leaf is an empty LPAR placeholder, we should skip it.
                p_idx -= 1

            P = LL[p_idx]
            if P.type in self.STRING_OPERATORS:
                # WMA4 a space and a string operator (e.g. `+ STRING` or `== STRING`).
                offset += len(str(P)) + 1

            if P.type == token.COMMA:
                # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
                offset += 3

            if P.type in [token.COLON, token.EQUAL, token.PLUSEQUAL, token.NAME]:
                # This conditional branch is meant to handle dictionary keys,
                # variable assignments, 'return STRING' statement lines, and
                # 'else STRING' ternary expression lines.

                # WMA4 a single space.
                offset += 1

                # WMA4 the lengths of any leaves that came before that space,
                # but after any closing bracket before that space.
                for leaf in reversed(LL[: p_idx + 1]):
                    offset += len(str(leaf))
                    if leaf.type in CLOSING_BRACKETS:
                        break

        if is_valid_index(string_idx + 1):
            N = LL[string_idx + 1]
            if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
                # If the next leaf is an empty RPAR placeholder, we should skip it.
                N = LL[string_idx + 2]

            if N.type == token.COMMA:
                # WMA4 a single comma at the end of the string (e.g `STRING,`).
                offset += 1

            if is_valid_index(string_idx + 2):
                NN = LL[string_idx + 2]

                if N.type == token.DOT and NN.type == token.NAME:
                    # This conditional branch is meant to handle method calls invoked
                    # off of a string literal up to and including the LPAR character.

                    # WMA4 the '.' character.
                    offset += 1

                    if (
                        is_valid_index(string_idx + 3)
                        and LL[string_idx + 3].type == token.LPAR
                    ):
                        # WMA4 the left parenthesis character.
                        offset += 1

                    # WMA4 the length of the method's name.
                    offset += len(NN.value)

        has_comments = False
        for comment_leaf in line.comments_after(LL[string_idx]):
            if not has_comments:
                has_comments = True
                # WMA4 two spaces before the '#' character.
                offset += 2

            # WMA4 the length of the inline comment.
            offset += len(comment_leaf.value)

        max_string_length = count_chars_in_width(str(line), self.line_length - offset)
        return max_string_length

    @staticmethod
    def _prefer_paren_wrap_match(LL: list[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the "prefer paren wrap" statement
            requirements listed in the 'Requirements' section of the StringParenWrapper
            class's docstring.
                OR
            None, otherwise.
        """
        # The line must start with a string.
        if LL[0].type != token.STRING:
            return None

        matching_nodes = [
            syms.listmaker,
            syms.dictsetmaker,
            syms.testlist_gexp,
        ]
        # If the string is an immediate child of a list/set/tuple literal...
        if (
            parent_type(LL[0]) in matching_nodes
            or parent_type(LL[0].parent) in matching_nodes
        ):
            # And the string is surrounded by commas (or is the first/last child)...
            prev_sibling = LL[0].prev_sibling
            next_sibling = LL[0].next_sibling
            if (
                not prev_sibling
                and not next_sibling
                and parent_type(LL[0]) == syms.atom
            ):
                # If it's an atom string, we need to check the parent atom's siblings.
                parent = LL[0].parent
                assert parent is not None  # For type checkers.
                prev_sibling = parent.prev_sibling
                next_sibling = parent.next_sibling
            if (not prev_sibling or prev_sibling.type == token.COMMA) and (
                not next_sibling or next_sibling.type == token.COMMA
            ):
                return 0

        return None
def iter_fexpr_spans(s: str) -> Iterator[tuple[int, int]]:
    """
    Yield the half-open span (left inclusive, right exclusive) of every
    outermost `{...}` expression in the f-string @s.

    The input is assumed to be a valid f-string, but invalid input will not
    crash this function -- unmatched braces are simply skipped over.
    """
    open_positions: list[int] = []  # stack of indices of unmatched '{'
    pos = 0
    length = len(s)
    while pos < length:
        char = s[pos]
        if char == "{":
            # Outside any expression, "{{" is an escaped literal brace.
            if not open_positions and s.startswith("{{", pos):
                pos += 2
            else:
                open_positions.append(pos)
                pos += 1
        elif char == "}":
            if open_positions:
                start = open_positions.pop()
                # Closing the outermost brace ends the expression: emit it.
                if not open_positions:
                    yield (start, pos + 1)
            pos += 1
        elif open_positions:
            # Inside an expression: fast-forward over nested string literals
            # (backslashes are not legal in f-string expressions, so no
            # escape handling is needed here).
            quote = None
            if s[pos : pos + 3] in ("'''", '"""'):
                quote = s[pos : pos + 3]
            elif char in ("'", '"'):
                quote = char
            if quote is None:
                pos += 1
            else:
                pos += len(quote)
                while pos < length and s[pos : pos + len(quote)] != quote:
                    pos += 1
                pos += len(quote)
        else:
            pos += 1
def fstring_contains_expr(s: str) -> bool:
    """Return True iff @s contains at least one `{...}` f-expression."""
    for _span in iter_fexpr_spans(s):
        return True
    return False
def _toggle_fexpr_quotes(fstring: str, old_quote: str) -> str:
    """
    Swap every occurrence of @old_quote inside f-string expressions for the
    opposite quote character.

    f-string expressions can't contain backslashes, so the quotes must be
    toggled if the f-string itself will end up using the same quote. Toggling
    without escaping is safe because a quote reused inside an expression would
    fail to parse in the first place.

    NOTE: If PEP 701 is accepted, the above statement will no longer be true.
    Though if quotes can be reused, we can simply reuse them without updates or
    escaping, once Black figures out how to parse the new grammar.
    """
    new_quote = "'" if old_quote == '"' else '"'
    chunks: list[str] = []
    cursor = 0
    for begin, end in iter_fexpr_spans(fstring):
        # Literal text up to the expression is kept verbatim; quotes are only
        # toggled within the expression span itself.
        chunks.extend((
            fstring[cursor:begin],
            fstring[begin:end].replace(old_quote, new_quote),
        ))
        cursor = end
    chunks.append(fstring[cursor:])
    return "".join(chunks)
class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
    """
    StringTransformer that splits "atom" strings (i.e. strings which exist on
    lines by themselves).

    Requirements:
        * The line consists ONLY of a single string (possibly prefixed by a
          string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
          a trailing comma.
          AND
        * All of the requirements listed in BaseStringSplitter's docstring.

    Transformations:
        The string mentioned in the 'Requirements' section is split into as
        many substrings as necessary to adhere to the configured line length.

        In the final set of substrings, no substring should be smaller than
        MIN_SUBSTR_SIZE characters.

        The string will ONLY be split on spaces (i.e. each new substring should
        start with a space). Note that the string will NOT be split on a space
        which is escaped with a backslash.

        If the string is an f-string, it will NOT be split in the middle of an
        f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
        else bar()} is an f-expression).

        If the string that is being split has an associated set of custom split
        records and those custom splits will NOT result in any line going over
        the configured line length, those custom splits are used. Otherwise the
        string is split as late as possible (from left-to-right) while still
        adhering to the transformation rules listed above.

    Collaborations:
        StringSplitter relies on StringMerger to construct the appropriate
        CustomSplit objects and add them to the custom split map.
    """

    # Minimum number of characters allowed in any substring produced by a split.
    MIN_SUBSTR_SIZE: Final = 6

    def do_splitter_match(self, line: Line) -> TMatchResult:
        """Match a line consisting only of an optional operator, one string,
        an optional trailer, and an optional trailing comma."""
        LL = line.leaves

        if self._prefer_paren_wrap_match(LL) is not None:
            return TErr("Line needs to be wrapped in parens first.")

        is_valid_index = is_valid_index_factory(LL)

        idx = 0

        # The first two leaves MAY be the 'not in' keywords...
        if (
            is_valid_index(idx)
            and is_valid_index(idx + 1)
            and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
            and str(LL[idx]) + str(LL[idx + 1]) == "not in"
        ):
            idx += 2
        # Else the first leaf MAY be a string operator symbol or the 'in' keyword...
        elif is_valid_index(idx) and (
            LL[idx].type in self.STRING_OPERATORS
            or LL[idx].type == token.NAME
            and str(LL[idx]) == "in"
        ):
            idx += 1

        # The next/first leaf MAY be an empty LPAR...
        if is_valid_index(idx) and is_empty_lpar(LL[idx]):
            idx += 1

        # The next/first leaf MUST be a string...
        if not is_valid_index(idx) or LL[idx].type != token.STRING:
            return TErr("Line does not start with a string.")

        string_idx = idx

        # Skip the string trailer, if one exists.
        string_parser = StringParser()
        idx = string_parser.parse(LL, string_idx)

        # That string MAY be followed by an empty RPAR...
        if is_valid_index(idx) and is_empty_rpar(LL[idx]):
            idx += 1

        # That string / empty RPAR leaf MAY be followed by a comma...
        if is_valid_index(idx) and LL[idx].type == token.COMMA:
            idx += 1

        # But no more leaves are allowed...
        if is_valid_index(idx):
            return TErr("This line does not end with a string.")

        return Ok([string_idx])

    def do_transform(
        self, line: Line, string_indices: list[int]
    ) -> Iterator[TResult[Line]]:
        """Split the matched string, yielding one Ok(Line) per substring line."""
        LL = line.leaves
        assert len(string_indices) == 1, (
            f"{self.__class__.__name__} should only find one match at a time, found"
            f" {len(string_indices)}"
        )
        string_idx = string_indices[0]

        # The closing quote character of the original string; reused to
        # terminate every substring we carve off.
        QUOTE = LL[string_idx].value[-1]

        is_valid_index = is_valid_index_factory(LL)
        insert_str_child = insert_str_child_factory(LL[string_idx])

        prefix = get_string_prefix(LL[string_idx].value).lower()

        # We MAY choose to drop the 'f' prefix from substrings that don't
        # contain any f-expressions, but ONLY if the original f-string
        # contains at least one f-expression. Otherwise, we will alter the AST
        # of the program.
        drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
            LL[string_idx].value
        )

        first_string_line = True

        string_op_leaves = self._get_string_operator_leaves(LL)
        string_op_leaves_length = (
            sum(len(str(prefix_leaf)) for prefix_leaf in string_op_leaves) + 1
            if string_op_leaves
            else 0
        )

        def maybe_append_string_operators(new_line: Line) -> None:
            """
            Side Effects:
                If @line starts with a string operator and this is the first
                line we are constructing, this function appends the string
                operator to @new_line and replaces the old string operator leaf
                in the node structure. Otherwise this function does nothing.
            """
            maybe_prefix_leaves = string_op_leaves if first_string_line else []
            for i, prefix_leaf in enumerate(maybe_prefix_leaves):
                replace_child(LL[i], prefix_leaf)
                new_line.append(prefix_leaf)

        ends_with_comma = (
            is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
        )

        def max_last_string_column() -> int:
            """
            Returns:
                The max allowed width of the string value used for the last
                line we will construct. Note that this value means the width
                rather than the number of characters (e.g., many East Asian
                characters expand to two columns).
            """
            result = self.line_length
            result -= line.depth * 4
            result -= 1 if ends_with_comma else 0
            result -= string_op_leaves_length
            return result

        # --- Calculate Max Break Width (for string value)
        # We start with the line length limit
        max_break_width = self.line_length
        # The last index of a string of length N is N-1.
        max_break_width -= 1
        # Leading whitespace is not present in the string value (e.g. Leaf.value).
        max_break_width -= line.depth * 4
        if max_break_width < 0:
            yield TErr(
                f"Unable to split {LL[string_idx].value} at such high of a line depth:"
                f" {line.depth}"
            )
            return

        # Check if StringMerger registered any custom splits.
        custom_splits = self.pop_custom_splits(LL[string_idx].value)
        # We use them ONLY if none of them would produce lines that exceed the
        # line limit.
        use_custom_breakpoints = bool(
            custom_splits
            and all(csplit.break_idx <= max_break_width for csplit in custom_splits)
        )

        # Temporary storage for the remaining chunk of the string line that
        # can't fit onto the line currently being constructed.
        rest_value = LL[string_idx].value

        def more_splits_should_be_made() -> bool:
            """
            Returns:
                True iff `rest_value` (the remaining string value from the last
                split), should be split again.
            """
            if use_custom_breakpoints:
                return len(custom_splits) > 1
            else:
                return str_width(rest_value) > max_last_string_column()

        string_line_results: list[Ok[Line]] = []
        while more_splits_should_be_made():
            if use_custom_breakpoints:
                # Custom User Split (manual)
                csplit = custom_splits.pop(0)
                break_idx = csplit.break_idx
            else:
                # Algorithmic Split (automatic)
                max_bidx = (
                    count_chars_in_width(rest_value, max_break_width)
                    - string_op_leaves_length
                )
                maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
                if maybe_break_idx is None:
                    # If we are unable to algorithmically determine a good split
                    # and this string has custom splits registered to it, we
                    # fall back to using them--which means we have to start
                    # over from the beginning.
                    if custom_splits:
                        rest_value = LL[string_idx].value
                        string_line_results = []
                        first_string_line = True
                        use_custom_breakpoints = True
                        continue

                    # Otherwise, we stop splitting here.
                    break

                break_idx = maybe_break_idx

            # --- Construct `next_value`
            next_value = rest_value[:break_idx] + QUOTE

            # HACK: The following 'if' statement is a hack to fix the custom
            # breakpoint index in the case of either: (a) substrings that were
            # f-strings but will have the 'f' prefix removed OR (b) substrings
            # that were not f-strings but will now become f-strings because of
            # redundant use of the 'f' prefix (i.e. none of the substrings
            # contain f-expressions but one or more of them had the 'f' prefix
            # anyway; in which case, we will prepend 'f' to _all_ substrings).
            #
            # There is probably a better way to accomplish what is being done
            # here...
            #
            # If this substring is an f-string, we _could_ remove the 'f'
            # prefix, and the current custom split did NOT originally use a
            # prefix...
            if (
                use_custom_breakpoints
                and not csplit.has_prefix
                and (
                    # `next_value == prefix + QUOTE` happens when the custom
                    # split is an empty string.
                    next_value == prefix + QUOTE
                    or next_value != self._normalize_f_string(next_value, prefix)
                )
            ):
                # Then `csplit.break_idx` will be off by one after removing
                # the 'f' prefix.
                break_idx += 1
                next_value = rest_value[:break_idx] + QUOTE

            if drop_pointless_f_prefix:
                next_value = self._normalize_f_string(next_value, prefix)

            # --- Construct `next_leaf`
            next_leaf = Leaf(token.STRING, next_value)
            insert_str_child(next_leaf)
            self._maybe_normalize_string_quotes(next_leaf)

            # --- Construct `next_line`
            next_line = line.clone()
            maybe_append_string_operators(next_line)
            next_line.append(next_leaf)
            string_line_results.append(Ok(next_line))

            rest_value = prefix + QUOTE + rest_value[break_idx:]
            first_string_line = False

        yield from string_line_results

        if drop_pointless_f_prefix:
            rest_value = self._normalize_f_string(rest_value, prefix)

        rest_leaf = Leaf(token.STRING, rest_value)
        insert_str_child(rest_leaf)

        # NOTE: I could not find a test case that verifies that the following
        # line is actually necessary, but it seems to be. Otherwise we risk
        # not normalizing the last substring, right?
        self._maybe_normalize_string_quotes(rest_leaf)

        last_line = line.clone()
        maybe_append_string_operators(last_line)

        # If there are any leaves to the right of the target string...
        if is_valid_index(string_idx + 1):
            # We use `temp_value` here to determine how long the last line
            # would be if we were to append all the leaves to the right of the
            # target string to the last string line.
            temp_value = rest_value
            for leaf in LL[string_idx + 1 :]:
                temp_value += str(leaf)
                if leaf.type == token.LPAR:
                    break

            # Try to fit them all on the same line with the last substring...
            if (
                str_width(temp_value) <= max_last_string_column()
                or LL[string_idx + 1].type == token.COMMA
            ):
                last_line.append(rest_leaf)
                append_leaves(last_line, line, LL[string_idx + 1 :])
                yield Ok(last_line)
            # Otherwise, place the last substring on one line and everything
            # else on a line below that...
            else:
                last_line.append(rest_leaf)
                yield Ok(last_line)

                non_string_line = line.clone()
                append_leaves(non_string_line, line, LL[string_idx + 1 :])
                yield Ok(non_string_line)
        # Else the target string was the last leaf...
        else:
            last_line.append(rest_leaf)
            last_line.comments = line.comments.copy()
            yield Ok(last_line)

    def _iter_nameescape_slices(self, string: str) -> Iterator[tuple[Index, Index]]:
        r"""
        Yields:
            All ranges of @string which, if @string were to be split there,
            would result in the splitting of an \N{...} expression (which is NOT
            allowed).
        """
        # True - the previous backslash was unescaped
        # False - the previous backslash was escaped *or* there was no backslash
        previous_was_unescaped_backslash = False
        it = iter(enumerate(string))
        for idx, c in it:
            if c == "\\":
                previous_was_unescaped_backslash = not previous_was_unescaped_backslash
                continue
            if not previous_was_unescaped_backslash or c != "N":
                previous_was_unescaped_backslash = False
                continue
            previous_was_unescaped_backslash = False

            begin = idx - 1  # the position of backslash before \N{...}
            # Consume from the shared iterator until the closing brace; the
            # for/else raises if the expression never closes.
            for idx, c in it:
                if c == "}":
                    end = idx
                    break
            else:
                # malformed nameescape expression?
                # should have been detected by AST parsing earlier...
                raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
            yield begin, end

    def _iter_fexpr_slices(self, string: str) -> Iterator[tuple[Index, Index]]:
        """
        Yields:
            All ranges of @string which, if @string were to be split there,
            would result in the splitting of an f-expression (which is NOT
            allowed).
        """
        if "f" not in get_string_prefix(string).lower():
            return
        yield from iter_fexpr_spans(string)

    def _get_illegal_split_indices(self, string: str) -> set[Index]:
        """Return every index of @string at which a split must not occur
        (inside f-expressions or \\N{...} escapes)."""
        illegal_indices: set[Index] = set()
        iterators = [
            self._iter_fexpr_slices(string),
            self._iter_nameescape_slices(string),
        ]
        for it in iterators:
            for begin, end in it:
                illegal_indices.update(range(begin, end))
        return illegal_indices

    def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
        """
        This method contains the algorithm that StringSplitter uses to
        determine which character to split each string at.

        Args:
            @string: The substring that we are attempting to split.
            @max_break_idx: The ideal break index. We will return this value if it
            meets all the necessary conditions. In the likely event that it
            doesn't we will try to find the closest index BELOW @max_break_idx
            that does. If that fails, we will expand our search by also
            considering all valid indices ABOVE @max_break_idx.

        Pre-Conditions:
            * assert_is_leaf_string(@string)
            * 0 <= @max_break_idx < len(@string)

        Returns:
            break_idx, if an index is able to be found that meets all of the
            conditions listed in the 'Transformations' section of this classes'
            docstring.
                OR
            None, otherwise.
        """
        is_valid_index = is_valid_index_factory(string)

        assert is_valid_index(max_break_idx)
        assert_is_leaf_string(string)

        _illegal_split_indices = self._get_illegal_split_indices(string)

        def breaks_unsplittable_expression(i: Index) -> bool:
            """
            Returns:
                True iff returning @i would result in the splitting of an
                unsplittable expression (which is NOT allowed).
            """
            return i in _illegal_split_indices

        def passes_all_checks(i: Index) -> bool:
            """
            Returns:
                True iff ALL of the conditions listed in the 'Transformations'
                section of this classes' docstring would be met by returning @i.
            """
            is_space = string[i] == " "
            is_split_safe = is_valid_index(i - 1) and string[i - 1] in SPLIT_SAFE_CHARS

            # Walk back over consecutive backslashes; an odd count means the
            # character at @i is escaped and must not start a new substring.
            is_not_escaped = True
            j = i - 1
            while is_valid_index(j) and string[j] == "\\":
                is_not_escaped = not is_not_escaped
                j -= 1

            is_big_enough = (
                len(string[i:]) >= self.MIN_SUBSTR_SIZE
                and len(string[:i]) >= self.MIN_SUBSTR_SIZE
            )
            return (
                (is_space or is_split_safe)
                and is_not_escaped
                and is_big_enough
                and not breaks_unsplittable_expression(i)
            )

        # First, we check all indices BELOW @max_break_idx.
        break_idx = max_break_idx
        while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
            break_idx -= 1

        if not passes_all_checks(break_idx):
            # If that fails, we check all indices ABOVE @max_break_idx.
            #
            # If we are able to find a valid index here, the next line is going
            # to be longer than the specified line length, but it's probably
            # better than doing nothing at all.
            break_idx = max_break_idx + 1
            while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
                break_idx += 1

            if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
                return None

        return break_idx

    def _maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
        """Normalize @leaf's quotes in place, if this mode normalizes strings."""
        if self.normalize_strings:
            leaf.value = normalize_string_quotes(leaf.value)

    def _normalize_f_string(self, string: str, prefix: str) -> str:
        """
        Pre-Conditions:
            * assert_is_leaf_string(@string)

        Returns:
            * If @string is an f-string that contains no f-expressions, we
            return a string identical to @string except that the 'f' prefix
            has been stripped and all double braces (i.e. '{{' or '}}') have
            been normalized (i.e. turned into '{' or '}').
                OR
            * Otherwise, we return @string.
        """
        assert_is_leaf_string(string)

        if "f" in prefix and not fstring_contains_expr(string):
            new_prefix = prefix.replace("f", "")

            temp = string[len(prefix) :]
            temp = re.sub(r"\{\{", "{", temp)
            temp = re.sub(r"\}\}", "}", temp)
            new_string = temp

            return f"{new_prefix}{new_string}"
        else:
            return string

    def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> list[Leaf]:
        """Return fresh leaves for the operator/keyword tokens (if any) that
        prefix the target string at the start of @leaves."""
        LL = list(leaves)

        string_op_leaves = []
        i = 0
        while LL[i].type in self.STRING_OPERATORS + [token.NAME]:
            prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip())
            string_op_leaves.append(prefix_leaf)
            i += 1
        return string_op_leaves
1891class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
1892 """
1893 StringTransformer that wraps strings in parens and then splits at the LPAR.
1895 Requirements:
1896 All of the requirements listed in BaseStringSplitter's docstring in
1897 addition to the requirements listed below:
1899 * The line is a return/yield statement, which returns/yields a string.
1900 OR
1901 * The line is part of a ternary expression (e.g. `x = y if cond else
1902 z`) such that the line starts with `else <string>`, where <string> is
1903 some string.
1904 OR
1905 * The line is an assert statement, which ends with a string.
1906 OR
1907 * The line is an assignment statement (e.g. `x = <string>` or `x +=
1908 <string>`) such that the variable is being assigned the value of some
1909 string.
1910 OR
1911 * The line is a dictionary key assignment where some valid key is being
1912 assigned the value of some string.
1913 OR
1914 * The line is an lambda expression and the value is a string.
1915 OR
1916 * The line starts with an "atom" string that prefers to be wrapped in
1917 parens. It's preferred to be wrapped when it's is an immediate child of
1918 a list/set/tuple literal, AND the string is surrounded by commas (or is
1919 the first/last child).
1921 Transformations:
1922 The chosen string is wrapped in parentheses and then split at the LPAR.
1924 We then have one line which ends with an LPAR and another line that
1925 starts with the chosen string. The latter line is then split again at
1926 the RPAR. This results in the RPAR (and possibly a trailing comma)
1927 being placed on its own line.
1929 NOTE: If any leaves exist to the right of the chosen string (except
1930 for a trailing comma, which would be placed after the RPAR), those
1931 leaves are placed inside the parentheses. In effect, the chosen
1932 string is not necessarily being "wrapped" by parentheses. We can,
1933 however, count on the LPAR being placed directly before the chosen
1934 string.
1936 In other words, StringParenWrapper creates "atom" strings. These
1937 can then be split again by StringSplitter, if necessary.
1939 Collaborations:
1940 In the event that a string line split by StringParenWrapper is
1941 changed such that it no longer needs to be given its own line,
1942 StringParenWrapper relies on StringParenStripper to clean up the
1943 parentheses it created.
1945 For "atom" strings that prefers to be wrapped in parens, it requires
1946 StringSplitter to hold the split until the string is wrapped in parens.
1947 """
1949 def do_splitter_match(self, line: Line) -> TMatchResult:
1950 LL = line.leaves
1952 if line.leaves[-1].type in OPENING_BRACKETS:
1953 return TErr(
1954 "Cannot wrap parens around a line that ends in an opening bracket."
1955 )
1957 string_idx = (
1958 self._return_match(LL)
1959 or self._else_match(LL)
1960 or self._assert_match(LL)
1961 or self._assign_match(LL)
1962 or self._dict_or_lambda_match(LL)
1963 or self._prefer_paren_wrap_match(LL)
1964 )
1966 if string_idx is not None:
1967 string_value = line.leaves[string_idx].value
1968 # If the string has neither spaces nor East Asian stops...
1969 if not any(
1970 char == " " or char in SPLIT_SAFE_CHARS for char in string_value
1971 ):
1972 # And will still violate the line length limit when split...
1973 max_string_width = self.line_length - ((line.depth + 1) * 4)
1974 if str_width(string_value) > max_string_width:
1975 # And has no associated custom splits...
1976 if not self.has_custom_splits(string_value):
1977 # Then we should NOT put this string on its own line.
1978 return TErr(
1979 "We do not wrap long strings in parentheses when the"
1980 " resultant line would still be over the specified line"
1981 " length and can't be split further by StringSplitter."
1982 )
1983 return Ok([string_idx])
1985 return TErr("This line does not contain any non-atomic strings.")
1987 @staticmethod
1988 def _return_match(LL: list[Leaf]) -> Optional[int]:
1989 """
1990 Returns:
1991 string_idx such that @LL[string_idx] is equal to our target (i.e.
1992 matched) string, if this line matches the return/yield statement
1993 requirements listed in the 'Requirements' section of this classes'
1994 docstring.
1995 OR
1996 None, otherwise.
1997 """
1998 # If this line is a part of a return/yield statement and the first leaf
1999 # contains either the "return" or "yield" keywords...
2000 if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
2001 0
2002 ].value in ["return", "yield"]:
2003 is_valid_index = is_valid_index_factory(LL)
2005 idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
2006 # The next visible leaf MUST contain a string...
2007 if is_valid_index(idx) and LL[idx].type == token.STRING:
2008 return idx
2010 return None
2012 @staticmethod
2013 def _else_match(LL: list[Leaf]) -> Optional[int]:
2014 """
2015 Returns:
2016 string_idx such that @LL[string_idx] is equal to our target (i.e.
2017 matched) string, if this line matches the ternary expression
2018 requirements listed in the 'Requirements' section of this classes'
2019 docstring.
2020 OR
2021 None, otherwise.
2022 """
2023 # If this line is a part of a ternary expression and the first leaf
2024 # contains the "else" keyword...
2025 if (
2026 parent_type(LL[0]) == syms.test
2027 and LL[0].type == token.NAME
2028 and LL[0].value == "else"
2029 ):
2030 is_valid_index = is_valid_index_factory(LL)
2032 idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
2033 # The next visible leaf MUST contain a string...
2034 if is_valid_index(idx) and LL[idx].type == token.STRING:
2035 return idx
2037 return None
2039 @staticmethod
2040 def _assert_match(LL: list[Leaf]) -> Optional[int]:
2041 """
2042 Returns:
2043 string_idx such that @LL[string_idx] is equal to our target (i.e.
2044 matched) string, if this line matches the assert statement
2045 requirements listed in the 'Requirements' section of this classes'
2046 docstring.
2047 OR
2048 None, otherwise.
2049 """
2050 # If this line is a part of an assert statement and the first leaf
2051 # contains the "assert" keyword...
2052 if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
2053 is_valid_index = is_valid_index_factory(LL)
2055 for i, leaf in enumerate(LL):
2056 # We MUST find a comma...
2057 if leaf.type == token.COMMA:
2058 idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
2060 # That comma MUST be followed by a string...
2061 if is_valid_index(idx) and LL[idx].type == token.STRING:
2062 string_idx = idx
2064 # Skip the string trailer, if one exists.
2065 string_parser = StringParser()
2066 idx = string_parser.parse(LL, string_idx)
2068 # But no more leaves are allowed...
2069 if not is_valid_index(idx):
2070 return string_idx
2072 return None
2074 @staticmethod
2075 def _assign_match(LL: list[Leaf]) -> Optional[int]:
2076 """
2077 Returns:
2078 string_idx such that @LL[string_idx] is equal to our target (i.e.
2079 matched) string, if this line matches the assignment statement
2080 requirements listed in the 'Requirements' section of this classes'
2081 docstring.
2082 OR
2083 None, otherwise.
2084 """
2085 # If this line is a part of an expression statement or is a function
2086 # argument AND the first leaf contains a variable name...
2087 if (
2088 parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
2089 and LL[0].type == token.NAME
2090 ):
2091 is_valid_index = is_valid_index_factory(LL)
2093 for i, leaf in enumerate(LL):
2094 # We MUST find either an '=' or '+=' symbol...
2095 if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
2096 idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
2098 # That symbol MUST be followed by a string...
2099 if is_valid_index(idx) and LL[idx].type == token.STRING:
2100 string_idx = idx
2102 # Skip the string trailer, if one exists.
2103 string_parser = StringParser()
2104 idx = string_parser.parse(LL, string_idx)
2106 # The next leaf MAY be a comma iff this line is a part
2107 # of a function argument...
2108 if (
2109 parent_type(LL[0]) == syms.argument
2110 and is_valid_index(idx)
2111 and LL[idx].type == token.COMMA
2112 ):
2113 idx += 1
2115 # But no more leaves are allowed...
2116 if not is_valid_index(idx):
2117 return string_idx
2119 return None
2121 @staticmethod
2122 def _dict_or_lambda_match(LL: list[Leaf]) -> Optional[int]:
2123 """
2124 Returns:
2125 string_idx such that @LL[string_idx] is equal to our target (i.e.
2126 matched) string, if this line matches the dictionary key assignment
2127 statement or lambda expression requirements listed in the
2128 'Requirements' section of this classes' docstring.
2129 OR
2130 None, otherwise.
2131 """
2132 # If this line is a part of a dictionary key assignment or lambda expression...
2133 parent_types = [parent_type(LL[0]), parent_type(LL[0].parent)]
2134 if syms.dictsetmaker in parent_types or syms.lambdef in parent_types:
2135 is_valid_index = is_valid_index_factory(LL)
2137 for i, leaf in enumerate(LL):
2138 # We MUST find a colon, it can either be dict's or lambda's colon...
2139 if leaf.type == token.COLON and i < len(LL) - 1:
2140 idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
2142 # That colon MUST be followed by a string...
2143 if is_valid_index(idx) and LL[idx].type == token.STRING:
2144 string_idx = idx
2146 # Skip the string trailer, if one exists.
2147 string_parser = StringParser()
2148 idx = string_parser.parse(LL, string_idx)
2150 # That string MAY be followed by a comma...
2151 if is_valid_index(idx) and LL[idx].type == token.COMMA:
2152 idx += 1
2154 # But no more leaves are allowed...
2155 if not is_valid_index(idx):
2156 return string_idx
2158 return None
    def do_transform(
        self, line: Line, string_indices: list[int]
    ) -> Iterator[TResult[Line]]:
        """Wrap the matched string in parentheses, yielding three lines.

        Yields, in order:
            1. The first line: everything left of the string, ending in `(`.
            2. The string line: the string itself (plus any leaves that were to
               its right), one level deeper.
            3. The last line: the closing `)`, followed by the trailing comma
               if the original line ended with one.

        Each yielded value is wrapped in `Ok`; this generator never yields
        `Err` itself.
        """
        LL = line.leaves
        assert len(string_indices) == 1, (
            f"{self.__class__.__name__} should only find one match at a time, found"
            f" {len(string_indices)}"
        )
        string_idx = string_indices[0]

        is_valid_index = is_valid_index_factory(LL)
        # Orphans the string leaf from the CST and lets us splice new leaves
        # into its former position, in order.
        insert_str_child = insert_str_child_factory(LL[string_idx])

        comma_idx = -1
        ends_with_comma = False
        if LL[comma_idx].type == token.COMMA:
            ends_with_comma = True

        leaves_to_steal_comments_from = [LL[string_idx]]
        if ends_with_comma:
            leaves_to_steal_comments_from.append(LL[comma_idx])

        # --- First Line
        first_line = line.clone()
        left_leaves = LL[:string_idx]

        # We have to remember to account for (possibly invisible) LPAR and RPAR
        # leaves that already wrapped the target string. If these leaves do
        # exist, we will replace them with our own LPAR and RPAR leaves.
        old_parens_exist = False
        if left_leaves and left_leaves[-1].type == token.LPAR:
            old_parens_exist = True
            leaves_to_steal_comments_from.append(left_leaves[-1])
            left_leaves.pop()

        append_leaves(first_line, line, left_leaves)

        lpar_leaf = Leaf(token.LPAR, "(")
        if old_parens_exist:
            # Reuse the old LPAR's position in the tree.
            replace_child(LL[string_idx - 1], lpar_leaf)
        else:
            insert_str_child(lpar_leaf)
        first_line.append(lpar_leaf)

        # We throw inline comments that were originally to the right of the
        # target string to the top line. They will now be shown to the right of
        # the LPAR.
        for leaf in leaves_to_steal_comments_from:
            for comment_leaf in line.comments_after(leaf):
                first_line.append(comment_leaf, preformatted=True)

        yield Ok(first_line)

        # --- Middle (String) Line
        # We only need to yield one (possibly too long) string line, since the
        # `StringSplitter` will break it down further if necessary.
        string_value = LL[string_idx].value
        string_line = Line(
            mode=line.mode,
            depth=line.depth + 1,
            inside_brackets=True,
            should_split_rhs=line.should_split_rhs,
            magic_trailing_comma=line.magic_trailing_comma,
        )
        string_leaf = Leaf(token.STRING, string_value)
        insert_str_child(string_leaf)
        string_line.append(string_leaf)

        old_rpar_leaf = None
        if is_valid_index(string_idx + 1):
            right_leaves = LL[string_idx + 1 :]
            if ends_with_comma:
                # The comma is re-emitted on the last line instead.
                right_leaves.pop()

            if old_parens_exist:
                assert right_leaves and right_leaves[-1].type == token.RPAR, (
                    "Apparently, old parentheses do NOT exist?!"
                    f" (left_leaves={left_leaves}, right_leaves={right_leaves})"
                )
                old_rpar_leaf = right_leaves.pop()
            elif right_leaves and right_leaves[-1].type == token.RPAR:
                # Special case for lambda expressions as dict's value, e.g.:
                #     my_dict = {
                #        "key": lambda x: f"formatted: {x}",
                #     }
                # After wrapping the dict's value with parentheses, the string is
                # followed by a RPAR but its opening bracket is lambda's, not
                # the string's:
                #     "key": (lambda x: f"formatted: {x}"),
                opening_bracket = right_leaves[-1].opening_bracket
                if opening_bracket is not None and opening_bracket in left_leaves:
                    index = left_leaves.index(opening_bracket)
                    if (
                        0 < index < len(left_leaves) - 1
                        and left_leaves[index - 1].type == token.COLON
                        and left_leaves[index + 1].value == "lambda"
                    ):
                        right_leaves.pop()

            append_leaves(string_line, line, right_leaves)

        yield Ok(string_line)

        # --- Last Line
        last_line = line.clone()
        # Share the tracker so the last line knows about the LPAR opened above.
        last_line.bracket_tracker = first_line.bracket_tracker

        new_rpar_leaf = Leaf(token.RPAR, ")")
        if old_rpar_leaf is not None:
            replace_child(old_rpar_leaf, new_rpar_leaf)
        else:
            insert_str_child(new_rpar_leaf)
        last_line.append(new_rpar_leaf)

        # If the target string ended with a comma, we place this comma to the
        # right of the RPAR on the last line.
        if ends_with_comma:
            comma_leaf = Leaf(token.COMMA, ",")
            replace_child(LL[comma_idx], comma_leaf)
            last_line.append(comma_leaf)

        yield Ok(last_line)
class StringParser:
    """
    A state machine that aids in parsing a string's "trailer", which can be
    either non-existent, an old-style formatting sequence (e.g. `% varX` or `%
    (varX, varY)`), or a method-call / attribute access (e.g. `.format(varX,
    varY)`).

    NOTE: A new StringParser object MUST be instantiated for each string
    trailer we need to parse.

    Examples:
        We shall assume that `line` equals the `Line` object that corresponds
        to the following line of python code:
        ```
        x = "Some {}.".format("String") + some_other_string
        ```

        Furthermore, we will assume that `string_idx` is some index such that:
        ```
        assert line.leaves[string_idx].value == "Some {}."
        ```

        The following code snippet then holds:
        ```
        string_parser = StringParser()
        idx = string_parser.parse(line.leaves, string_idx)
        assert line.leaves[idx].type == token.PLUS
        ```
    """

    # Sentinel token type used for "any other token" transitions.
    DEFAULT_TOKEN: Final = 20210605

    # String Parser States
    START: Final = 1
    DOT: Final = 2
    NAME: Final = 3
    PERCENT: Final = 4
    SINGLE_FMT_ARG: Final = 5
    LPAR: Final = 6
    RPAR: Final = 7
    DONE: Final = 8

    # Transition table: (current state, token type) -> next state.
    _goto: Final[dict[tuple[ParserState, NodeType], ParserState]] = {
        # A string trailer may start with '.' OR '%'.
        (START, token.DOT): DOT,
        (START, token.PERCENT): PERCENT,
        (START, DEFAULT_TOKEN): DONE,
        # A '.' MUST be followed by an attribute or method name.
        (DOT, token.NAME): NAME,
        # A method name MUST be followed by an '(', whereas an attribute name
        # is the last symbol in the string trailer.
        (NAME, token.LPAR): LPAR,
        (NAME, DEFAULT_TOKEN): DONE,
        # A '%' symbol can be followed by an '(' or a single argument (e.g. a
        # string or variable name).
        (PERCENT, token.LPAR): LPAR,
        (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
        # If a '%' symbol is followed by a single argument, that argument is
        # the last leaf in the string trailer.
        (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
        # If present, a ')' symbol is the last symbol in a string trailer.
        # (NOTE: LPARS and nested RPARS are not included in this lookup table,
        # since they are treated as a special case by the parsing logic in this
        # class's implementation.)
        (RPAR, DEFAULT_TOKEN): DONE,
    }

    def __init__(self) -> None:
        self._state = self.START
        self._unmatched_lpars = 0

    def parse(self, leaves: list[Leaf], string_idx: int) -> int:
        """
        Pre-conditions:
            * @leaves[@string_idx].type == token.STRING

        Returns:
            The index directly after the last leaf which is a part of the string
            trailer, if a "trailer" exists.
            OR
            @string_idx + 1, if no string "trailer" exists.
        """
        assert leaves[string_idx].type == token.STRING

        # Feed the machine one leaf at a time, starting right after the string,
        # until it reports that the trailer has ended (or we run out of leaves).
        idx = string_idx + 1
        while idx < len(leaves) and self._next_state(leaves[idx]):
            idx += 1
        return idx

    def _next_state(self, leaf: Leaf) -> bool:
        """
        Pre-conditions:
            * On the first call to this function, @leaf MUST be the leaf that
              was directly after the string leaf in question (e.g. if our target
              string is `line.leaves[i]` then the first call to this method must
              be `line.leaves[i + 1]`).
            * On the next call to this function, the leaf parameter passed in
              MUST be the leaf directly following @leaf.

        Returns:
            True iff @leaf is a part of the string's trailer.
        """
        # Empty (invisible) LPAR/RPAR leaves never affect the state.
        if is_empty_par(leaf):
            return True

        next_token = leaf.type
        if next_token == token.LPAR:
            self._unmatched_lpars += 1

        current_state = self._state
        if current_state == self.LPAR:
            # Special case: once inside parentheses, consume everything until
            # the matching RPAR balances the count back to zero.
            if next_token == token.RPAR:
                self._unmatched_lpars -= 1
                if self._unmatched_lpars == 0:
                    self._state = self.RPAR
        elif (current_state, next_token) in self._goto:
            # An exact (state, token) transition exists in the lookup table.
            self._state = self._goto[current_state, next_token]
        elif (current_state, self.DEFAULT_TOKEN) in self._goto:
            # Fall back to the state's default transition, if it has one.
            self._state = self._goto[current_state, self.DEFAULT_TOKEN]
        else:
            # No transition at all means the table is incomplete — a bug here,
            # not in the input.
            raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")

        return self._state != self.DONE
def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
    """
    Factory for a convenience function that is used to orphan @string_leaf
    and then insert multiple new leaves into the same part of the node
    structure that @string_leaf had originally occupied.

    Examples:
        Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
        string_leaf.parent`. Assume the node `N` has the following
        original structure:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(STRING, '"foo"'),
            ]
        )

        We then run the code snippet shown below.
        ```
        insert_str_child = insert_str_child_factory(string_leaf)

        lpar = Leaf(token.LPAR, '(')
        insert_str_child(lpar)

        bar = Leaf(token.STRING, '"bar"')
        insert_str_child(bar)

        rpar = Leaf(token.RPAR, ')')
        insert_str_child(rpar)
        ```

        After which point, it follows that `string_leaf.parent is None` and
        the node `N` now has the following structure:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(LPAR, '('),
                Leaf(STRING, '"bar"'),
                Leaf(RPAR, ')'),
            ]
        )
    """
    # Detach the string from the tree, remembering where it used to live.
    parent = string_leaf.parent
    insertion_idx = string_leaf.remove()

    def insert_str_child(child: LN) -> None:
        # Each insertion lands right after the previous one.
        nonlocal insertion_idx

        assert parent is not None
        assert insertion_idx is not None

        parent.insert_child(insertion_idx, child)
        insertion_idx += 1

    return insert_str_child
def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
    """
    Build a predicate that reports whether an index is in bounds for @seq.

    Examples:
        ```
        my_list = [1, 2, 3]

        is_valid_index = is_valid_index_factory(my_list)

        assert is_valid_index(0)
        assert is_valid_index(2)

        assert not is_valid_index(3)
        assert not is_valid_index(-1)
        ```
    """

    def is_valid_index(idx: int) -> bool:
        """
        Returns:
            True iff @idx is non-negative AND seq[@idx] does NOT raise an
            IndexError.
        """
        if idx < 0:
            return False
        # len() is looked up on each call, so the predicate stays correct even
        # if @seq grows or shrinks after the factory was invoked.
        return idx < len(seq)

    return is_valid_index