1""" 

2String transformers that can split and merge strings. 

3""" 

4 

5import re 

6from abc import ABC, abstractmethod 

7from collections import defaultdict 

8from collections.abc import Callable, Collection, Iterable, Iterator, Sequence 

9from dataclasses import dataclass 

10from typing import Any, ClassVar, Final, Literal, Optional, TypeVar, Union 

11 

12from mypy_extensions import trait 

13 

14from black.comments import contains_pragma_comment 

15from black.lines import Line, append_leaves 

16from black.mode import Feature, Mode 

17from black.nodes import ( 

18 CLOSING_BRACKETS, 

19 OPENING_BRACKETS, 

20 STANDALONE_COMMENT, 

21 is_empty_lpar, 

22 is_empty_par, 

23 is_empty_rpar, 

24 is_part_of_annotation, 

25 parent_type, 

26 replace_child, 

27 syms, 

28) 

29from black.rusty import Err, Ok, Result 

30from black.strings import ( 

31 assert_is_leaf_string, 

32 count_chars_in_width, 

33 get_string_prefix, 

34 has_triple_quotes, 

35 normalize_string_quotes, 

36 str_width, 

37) 

38from blib2to3.pgen2 import token 

39from blib2to3.pytree import Leaf, Node 

40 

41 

42class CannotTransform(Exception): 

43 """Base class for errors raised by Transformers.""" 

44 

45 

46# types 

47T = TypeVar("T") 

48LN = Union[Leaf, Node] 

49Transformer = Callable[[Line, Collection[Feature], Mode], Iterator[Line]] 

50Index = int 

51NodeType = int 

52ParserState = int 

53StringID = int 

54TResult = Result[T, CannotTransform] # (T)ransform Result 

55TMatchResult = TResult[list[Index]] 

56 

57SPLIT_SAFE_CHARS = frozenset(["\u3001", "\u3002", "\uff0c"]) # East Asian stops 

58 

59 

60def TErr(err_msg: str) -> Err[CannotTransform]: 

61 """(T)ransform Err 

62 

63 Convenience function used when working with the TResult type. 

64 """ 

65 cant_transform = CannotTransform(err_msg) 

66 return Err(cant_transform) 

67 

68 

69def hug_power_op( 

70 line: Line, features: Collection[Feature], mode: Mode 

71) -> Iterator[Line]: 

72 """A transformer which normalizes spacing around power operators.""" 

73 

74 # Performance optimization to avoid unnecessary Leaf clones and other ops. 

75 for leaf in line.leaves: 

76 if leaf.type == token.DOUBLESTAR: 

77 break 

78 else: 

79 raise CannotTransform("No doublestar token was found in the line.") 

80 

81 def is_simple_lookup(index: int, kind: Literal[1, -1]) -> bool: 

82 # Brackets and parentheses indicate calls, subscripts, etc. ... 

83 # basically stuff that doesn't count as "simple". Only a NAME lookup 

84 # or dotted lookup (eg. NAME.NAME) is OK. 

85 if kind == -1: 

86 return handle_is_simple_look_up_prev(line, index, {token.RPAR, token.RSQB}) 

87 else: 

88 return handle_is_simple_lookup_forward( 

89 line, index, {token.LPAR, token.LSQB} 

90 ) 

91 

92 def is_simple_operand(index: int, kind: Literal[1, -1]) -> bool: 

93 # An operand is considered "simple" if's a NAME, a numeric CONSTANT, a simple 

94 # lookup (see above), with or without a preceding unary operator. 

95 start = line.leaves[index] 

96 if start.type in {token.NAME, token.NUMBER}: 

97 return is_simple_lookup(index, kind) 

98 

99 if start.type in {token.PLUS, token.MINUS, token.TILDE}: 

100 if line.leaves[index + 1].type in {token.NAME, token.NUMBER}: 

101 # kind is always one as bases with a preceding unary op will be checked 

102 # for simplicity starting from the next token (so it'll hit the check 

103 # above). 

104 return is_simple_lookup(index + 1, kind=1) 

105 

106 return False 

107 

108 new_line = line.clone() 

109 should_hug = False 

110 for idx, leaf in enumerate(line.leaves): 

111 new_leaf = leaf.clone() 

112 if should_hug: 

113 new_leaf.prefix = "" 

114 should_hug = False 

115 

116 should_hug = ( 

117 (0 < idx < len(line.leaves) - 1) 

118 and leaf.type == token.DOUBLESTAR 

119 and is_simple_operand(idx - 1, kind=-1) 

120 and line.leaves[idx - 1].value != "lambda" 

121 and is_simple_operand(idx + 1, kind=1) 

122 ) 

123 if should_hug: 

124 new_leaf.prefix = "" 

125 

126 # We have to be careful to make a new line properly: 

127 # - bracket related metadata must be maintained (handled by Line.append) 

128 # - comments need to copied over, updating the leaf IDs they're attached to 

129 new_line.append(new_leaf, preformatted=True) 

130 for comment_leaf in line.comments_after(leaf): 

131 new_line.append(comment_leaf, preformatted=True) 

132 

133 yield new_line 
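

# Illustration of the transform above (editorial sketch; the exact decisions
# are made by is_simple_operand/is_simple_lookup):
#
#     i ** 2      ->  i**2         (both operands simple: hugged)
#     a.b ** -c   ->  a.b**-c      (dotted lookup and unary minus are simple)
#     f(x) ** 2   ->  f(x) ** 2    (a call is not simple: spaces are kept)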


def handle_is_simple_look_up_prev(line: Line, index: int, disallowed: set[int]) -> bool:
    """
    Handle the is_simple_lookup check for the leaves prior to the doublestar
    token. This is required because we need to isolate the chained expression
    in order to determine whether a bracket or parenthesis belongs to that
    single expression.
    """
    contains_disallowed = False
    chain = []

    while 0 <= index < len(line.leaves):
        current = line.leaves[index]
        chain.append(current)
        if not contains_disallowed and current.type in disallowed:
            contains_disallowed = True
        if not is_expression_chained(chain):
            return not contains_disallowed

        index -= 1

    return True


def handle_is_simple_lookup_forward(
    line: Line, index: int, disallowed: set[int]
) -> bool:
    """
    Handle the is_simple_lookup decision for the leaves after the doublestar
    token. This is kept consistent with the backward logic above, but the
    forward case is more straightforward and does not need to care about
    chained expressions.
    """
    while 0 <= index < len(line.leaves):
        current = line.leaves[index]
        if current.type in disallowed:
            return False
        if current.type not in {token.NAME, token.DOT} or (
            current.type == token.NAME and current.value == "for"
        ):
            # If the current token isn't disallowed, we'll assume this is simple as
            # only the disallowed tokens are semantically attached to this lookup
            # expression we're checking. Also, stop early if we hit the 'for' bit
            # of a comprehension.
            return True

        index += 1

    return True


def is_expression_chained(chained_leaves: list[Leaf]) -> bool:
    """
    Determine whether the given leaves form a chained expression
    (e.g., foo.lookup, foo().lookup, and (foo.lookup()) are all recognized
    as chained).
    """
    if len(chained_leaves) < 2:
        return True

    current_leaf = chained_leaves[-1]
    past_leaf = chained_leaves[-2]

    if past_leaf.type == token.NAME:
        return current_leaf.type in {token.DOT}
    elif past_leaf.type in {token.RPAR, token.RSQB}:
        return current_leaf.type in {token.RSQB, token.RPAR}
    elif past_leaf.type in {token.LPAR, token.LSQB}:
        return current_leaf.type in {token.NAME, token.LPAR, token.LSQB}
    else:
        return False
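

# Worked example (editorial): for `foo().bar ** 2`, handle_is_simple_look_up_prev
# scans right-to-left from `bar`, building the chain [`bar`, `.`, `)`]. The pair
# (past=`.`, current=`)`) matches none of the chaining rules above, so the scan
# stops; since a disallowed `)` was seen, the base is judged not simple and the
# power operator keeps its surrounding spaces.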


class StringTransformer(ABC):
    """
    An implementation of the Transformer protocol that relies on its
    subclasses overriding the template methods `do_match(...)` and
    `do_transform(...)`.

    This Transformer works exclusively on strings (for example, by merging
    or splitting them).

    The following sections can be found among the docstrings of each concrete
    StringTransformer subclass.

    Requirements:
        Which requirements must be met of the given Line for this
        StringTransformer to be applied?

    Transformations:
        If the given Line meets all of the above requirements, which string
        transformations can you expect to be applied to it by this
        StringTransformer?

    Collaborations:
        What contractual agreements does this StringTransformer have with other
        StringTransformers? Such collaborations should be eliminated/minimized
        as much as possible.
    """

    __name__: Final = "StringTransformer"

    # Ideally this would be a dataclass, but unfortunately mypyc breaks when used with
    # `abc.ABC`.
    def __init__(self, line_length: int, normalize_strings: bool) -> None:
        self.line_length = line_length
        self.normalize_strings = normalize_strings

    @abstractmethod
    def do_match(self, line: Line) -> TMatchResult:
        """
        Returns:
            * Ok(string_indices) such that for each index, `line.leaves[index]`
              is our target string if a match was able to be made. For
              transformers that don't result in more lines (e.g. StringMerger,
              StringParenStripper), multiple matches and transforms are done at
              once to reduce the complexity.
            OR
            * Err(CannotTransform), if no match could be made.
        """

    @abstractmethod
    def do_transform(
        self, line: Line, string_indices: list[int]
    ) -> Iterator[TResult[Line]]:
        """
        Yields:
            * Ok(new_line) where new_line is the new transformed line.
            OR
            * Err(CannotTransform) if the transformation failed for some reason. The
              `do_match(...)` template method should usually be used to reject
              the form of the given Line, but in some cases it is difficult to
              know whether or not a Line meets the StringTransformer's
              requirements until the transformation is already midway.

        Side Effects:
            This method should NOT mutate @line directly, but it MAY mutate the
            Line's underlying Node structure. (WARNING: If the underlying Node
            structure IS altered, then this method should NOT be allowed to
            yield a CannotTransform after that point.)
        """

    def __call__(
        self, line: Line, _features: Collection[Feature], _mode: Mode
    ) -> Iterator[Line]:
        """
        StringTransformer instances have a call signature that mirrors that of
        the Transformer type.

        Raises:
            CannotTransform(...) if the concrete StringTransformer class is unable
            to transform @line.
        """
        # Optimization to avoid calling `self.do_match(...)` when the line does
        # not contain any string.
        if not any(leaf.type == token.STRING for leaf in line.leaves):
            raise CannotTransform("There are no strings in this line.")

        match_result = self.do_match(line)

        if isinstance(match_result, Err):
            cant_transform = match_result.err()
            raise CannotTransform(
                f"The string transformer {self.__class__.__name__} does not recognize"
                " this line as one that it can transform."
            ) from cant_transform

        string_indices = match_result.ok()

        for line_result in self.do_transform(line, string_indices):
            if isinstance(line_result, Err):
                cant_transform = line_result.err()
                raise CannotTransform(
                    "StringTransformer failed while attempting to transform string."
                ) from cant_transform
            line = line_result.ok()
            yield line
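

# Usage sketch (editorial; a hypothetical driver, not part of this module).
# A StringTransformer instance is invoked like any other Transformer:
#
#     merger = StringMerger(line_length=88, normalize_strings=True)
#     try:
#         new_lines = list(merger(line, features, mode))
#     except CannotTransform:
#         new_lines = [line]  # leave the line untouched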


@dataclass
class CustomSplit:
    """A custom (i.e. manual) string split.

    A single CustomSplit instance represents a single substring.

    Examples:
        Consider the following string:
        ```
        "Hi there friend."
        " This is a custom"
        f" string {split}."
        ```

        This string will correspond to the following three CustomSplit instances:
        ```
        CustomSplit(False, 16)
        CustomSplit(False, 17)
        CustomSplit(True, 16)
        ```
    """

    has_prefix: bool
    break_idx: int


CustomSplitMapKey = tuple[StringID, str]


@trait
class CustomSplitMapMixin:
    """
    This mixin class is used to map merged strings to a sequence of
    CustomSplits, which will then be used to re-split the strings iff none of
    the resultant substrings go over the configured max line length.
    """

    _CUSTOM_SPLIT_MAP: ClassVar[dict[CustomSplitMapKey, tuple[CustomSplit, ...]]] = (
        defaultdict(tuple)
    )

    @staticmethod
    def _get_key(string: str) -> CustomSplitMapKey:
        """
        Returns:
            A unique identifier that is used internally to map @string to a
            group of custom splits.
        """
        return (id(string), string)

    def add_custom_splits(
        self, string: str, custom_splits: Iterable[CustomSplit]
    ) -> None:
        """Custom Split Map Setter Method

        Side Effects:
            Adds a mapping from @string to the custom splits @custom_splits.
        """
        key = self._get_key(string)
        self._CUSTOM_SPLIT_MAP[key] = tuple(custom_splits)

    def pop_custom_splits(self, string: str) -> list[CustomSplit]:
        """Custom Split Map Getter Method

        Returns:
            * A list of the custom splits that are mapped to @string, if any
              exist.
            OR
            * [], otherwise.

        Side Effects:
            Deletes the mapping between @string and its associated custom
            splits (which are returned to the caller).
        """
        key = self._get_key(string)

        custom_splits = self._CUSTOM_SPLIT_MAP[key]
        del self._CUSTOM_SPLIT_MAP[key]

        return list(custom_splits)

    def has_custom_splits(self, string: str) -> bool:
        """
        Returns:
            True iff @string is associated with a set of custom splits.
        """
        key = self._get_key(string)
        return key in self._CUSTOM_SPLIT_MAP
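

# Design note (editorial): the map key pairs id(string) with the string value
# itself. id() alone would be unsafe because CPython can reuse an object's id
# once the object is garbage collected; including the value makes a collision
# between two different strings effectively impossible. Usage sketch:
#
#     self.add_custom_splits(merged, [CustomSplit(False, 16), CustomSplit(True, 16)])
#     splits = self.pop_custom_splits(merged)  # returns both splits, removes the entry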


class StringMerger(StringTransformer, CustomSplitMapMixin):
    """StringTransformer that merges strings together.

    Requirements:
        (A) The line contains adjacent strings such that ALL of the validation checks
            listed in StringMerger._validate_msg(...)'s docstring pass.
        OR
        (B) The line contains a string which uses line continuation backslashes.

    Transformations:
        Depending on which of the two requirements above were met, either:

        (A) The string group associated with the target string is merged.
        OR
        (B) All line-continuation backslashes are removed from the target string.

    Collaborations:
        StringMerger provides custom split information to StringSplitter.
    """

    def do_match(self, line: Line) -> TMatchResult:
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        string_indices = []
        idx = 0
        while is_valid_index(idx):
            leaf = LL[idx]
            if (
                leaf.type == token.STRING
                and is_valid_index(idx + 1)
                and LL[idx + 1].type == token.STRING
            ):
                # Let's check if the string group contains an inline comment.
                # If we have a comment inline, we don't merge the strings.
                contains_comment = False
                i = idx
                while is_valid_index(i):
                    if LL[i].type != token.STRING:
                        break
                    if line.comments_after(LL[i]):
                        contains_comment = True
                        break
                    i += 1

                if not contains_comment and not is_part_of_annotation(leaf):
                    string_indices.append(idx)

                # Advance to the next non-STRING leaf.
                idx += 2
                while is_valid_index(idx) and LL[idx].type == token.STRING:
                    idx += 1

            elif leaf.type == token.STRING and "\\\n" in leaf.value:
                string_indices.append(idx)
                # Advance to the next non-STRING leaf.
                idx += 1
                while is_valid_index(idx) and LL[idx].type == token.STRING:
                    idx += 1

            else:
                idx += 1

        if string_indices:
            return Ok(string_indices)
        else:
            return TErr("This line has no strings that need merging.")

    def do_transform(
        self, line: Line, string_indices: list[int]
    ) -> Iterator[TResult[Line]]:
        new_line = line

        rblc_result = self._remove_backslash_line_continuation_chars(
            new_line, string_indices
        )
        if isinstance(rblc_result, Ok):
            new_line = rblc_result.ok()

        msg_result = self._merge_string_group(new_line, string_indices)
        if isinstance(msg_result, Ok):
            new_line = msg_result.ok()

        if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
            msg_cant_transform = msg_result.err()
            rblc_cant_transform = rblc_result.err()
            cant_transform = CannotTransform(
                "StringMerger failed to merge any strings in this line."
            )

            # Chain the errors together using `__cause__`.
            msg_cant_transform.__cause__ = rblc_cant_transform
            cant_transform.__cause__ = msg_cant_transform

            yield Err(cant_transform)
        else:
            yield Ok(new_line)

    @staticmethod
    def _remove_backslash_line_continuation_chars(
        line: Line, string_indices: list[int]
    ) -> TResult[Line]:
        """
        Merge strings that were split across multiple lines using
        line-continuation backslashes.

        Returns:
            Ok(new_line), if @line contains backslash line-continuation
            characters.
            OR
            Err(CannotTransform), otherwise.
        """
        LL = line.leaves

        indices_to_transform = []
        for string_idx in string_indices:
            string_leaf = LL[string_idx]
            if (
                string_leaf.type == token.STRING
                and "\\\n" in string_leaf.value
                and not has_triple_quotes(string_leaf.value)
            ):
                indices_to_transform.append(string_idx)

        if not indices_to_transform:
            return TErr(
                "Found no string leaves that contain backslash line continuation"
                " characters."
            )

        new_line = line.clone()
        new_line.comments = line.comments.copy()
        append_leaves(new_line, line, LL)

        for string_idx in indices_to_transform:
            new_string_leaf = new_line.leaves[string_idx]
            new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")

        return Ok(new_line)

    def _merge_string_group(
        self, line: Line, string_indices: list[int]
    ) -> TResult[Line]:
        """
        Merges string groups (i.e. sets of adjacent strings).

        Each index from `string_indices` designates one string group's first
        leaf in `line.leaves`.

        Returns:
            Ok(new_line), if ALL of the validation checks found in
            _validate_msg(...) pass.
            OR
            Err(CannotTransform), otherwise.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        # A dict of {string_idx: tuple[num_of_strings, string_leaf]}.
        merged_string_idx_dict: dict[int, tuple[int, Leaf]] = {}
        for string_idx in string_indices:
            vresult = self._validate_msg(line, string_idx)
            if isinstance(vresult, Err):
                continue
            merged_string_idx_dict[string_idx] = self._merge_one_string_group(
                LL, string_idx, is_valid_index
            )

        if not merged_string_idx_dict:
            return TErr("No string group is merged")

        # Build the final line ('new_line') that this method will later return.
        new_line = line.clone()
        previous_merged_string_idx = -1
        previous_merged_num_of_strings = -1
        for i, leaf in enumerate(LL):
            if i in merged_string_idx_dict:
                previous_merged_string_idx = i
                previous_merged_num_of_strings, string_leaf = merged_string_idx_dict[i]
                new_line.append(string_leaf)

            if (
                previous_merged_string_idx
                <= i
                < previous_merged_string_idx + previous_merged_num_of_strings
            ):
                for comment_leaf in line.comments_after(leaf):
                    new_line.append(comment_leaf, preformatted=True)
                continue

            append_leaves(new_line, line, [leaf])

        return Ok(new_line)

    def _merge_one_string_group(
        self, LL: list[Leaf], string_idx: int, is_valid_index: Callable[[int], bool]
    ) -> tuple[int, Leaf]:
        """
        Merges one string group where the first string in the group is
        `LL[string_idx]`.

        Returns:
            A tuple of `(num_of_strings, leaf)` where `num_of_strings` is the
            number of strings merged and `leaf` is the newly merged string
            to be replaced in the new line.
        """
        # If the string group is wrapped inside an Atom node, we must make sure
        # to later replace that Atom with our new (merged) string leaf.
        atom_node = LL[string_idx].parent

        # We will place BREAK_MARK in between every two substrings that we
        # merge. We will then later go through our final result and use the
        # various instances of BREAK_MARK we find to add the right values to
        # the custom split map.
        BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"

        QUOTE = LL[string_idx].value[-1]

        def make_naked(string: str, string_prefix: str) -> str:
            """Strip @string (i.e. make it a "naked" string)

            Pre-conditions:
                * assert_is_leaf_string(@string)

            Returns:
                A string that is identical to @string except that
                @string_prefix has been stripped, the surrounding QUOTE
                characters have been removed, and any remaining QUOTE
                characters have been escaped.
            """
            assert_is_leaf_string(string)
            if "f" in string_prefix:
                f_expressions = [
                    string[span[0] + 1 : span[1] - 1]  # +-1 to get rid of curly braces
                    for span in iter_fexpr_spans(string)
                ]
                debug_expressions_contain_visible_quotes = any(
                    re.search(r".*[\'\"].*(?<![!:=])={1}(?!=)(?![^\s:])", expression)
                    for expression in f_expressions
                )
                if not debug_expressions_contain_visible_quotes:
                    # We don't want to toggle visible quotes in debug f-strings, as
                    # that would modify the AST
                    string = _toggle_fexpr_quotes(string, QUOTE)
                    # After quotes toggling, quotes in expressions won't be escaped
                    # because quotes can't be reused in f-strings. So we can simply
                    # let the escaping logic below run without knowing f-string
                    # expressions.

            RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
            naked_string = string[len(string_prefix) + 1 : -1]
            naked_string = re.sub(
                "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
            )
            return naked_string
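
        # e.g. (editorial): with QUOTE == '"', calling make_naked on the leaf
        # value 'foo "bar"' (a single-quoted string) with an empty prefix
        # strips the surrounding quotes and escapes the embedded double
        # quotes, producing the characters  foo \"bar\"  .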

        # Holds the CustomSplit objects that will later be added to the custom
        # split map.
        custom_splits = []

        # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
        prefix_tracker = []

        # Sets the 'prefix' variable. This is the prefix that the final merged
        # string will have.
        next_str_idx = string_idx
        prefix = ""
        while (
            not prefix
            and is_valid_index(next_str_idx)
            and LL[next_str_idx].type == token.STRING
        ):
            prefix = get_string_prefix(LL[next_str_idx].value).lower()
            next_str_idx += 1

        # The next loop merges the string group. The final string will be
        # contained in 'S'.
        #
        # The following convenience variables are used:
        #
        #   S:   string
        #   NS:  naked string
        #   SS:  next string
        #   NSS: naked next string
        S = ""
        NS = ""
        num_of_strings = 0
        next_str_idx = string_idx
        while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
            num_of_strings += 1

            SS = LL[next_str_idx].value
            next_prefix = get_string_prefix(SS).lower()

            # If this is an f-string group but this substring is not prefixed
            # with 'f'...
            if "f" in prefix and "f" not in next_prefix:
                # Then we must escape any braces contained in this substring.
                SS = re.sub(r"(\{|\})", r"\1\1", SS)

            NSS = make_naked(SS, next_prefix)

            has_prefix = bool(next_prefix)
            prefix_tracker.append(has_prefix)

            S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
            NS = make_naked(S, prefix)

            next_str_idx += 1

        # Note the index of the first non-STRING leaf.
        non_string_idx = next_str_idx

        S_leaf = Leaf(token.STRING, S)
        if self.normalize_strings:
            S_leaf.value = normalize_string_quotes(S_leaf.value)

        # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
        temp_string = S_leaf.value[len(prefix) + 1 : -1]
        for has_prefix in prefix_tracker:
            mark_idx = temp_string.find(BREAK_MARK)
            assert (
                mark_idx >= 0
            ), "Logic error while filling the custom string breakpoint cache."

            temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
            breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
            custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))

        string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))

        if atom_node is not None:
            # If not all children of the atom node are merged (this can happen
            # when there is a standalone comment in the middle) ...
            if non_string_idx - string_idx < len(atom_node.children):
                # We need to replace the old STRING leaves with the new string leaf.
                first_child_idx = LL[string_idx].remove()
                for idx in range(string_idx + 1, non_string_idx):
                    LL[idx].remove()
                if first_child_idx is not None:
                    atom_node.insert_child(first_child_idx, string_leaf)
            else:
                # Else replace the atom node with the new string leaf.
                replace_child(atom_node, string_leaf)

        self.add_custom_splits(string_leaf.value, custom_splits)
        return num_of_strings, string_leaf
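
    # Tying this back to CustomSplit's docstring (editorial): merging
    #     "Hi there friend." " This is a custom" f" string {split}."
    # yields the single leaf
    #     f"Hi there friend. This is a custom string {split}."
    # and records CustomSplit(False, 16), CustomSplit(False, 17), and
    # CustomSplit(True, 16), so StringSplitter can later restore the manual
    # layout if it still fits.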

    @staticmethod
    def _validate_msg(line: Line, string_idx: int) -> TResult[None]:
        """Validate (M)erge (S)tring (G)roup

        Transform-time string validation logic for _merge_string_group(...).

        Returns:
            * Ok(None), if ALL validation checks (listed below) pass.
            OR
            * Err(CannotTransform), if any of the following are true:
                - The target string group contains "inner" stand-alone comments
                  (i.e. stand-alone comments with string leaves on both sides).
                - The target string is not in a string group (i.e. it has no
                  adjacent strings).
                - The string group has more than one inline comment.
                - The string group has an inline comment that appears to be a pragma.
                - The set of all string prefixes in the string group is of
                  length greater than one and is not equal to {"", "f"}.
                - The string group consists of raw strings.
                - The string group would merge f-strings with different quote types
                  and internal quotes.
                - The string group consists of stringified type annotations. We don't
                  want to process stringified type annotations since pyright doesn't
                  support them spanning multiple string values. (NOTE: mypy, pytype,
                  pyre do support them, so we can change if pyright also gains support
                  in the future. See https://github.com/microsoft/pyright/issues/4359.)
        """
        # We first check for "inner" stand-alone comments (i.e. stand-alone
        # comments that have a string leaf before them AND after them).
        for inc in [1, -1]:
            i = string_idx
            found_sa_comment = False
            is_valid_index = is_valid_index_factory(line.leaves)
            while is_valid_index(i) and line.leaves[i].type in [
                token.STRING,
                STANDALONE_COMMENT,
            ]:
                if line.leaves[i].type == STANDALONE_COMMENT:
                    found_sa_comment = True
                elif found_sa_comment:
                    return TErr(
                        "StringMerger does NOT merge string groups which contain "
                        "stand-alone comments."
                    )

                i += inc

        QUOTE = line.leaves[string_idx].value[-1]

        num_of_inline_string_comments = 0
        set_of_prefixes = set()
        num_of_strings = 0
        for leaf in line.leaves[string_idx:]:
            if leaf.type != token.STRING:
                # If the string group is trailed by a comma, we count the
                # comments trailing the comma to be one of the string group's
                # comments.
                if leaf.type == token.COMMA and id(leaf) in line.comments:
                    num_of_inline_string_comments += 1
                break

            if has_triple_quotes(leaf.value):
                return TErr("StringMerger does NOT merge multiline strings.")

            num_of_strings += 1
            prefix = get_string_prefix(leaf.value).lower()
            if "r" in prefix:
                return TErr("StringMerger does NOT merge raw strings.")

            set_of_prefixes.add(prefix)

            if (
                "f" in prefix
                and leaf.value[-1] != QUOTE
                and (
                    "'" in leaf.value[len(prefix) + 1 : -1]
                    or '"' in leaf.value[len(prefix) + 1 : -1]
                )
            ):
                return TErr(
                    "StringMerger does NOT merge f-strings with different quote types"
                    " and internal quotes."
                )

            if id(leaf) in line.comments:
                num_of_inline_string_comments += 1
                if contains_pragma_comment(line.comments[id(leaf)]):
                    return TErr("Cannot merge strings which have pragma comments.")

        if num_of_strings < 2:
            return TErr(
                f"Not enough strings to merge (num_of_strings={num_of_strings})."
            )

        if num_of_inline_string_comments > 1:
            return TErr(
                f"Too many inline string comments ({num_of_inline_string_comments})."
            )

        if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
            return TErr(f"Too many different prefixes ({set_of_prefixes}).")

        return Ok(None)
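

# Examples of groups the validation above rejects (editorial): `"a" r"b"`
# (raw string in the group), `"""a""" "b"` (multiline string), and any group
# whose leaves carry two or more inline comments.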


class StringParenStripper(StringTransformer):
    """StringTransformer that strips surrounding parentheses from strings.

    Requirements:
        The line contains a string which is surrounded by parentheses and:
            - The target string is NOT the only argument to a function call.
            - The target string is NOT a "pointless" string.
            - The target string is NOT a dictionary value.
            - If the target string contains a PERCENT, the brackets are not
              preceded or followed by an operator with higher precedence than
              PERCENT.

    Transformations:
        The parentheses mentioned in the 'Requirements' section are stripped.

    Collaborations:
        StringParenStripper has its own inherent usefulness, but it is also
        relied on to clean up the parentheses created by StringParenWrapper (in
        the event that they are no longer needed).
    """

    def do_match(self, line: Line) -> TMatchResult:
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        string_indices = []

        idx = -1
        while True:
            idx += 1
            if idx >= len(LL):
                break
            leaf = LL[idx]

            # Should be a string...
            if leaf.type != token.STRING:
                continue

            # If this is a "pointless" string...
            if (
                leaf.parent
                and leaf.parent.parent
                and leaf.parent.parent.type == syms.simple_stmt
            ):
                continue

            # Should be preceded by a non-empty LPAR...
            if (
                not is_valid_index(idx - 1)
                or LL[idx - 1].type != token.LPAR
                or is_empty_lpar(LL[idx - 1])
            ):
                continue

            # That LPAR should NOT be preceded by a colon (which could be a
            # dictionary value), function name, or a closing bracket (which
            # could be a function returning a function or a list/dictionary
            # containing a function)...
            if is_valid_index(idx - 2) and (
                LL[idx - 2].type == token.COLON
                or LL[idx - 2].type == token.NAME
                or LL[idx - 2].type in CLOSING_BRACKETS
            ):
                continue

            string_idx = idx

            # Skip the string trailer, if one exists.
            string_parser = StringParser()
            next_idx = string_parser.parse(LL, string_idx)

            # if the leaves in the parsed string include a PERCENT, we need to
            # make sure the initial LPAR is NOT preceded by an operator with
            # higher or equal precedence to PERCENT
            if is_valid_index(idx - 2):
                # mypy can't quite follow unless we name this
                before_lpar = LL[idx - 2]
                if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
                    (
                        before_lpar.type
                        in {
                            token.STAR,
                            token.AT,
                            token.SLASH,
                            token.DOUBLESLASH,
                            token.PERCENT,
                            token.TILDE,
                            token.DOUBLESTAR,
                            token.AWAIT,
                            token.LSQB,
                            token.LPAR,
                        }
                    )
                    or (
                        # only unary PLUS/MINUS
                        before_lpar.parent
                        and before_lpar.parent.type == syms.factor
                        and (before_lpar.type in {token.PLUS, token.MINUS})
                    )
                ):
                    continue

            # Should be followed by a non-empty RPAR...
            if (
                is_valid_index(next_idx)
                and LL[next_idx].type == token.RPAR
                and not is_empty_rpar(LL[next_idx])
            ):
                # That RPAR should NOT be followed by anything with higher
                # precedence than PERCENT
                if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
                    token.DOUBLESTAR,
                    token.LSQB,
                    token.LPAR,
                    token.DOT,
                }:
                    continue

                string_indices.append(string_idx)
                idx = string_idx
                while idx < len(LL) - 1 and LL[idx + 1].type == token.STRING:
                    idx += 1

        if string_indices:
            return Ok(string_indices)
        return TErr("This line has no strings wrapped in parens.")

    def do_transform(
        self, line: Line, string_indices: list[int]
    ) -> Iterator[TResult[Line]]:
        LL = line.leaves

        string_and_rpar_indices: list[int] = []
        for string_idx in string_indices:
            string_parser = StringParser()
            rpar_idx = string_parser.parse(LL, string_idx)

            should_transform = True
            for leaf in (LL[string_idx - 1], LL[rpar_idx]):
                if line.comments_after(leaf):
                    # Should not strip parentheses which have comments attached
                    # to them.
                    should_transform = False
                    break
            if should_transform:
                string_and_rpar_indices.extend((string_idx, rpar_idx))

        if string_and_rpar_indices:
            yield Ok(self._transform_to_new_line(line, string_and_rpar_indices))
        else:
            yield Err(
                CannotTransform("All string groups have comments attached to them.")
            )

    def _transform_to_new_line(
        self, line: Line, string_and_rpar_indices: list[int]
    ) -> Line:
        LL = line.leaves

        new_line = line.clone()
        new_line.comments = line.comments.copy()

        previous_idx = -1
        # We need to sort the indices, since string_idx and its matching
        # rpar_idx may not come in order, e.g. in
        # `("outer" % ("inner".join(items)))`, the "inner" string's
        # string_idx is smaller than "outer" string's rpar_idx.
        for idx in sorted(string_and_rpar_indices):
            leaf = LL[idx]
            lpar_or_rpar_idx = idx - 1 if leaf.type == token.STRING else idx
            append_leaves(new_line, line, LL[previous_idx + 1 : lpar_or_rpar_idx])
            if leaf.type == token.STRING:
                string_leaf = Leaf(token.STRING, LL[idx].value)
                LL[lpar_or_rpar_idx].remove()  # Remove lpar.
                replace_child(LL[idx], string_leaf)
                new_line.append(string_leaf)
                # replace comments
                old_comments = new_line.comments.pop(id(LL[idx]), [])
                new_line.comments.setdefault(id(string_leaf), []).extend(old_comments)
            else:
                LL[lpar_or_rpar_idx].remove()  # This is a rpar.

            previous_idx = idx

        # Append the leaves after the last idx:
        append_leaves(new_line, line, LL[idx + 1 :])

        return new_line
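

# Example (editorial): `value = ("some long string")` has its redundant
# parentheses stripped, while `{"key": ("value")}` is left alone because the
# LPAR follows a colon and may therefore delimit a dictionary value.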


class BaseStringSplitter(StringTransformer):
    """
    Abstract class for StringTransformers which transform a Line's strings by splitting
    them or placing them on their own lines where necessary to avoid going over
    the configured line length.

    Requirements:
        * The target string value is responsible for the line going over the
          line length limit. It follows that after all of black's other line
          split methods have been exhausted, this line (or one of the resulting
          lines after all line splits are performed) would still be over the
          line_length limit unless we split this string.
        AND

        * The target string is NOT a "pointless" string (i.e. a string that has
          no parent or siblings).
        AND

        * The target string is not followed by an inline comment that appears
          to be a pragma.
        AND

        * The target string is not a multiline (i.e. triple-quote) string.
    """

    STRING_OPERATORS: Final = [
        token.EQEQUAL,
        token.GREATER,
        token.GREATEREQUAL,
        token.LESS,
        token.LESSEQUAL,
        token.NOTEQUAL,
        token.PERCENT,
        token.PLUS,
        token.STAR,
    ]

    @abstractmethod
    def do_splitter_match(self, line: Line) -> TMatchResult:
        """
        BaseStringSplitter asks its clients to override this method instead of
        `StringTransformer.do_match(...)`.

        Follows the same protocol as `StringTransformer.do_match(...)`.

        Refer to `help(StringTransformer.do_match)` for more information.
        """

    def do_match(self, line: Line) -> TMatchResult:
        match_result = self.do_splitter_match(line)
        if isinstance(match_result, Err):
            return match_result

        string_indices = match_result.ok()
        assert len(string_indices) == 1, (
            f"{self.__class__.__name__} should only find one match at a time, found"
            f" {len(string_indices)}"
        )
        string_idx = string_indices[0]
        vresult = self._validate(line, string_idx)
        if isinstance(vresult, Err):
            return vresult

        return match_result

    def _validate(self, line: Line, string_idx: int) -> TResult[None]:
        """
        Checks that @line meets all of the requirements listed in this class's
        docstring. Refer to `help(BaseStringSplitter)` for a detailed
        description of those requirements.

        Returns:
            * Ok(None), if ALL of the requirements are met.
            OR
            * Err(CannotTransform), if ANY of the requirements are NOT met.
        """
        LL = line.leaves

        string_leaf = LL[string_idx]

        max_string_length = self._get_max_string_length(line, string_idx)
        if len(string_leaf.value) <= max_string_length:
            return TErr(
                "The string itself is not what is causing this line to be too long."
            )

        if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
            token.STRING,
            token.NEWLINE,
        ]:
            return TErr(
                f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
                " no parent)."
            )

        if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
            line.comments[id(line.leaves[string_idx])]
        ):
            return TErr(
                "Line appears to end with an inline pragma comment. Splitting the line"
                " could modify the pragma's behavior."
            )

        if has_triple_quotes(string_leaf.value):
            return TErr("We cannot split multiline strings.")

        return Ok(None)

    def _get_max_string_length(self, line: Line, string_idx: int) -> int:
        """
        Calculates the max string length used when attempting to determine
        whether or not the target string is responsible for causing the line to
        go over the line length limit.

        WARNING: This method is tightly coupled to both StringSplitter and
        (especially) StringParenWrapper. There is probably a better way to
        accomplish what is being done here.

        Returns:
            max_string_length: such that `len(line.leaves[string_idx].value) >
            max_string_length` implies that the target string IS responsible
            for causing this line to exceed the line length limit.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        # We use the shorthand "WMA4" in comments to abbreviate "We must
        # account for". When giving examples, we use STRING to mean some/any
        # valid string.
        #
        # Finally, we use the following convenience variables:
        #
        #   P:  The leaf that is before the target string leaf.
        #   N:  The leaf that is after the target string leaf.
        #   NN: The leaf that is after N.

        # WMA4 the whitespace at the beginning of the line.
        offset = line.depth * 4

        if is_valid_index(string_idx - 1):
            p_idx = string_idx - 1
            if (
                LL[string_idx - 1].type == token.LPAR
                and LL[string_idx - 1].value == ""
                and string_idx >= 2
            ):
                # If the previous leaf is an empty LPAR placeholder, we should skip it.
                p_idx -= 1

            P = LL[p_idx]
            if P.type in self.STRING_OPERATORS:
                # WMA4 a space and a string operator (e.g. `+ STRING` or `== STRING`).
                offset += len(str(P)) + 1

            if P.type == token.COMMA:
                # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
                offset += 3

            if P.type in [token.COLON, token.EQUAL, token.PLUSEQUAL, token.NAME]:
                # This conditional branch is meant to handle dictionary keys,
                # variable assignments, 'return STRING' statement lines, and
                # 'else STRING' ternary expression lines.

                # WMA4 a single space.
                offset += 1

                # WMA4 the lengths of any leaves that came before that space,
                # but after any closing bracket before that space.
                for leaf in reversed(LL[: p_idx + 1]):
                    offset += len(str(leaf))
                    if leaf.type in CLOSING_BRACKETS:
                        break

        if is_valid_index(string_idx + 1):
            N = LL[string_idx + 1]
            if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
                # If the next leaf is an empty RPAR placeholder, we should skip it.
                N = LL[string_idx + 2]

            if N.type == token.COMMA:
                # WMA4 a single comma at the end of the string (e.g `STRING,`).
                offset += 1

            if is_valid_index(string_idx + 2):
                NN = LL[string_idx + 2]

                if N.type == token.DOT and NN.type == token.NAME:
                    # This conditional branch is meant to handle method calls invoked
                    # off of a string literal up to and including the LPAR character.

                    # WMA4 the '.' character.
                    offset += 1

                    if (
                        is_valid_index(string_idx + 3)
                        and LL[string_idx + 3].type == token.LPAR
                    ):
                        # WMA4 the left parenthesis character.
                        offset += 1

                    # WMA4 the length of the method's name.
                    offset += len(NN.value)

        has_comments = False
        for comment_leaf in line.comments_after(LL[string_idx]):
            if not has_comments:
                has_comments = True
                # WMA4 two spaces before the '#' character.
                offset += 2

            # WMA4 the length of the inline comment.
            offset += len(comment_leaf.value)

        max_string_length = count_chars_in_width(str(line), self.line_length - offset)
        return max_string_length
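
    # Worked example (editorial): for a depth-1 line `return "..."` with
    # line_length 88, offset = 4 (indent) + 1 (the space after `return`) +
    # 6 (len("return")) = 11, so the string may span at most 77 columns before
    # it is blamed for the overflow.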

    @staticmethod
    def _prefer_paren_wrap_match(LL: list[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the "prefer paren wrap" statement
            requirements listed in the 'Requirements' section of the StringParenWrapper
            class's docstring.
            OR
            None, otherwise.
        """
        # The line must start with a string.
        if LL[0].type != token.STRING:
            return None

        matching_nodes = [
            syms.listmaker,
            syms.dictsetmaker,
            syms.testlist_gexp,
        ]
        # If the string is an immediate child of a list/set/tuple literal...
        if (
            parent_type(LL[0]) in matching_nodes
            or parent_type(LL[0].parent) in matching_nodes
        ):
            # And the string is surrounded by commas (or is the first/last child)...
            prev_sibling = LL[0].prev_sibling
            next_sibling = LL[0].next_sibling
            if (
                not prev_sibling
                and not next_sibling
                and parent_type(LL[0]) == syms.atom
            ):
                # If it's an atom string, we need to check the parent atom's siblings.
                parent = LL[0].parent
                assert parent is not None  # For type checkers.
                prev_sibling = parent.prev_sibling
                next_sibling = parent.next_sibling
            if (not prev_sibling or prev_sibling.type == token.COMMA) and (
                not next_sibling or next_sibling.type == token.COMMA
            ):
                return 0

        return None


def iter_fexpr_spans(s: str) -> Iterator[tuple[int, int]]:
    """
    Yields spans corresponding to expressions in a given f-string.
    Spans are half-open ranges (left inclusive, right exclusive).
    Assumes the input string is a valid f-string, but will not crash if the input
    string is invalid.
    """
    stack: list[int] = []  # our curly paren stack
    i = 0
    while i < len(s):
        if s[i] == "{":
            # if we're in a string part of the f-string, ignore escaped curly braces
            if not stack and i + 1 < len(s) and s[i + 1] == "{":
                i += 2
                continue
            stack.append(i)
            i += 1
            continue

        if s[i] == "}":
            if not stack:
                i += 1
                continue
            j = stack.pop()
            # we've made it back out of the expression! yield the span
            if not stack:
                yield (j, i + 1)
            i += 1
            continue

        # if we're in an expression part of the f-string, fast-forward through strings
        # note that backslashes are not legal in the expression portion of f-strings
        if stack:
            delim = None
            if s[i : i + 3] in ("'''", '"""'):
                delim = s[i : i + 3]
            elif s[i] in ("'", '"'):
                delim = s[i]
            if delim:
                i += len(delim)
                while i < len(s) and s[i : i + len(delim)] != delim:
                    i += 1
                i += len(delim)
                continue
        i += 1
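

# e.g. (editorial):
#     list(iter_fexpr_spans('f"{x} and {y}"'))  ->  [(2, 5), (10, 13)]
# Doubled braces are treated as literals, so iter_fexpr_spans('f"{{x}}"')
# yields nothing.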


def fstring_contains_expr(s: str) -> bool:
    return any(iter_fexpr_spans(s))


def _toggle_fexpr_quotes(fstring: str, old_quote: str) -> str:
    """
    Toggles quotes used in f-string expressions that are `old_quote`.

    f-string expressions can't contain backslashes, so we need to toggle the
    quotes if the f-string itself will end up using the same quote. We can
    simply toggle without escaping because quotes can't be reused in f-string
    expressions. They will fail to parse.

    NOTE: PEP 701 (accepted for Python 3.12) changes this: once quotes can be
    reused, we can simply reuse them without updates or escaping, once Black
    figures out how to parse the new grammar.
    """
    new_quote = "'" if old_quote == '"' else '"'
    parts = []
    previous_index = 0
    for start, end in iter_fexpr_spans(fstring):
        parts.append(fstring[previous_index:start])
        parts.append(fstring[start:end].replace(old_quote, new_quote))
        previous_index = end
    parts.append(fstring[previous_index:])
    return "".join(parts)
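

# e.g. (editorial): when merging f'{d["k"]}' into a double-quoted result,
# _toggle_fexpr_quotes('f\'{d["k"]}\'', '"') rewrites only the expression
# span, producing the characters  f'{d['k']}'  .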


class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
    """
    StringTransformer that splits "atom" strings (i.e. strings which exist on
    lines by themselves).

    Requirements:
        * The line consists ONLY of a single string (possibly prefixed by a
          string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
          a trailing comma.
        AND
        * All of the requirements listed in BaseStringSplitter's docstring.

    Transformations:
        The string mentioned in the 'Requirements' section is split into as
        many substrings as necessary to adhere to the configured line length.

        In the final set of substrings, no substring should be smaller than
        MIN_SUBSTR_SIZE characters.

        The string will ONLY be split on spaces (i.e. each new substring should
        start with a space). Note that the string will NOT be split on a space
        which is escaped with a backslash.

        If the string is an f-string, it will NOT be split in the middle of an
        f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
        else bar()} is an f-expression).

        If the string that is being split has an associated set of custom split
        records and those custom splits will NOT result in any line going over
        the configured line length, those custom splits are used. Otherwise the
        string is split as late as possible (from left-to-right) while still
        adhering to the transformation rules listed above.

    Collaborations:
        StringSplitter relies on StringMerger to construct the appropriate
        CustomSplit objects and add them to the custom split map.
    """

    MIN_SUBSTR_SIZE: Final = 6

    def do_splitter_match(self, line: Line) -> TMatchResult:
        LL = line.leaves

        if self._prefer_paren_wrap_match(LL) is not None:
            return TErr("Line needs to be wrapped in parens first.")

        is_valid_index = is_valid_index_factory(LL)

        idx = 0

        # The first two leaves MAY be the 'not in' keywords...
        if (
            is_valid_index(idx)
            and is_valid_index(idx + 1)
            and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
            and str(LL[idx]) + str(LL[idx + 1]) == "not in"
        ):
            idx += 2
        # Else the first leaf MAY be a string operator symbol or the 'in' keyword...
        elif is_valid_index(idx) and (
            LL[idx].type in self.STRING_OPERATORS
            or LL[idx].type == token.NAME
            and str(LL[idx]) == "in"
        ):
            idx += 1

        # The next/first leaf MAY be an empty LPAR...
        if is_valid_index(idx) and is_empty_lpar(LL[idx]):
            idx += 1

        # The next/first leaf MUST be a string...
        if not is_valid_index(idx) or LL[idx].type != token.STRING:
            return TErr("Line does not start with a string.")

        string_idx = idx

        # Skip the string trailer, if one exists.
        string_parser = StringParser()
        idx = string_parser.parse(LL, string_idx)

        # That string MAY be followed by an empty RPAR...
        if is_valid_index(idx) and is_empty_rpar(LL[idx]):
            idx += 1

        # That string / empty RPAR leaf MAY be followed by a comma...
        if is_valid_index(idx) and LL[idx].type == token.COMMA:
            idx += 1

        # But no more leaves are allowed...
        if is_valid_index(idx):
            return TErr("This line does not end with a string.")

        return Ok([string_idx])
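
    # Lines this matches (editorial): a lone string atom such as
    #     "some very long string literal ..."
    # optionally preceded by a string operator or `in`/`not in`, and optionally
    # followed by a string trailer, an empty RPAR, and/or a trailing comma.
    # Any other leaf on the line produces a TErr above.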

1477 

1478 def do_transform( 

1479 self, line: Line, string_indices: list[int] 

1480 ) -> Iterator[TResult[Line]]: 

1481 LL = line.leaves 

1482 assert len(string_indices) == 1, ( 

1483 f"{self.__class__.__name__} should only find one match at a time, found" 

1484 f" {len(string_indices)}" 

1485 ) 

1486 string_idx = string_indices[0] 

1487 

1488 QUOTE = LL[string_idx].value[-1] 

1489 

1490 is_valid_index = is_valid_index_factory(LL) 

1491 insert_str_child = insert_str_child_factory(LL[string_idx]) 

1492 

1493 prefix = get_string_prefix(LL[string_idx].value).lower() 

1494 

1495 # We MAY choose to drop the 'f' prefix from substrings that don't 

1496 # contain any f-expressions, but ONLY if the original f-string 

1497 # contains at least one f-expression. Otherwise, we will alter the AST 

1498 # of the program. 

1499 drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr( 

1500 LL[string_idx].value 

1501 ) 

1502 

1503 first_string_line = True 

1504 

1505 string_op_leaves = self._get_string_operator_leaves(LL) 

1506 string_op_leaves_length = ( 

1507 sum(len(str(prefix_leaf)) for prefix_leaf in string_op_leaves) + 1 

1508 if string_op_leaves 

1509 else 0 

1510 ) 

1511 

1512 def maybe_append_string_operators(new_line: Line) -> None: 

1513 """ 

1514 Side Effects: 

1515 If @line starts with a string operator and this is the first 

1516 line we are constructing, this function appends the string 

1517 operator to @new_line and replaces the old string operator leaf 

1518 in the node structure. Otherwise this function does nothing. 

1519 """ 

1520 maybe_prefix_leaves = string_op_leaves if first_string_line else [] 

1521 for i, prefix_leaf in enumerate(maybe_prefix_leaves): 

1522 replace_child(LL[i], prefix_leaf) 

1523 new_line.append(prefix_leaf) 

1524 

1525 ends_with_comma = ( 

1526 is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA 

1527 ) 

1528 

1529 def max_last_string_column() -> int: 

1530 """ 

1531 Returns: 

1532 The max allowed width of the string value used for the last 

1533 line we will construct. Note that this value means the width 

1534 rather than the number of characters (e.g., many East Asian 

1535 characters expand to two columns). 

1536 """ 

1537 result = self.line_length 

1538 result -= line.depth * 4 

1539 result -= 1 if ends_with_comma else 0 

1540 result -= string_op_leaves_length 

1541 return result 

1542 

1543 # --- Calculate Max Break Width (for string value) 

1544 # We start with the line length limit 

1545 max_break_width = self.line_length 

1546 # The last index of a string of length N is N-1. 

1547 max_break_width -= 1 

1548 # Leading whitespace is not present in the string value (e.g. Leaf.value). 

1549 max_break_width -= line.depth * 4 

1550 if max_break_width < 0: 

1551 yield TErr( 

1552 f"Unable to split {LL[string_idx].value} at such high of a line depth:" 

1553 f" {line.depth}" 

1554 ) 

1555 return 

1556 

1557 # Check if StringMerger registered any custom splits. 

1558 custom_splits = self.pop_custom_splits(LL[string_idx].value) 

1559 # We use them ONLY if none of them would produce lines that exceed the 

1560 # line limit. 

1561 use_custom_breakpoints = bool( 

1562 custom_splits 

1563 and all(csplit.break_idx <= max_break_width for csplit in custom_splits) 

1564 ) 

1565 

1566 # Temporary storage for the remaining chunk of the string line that 

1567 # can't fit onto the line currently being constructed. 

1568 rest_value = LL[string_idx].value 

1569 

1570 def more_splits_should_be_made() -> bool: 

1571 """ 

1572 Returns: 

1573 True iff `rest_value` (the remaining string value from the last 

1574 split), should be split again. 

1575 """ 

1576 if use_custom_breakpoints: 

1577 return len(custom_splits) > 1 

1578 else: 

1579 return str_width(rest_value) > max_last_string_column() 

1580 

1581 string_line_results: list[Ok[Line]] = [] 

1582 while more_splits_should_be_made(): 

1583 if use_custom_breakpoints: 

1584 # Custom User Split (manual) 

1585 csplit = custom_splits.pop(0) 

1586 break_idx = csplit.break_idx 

1587 else: 

1588 # Algorithmic Split (automatic) 

1589 max_bidx = ( 

1590 count_chars_in_width(rest_value, max_break_width) 

1591 - string_op_leaves_length 

1592 ) 

1593 maybe_break_idx = self._get_break_idx(rest_value, max_bidx) 

1594 if maybe_break_idx is None: 

1595 # If we are unable to algorithmically determine a good split 

1596 # and this string has custom splits registered to it, we 

1597 # fall back to using them--which means we have to start 

1598 # over from the beginning. 

1599 if custom_splits: 

1600 rest_value = LL[string_idx].value 

1601 string_line_results = [] 

1602 first_string_line = True 

1603 use_custom_breakpoints = True 

1604 continue 

1605 

1606 # Otherwise, we stop splitting here. 

1607 break 

1608 

1609 break_idx = maybe_break_idx 

1610 

1611 # --- Construct `next_value` 

1612 next_value = rest_value[:break_idx] + QUOTE 

1613 

1614 # HACK: The following 'if' statement is a hack to fix the custom 

1615 # breakpoint index in the case of either: (a) substrings that were 

1616 # f-strings but will have the 'f' prefix removed OR (b) substrings 

1617 # that were not f-strings but will now become f-strings because of 

1618 # redundant use of the 'f' prefix (i.e. none of the substrings 

1619 # contain f-expressions but one or more of them had the 'f' prefix 

1620 # anyway; in which case, we will prepend 'f' to _all_ substrings). 

1621 # 

1622 # There is probably a better way to accomplish what is being done 

1623 # here... 

1624 # 

1625 # If this substring is an f-string, we _could_ remove the 'f' 

1626 # prefix, and the current custom split did NOT originally use a 

1627 # prefix... 

1628 if ( 

1629 use_custom_breakpoints 

1630 and not csplit.has_prefix 

1631 and ( 

1632 # `next_value == prefix + QUOTE` happens when the custom 

1633 # split is an empty string. 

1634 next_value == prefix + QUOTE 

1635 or next_value != self._normalize_f_string(next_value, prefix) 

1636 ) 

1637 ): 

1638 # Then `csplit.break_idx` will be off by one after removing 

1639 # the 'f' prefix. 

1640 break_idx += 1 

1641 next_value = rest_value[:break_idx] + QUOTE 

1642 

1643 if drop_pointless_f_prefix: 

1644 next_value = self._normalize_f_string(next_value, prefix) 

1645 

1646 # --- Construct `next_leaf` 

1647 next_leaf = Leaf(token.STRING, next_value) 

1648 insert_str_child(next_leaf) 

1649 self._maybe_normalize_string_quotes(next_leaf) 

1650 

1651 # --- Construct `next_line` 

1652 next_line = line.clone() 

1653 maybe_append_string_operators(next_line) 

1654 next_line.append(next_leaf) 

1655 string_line_results.append(Ok(next_line)) 

1656 

1657 rest_value = prefix + QUOTE + rest_value[break_idx:] 

1658 first_string_line = False 

1659 

1660 yield from string_line_results 

1661 

1662 if drop_pointless_f_prefix: 

1663 rest_value = self._normalize_f_string(rest_value, prefix) 

1664 

1665 rest_leaf = Leaf(token.STRING, rest_value) 

1666 insert_str_child(rest_leaf) 

1667 

1668 # NOTE: I could not find a test case that verifies that the following 

1669 # line is actually necessary, but it seems to be. Otherwise we risk 

1670 # not normalizing the last substring. 

1671 self._maybe_normalize_string_quotes(rest_leaf) 

1672 

1673 last_line = line.clone() 

1674 maybe_append_string_operators(last_line) 

1675 

1676 # If there are any leaves to the right of the target string... 

1677 if is_valid_index(string_idx + 1): 

1678 # We use `temp_value` here to determine how long the last line 

1679 # would be if we were to append all the leaves to the right of the 

1680 # target string to the last string line. 

1681 temp_value = rest_value 

1682 for leaf in LL[string_idx + 1 :]: 

1683 temp_value += str(leaf) 

1684 if leaf.type == token.LPAR: 

1685 break 

1686 

1687 # Try to fit them all on the same line with the last substring... 

1688 if ( 

1689 str_width(temp_value) <= max_last_string_column() 

1690 or LL[string_idx + 1].type == token.COMMA 

1691 ): 

1692 last_line.append(rest_leaf) 

1693 append_leaves(last_line, line, LL[string_idx + 1 :]) 

1694 yield Ok(last_line) 

1695 # Otherwise, place the last substring on one line and everything 

1696 # else on a line below that... 

1697 else: 

1698 last_line.append(rest_leaf) 

1699 yield Ok(last_line) 

1700 

1701 non_string_line = line.clone() 

1702 append_leaves(non_string_line, line, LL[string_idx + 1 :]) 

1703 yield Ok(non_string_line) 

1704 # Else the target string was the last leaf... 

1705 else: 

1706 last_line.append(rest_leaf) 

1707 last_line.comments = line.comments.copy() 

1708 yield Ok(last_line) 

1709 

1710 def _iter_nameescape_slices(self, string: str) -> Iterator[tuple[Index, Index]]: 

1711 r""" 

1712 Yields: 

1713 All ranges of @string which, if @string were to be split there, 

1714 would result in the splitting of an \N{...} expression (which is NOT 

1715 allowed). 

1716 """ 

1717 # True - the previous backslash was unescaped 

1718 # False - the previous backslash was escaped *or* there was no backslash 

1719 previous_was_unescaped_backslash = False 

1720 it = iter(enumerate(string)) 

1721 for idx, c in it: 

1722 if c == "\\": 

1723 previous_was_unescaped_backslash = not previous_was_unescaped_backslash 

1724 continue 

1725 if not previous_was_unescaped_backslash or c != "N": 

1726 previous_was_unescaped_backslash = False 

1727 continue 

1728 previous_was_unescaped_backslash = False 

1729 

1730 begin = idx - 1 # the position of backslash before \N{...} 

1731 for idx, c in it: 

1732 if c == "}": 

1733 end = idx 

1734 break 

1735 else: 

1736 # Malformed name-escape expression? 

1737 # It should have been detected by AST parsing earlier... 

1738 raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!") 

1739 yield begin, end 

1740 
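An editor-added sketch of what this generator yields; the indices were worked out by hand and are illustrative:

```
s = r"temp: 20\N{DEGREE SIGN}C"
# The backslash sits at index 8 and the closing brace at index 22, so
# the generator yields (8, 22); breaking anywhere in that span would
# cut the \N{...} escape in half.
```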

1741 def _iter_fexpr_slices(self, string: str) -> Iterator[tuple[Index, Index]]: 

1742 """ 

1743 Yields: 

1744 All ranges of @string which, if @string were to be split there, 

1745 would result in the splitting of an f-expression (which is NOT 

1746 allowed). 

1747 """ 

1748 if "f" not in get_string_prefix(string).lower(): 

1749 return 

1750 yield from iter_fexpr_spans(string) 

1751 

1752 def _get_illegal_split_indices(self, string: str) -> set[Index]: 

1753 illegal_indices: set[Index] = set() 

1754 iterators = [ 

1755 self._iter_fexpr_slices(string), 

1756 self._iter_nameescape_slices(string), 

1757 ] 

1758 for it in iterators: 

1759 for begin, end in it: 

1760 illegal_indices.update(range(begin, end)) 

1761 return illegal_indices 

1762 

1763 def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]: 

1764 """ 

1765 This method contains the algorithm that StringSplitter uses to 

1766 determine which character to split each string at. 

1767 

1768 Args: 

1769 @string: The substring that we are attempting to split. 

1770 @max_break_idx: The ideal break index. We will return this value if it 

1771 meets all the necessary conditions. In the likely event that it 

1772 doesn't, we will try to find the closest index BELOW @max_break_idx 

1773 that does. If that fails, we will expand our search by also 

1774 considering all valid indices ABOVE @max_break_idx. 

1775 

1776 Pre-Conditions: 

1777 * assert_is_leaf_string(@string) 

1778 * 0 <= @max_break_idx < len(@string) 

1779 

1780 Returns: 

1781 break_idx, if an index can be found that meets all of the 

1782 conditions listed in the 'Transformations' section of this class's 

1783 docstring. 

1784 OR 

1785 None, otherwise. 

1786 """ 

1787 is_valid_index = is_valid_index_factory(string) 

1788 

1789 assert is_valid_index(max_break_idx) 

1790 assert_is_leaf_string(string) 

1791 

1792 _illegal_split_indices = self._get_illegal_split_indices(string) 

1793 

1794 def breaks_unsplittable_expression(i: Index) -> bool: 

1795 """ 

1796 Returns: 

1797 True iff returning @i would result in the splitting of an 

1798 unsplittable expression (which is NOT allowed). 

1799 """ 

1800 return i in _illegal_split_indices 

1801 

1802 def passes_all_checks(i: Index) -> bool: 

1803 """ 

1804 Returns: 

1805 True iff ALL of the conditions listed in the 'Transformations' 

1806 section of this class's docstring would be met by returning @i. 

1807 """ 

1808 is_space = string[i] == " " 

1809 is_split_safe = is_valid_index(i - 1) and string[i - 1] in SPLIT_SAFE_CHARS 

1810 

1811 is_not_escaped = True 

1812 j = i - 1 

1813 while is_valid_index(j) and string[j] == "\\": 

1814 is_not_escaped = not is_not_escaped 

1815 j -= 1 

1816 

1817 is_big_enough = ( 

1818 len(string[i:]) >= self.MIN_SUBSTR_SIZE 

1819 and len(string[:i]) >= self.MIN_SUBSTR_SIZE 

1820 ) 

1821 return ( 

1822 (is_space or is_split_safe) 

1823 and is_not_escaped 

1824 and is_big_enough 

1825 and not breaks_unsplittable_expression(i) 

1826 ) 

1827 

1828 # First, we check all indices BELOW @max_break_idx. 

1829 break_idx = max_break_idx 

1830 while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx): 

1831 break_idx -= 1 

1832 

1833 if not passes_all_checks(break_idx): 

1834 # If that fails, we check all indices ABOVE @max_break_idx. 

1835 # 

1836 # If we are able to find a valid index here, the next line is going 

1837 # to be longer than the specified line length, but it's probably 

1838 # better than doing nothing at all. 

1839 break_idx = max_break_idx + 1 

1840 while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx): 

1841 break_idx += 1 

1842 

1843 if not is_valid_index(break_idx) or not passes_all_checks(break_idx): 

1844 return None 

1845 

1846 return break_idx 

1847 
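The search order above (walk left from the ideal index, then right only as a last resort) distills to the sketch below; this is an editor's re-derivation, not code from this module:

```
def first_passing_index(max_idx, passes, n):
    """Mirror of _get_break_idx's scan: below max_idx first, then above."""
    i = max_idx
    while i - 1 >= 0 and not passes(i):
        i -= 1
    if not passes(i):
        # Accept a slightly over-long line rather than doing nothing.
        i = max_idx + 1
        while i + 1 < n and not passes(i):
            i += 1
    return i if 0 <= i < n and passes(i) else None
```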

1848 def _maybe_normalize_string_quotes(self, leaf: Leaf) -> None: 

1849 if self.normalize_strings: 

1850 leaf.value = normalize_string_quotes(leaf.value) 

1851 
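For context (editor's note): `normalize_string_quotes` from black.strings prefers double quotes unless switching would introduce new escapes:

```
from black.strings import normalize_string_quotes

assert normalize_string_quotes("'abc'") == '"abc"'
# Quotes stay single when switching would force new escapes:
assert normalize_string_quotes("'say \"hi\"'") == "'say \"hi\"'"
```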

1852 def _normalize_f_string(self, string: str, prefix: str) -> str: 

1853 """ 

1854 Pre-Conditions: 

1855 * assert_is_leaf_string(@string) 

1856 

1857 Returns: 

1858 * If @string is an f-string that contains no f-expressions, we 

1859 return a string identical to @string except that the 'f' prefix 

1860 has been stripped and all double braces (i.e. '{{' or '}}') have 

1861 been normalized (i.e. turned into '{' or '}'). 

1862 OR 

1863 * Otherwise, we return @string. 

1864 """ 

1865 assert_is_leaf_string(string) 

1866 

1867 if "f" in prefix and not fstring_contains_expr(string): 

1868 new_prefix = prefix.replace("f", "") 

1869 

1870 temp = string[len(prefix) :] 

1871 temp = re.sub(r"\{\{", "{", temp) 

1872 temp = re.sub(r"\}\}", "}", temp) 

1873 new_string = temp 

1874 

1875 return f"{new_prefix}{new_string}" 

1876 else: 

1877 return string 

1878 
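An illustrative sketch (editor's addition) of the two branches described above:

```
# With no f-expressions, the prefix is dropped and doubled braces collapse:
#   _normalize_f_string('f"no fields {{x}}"', "f") -> '"no fields {x}"'
# With a real f-expression, the string comes back untouched:
#   _normalize_f_string('f"x = {x}"', "f")         -> 'f"x = {x}"'
```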

1879 def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> list[Leaf]: 

1880 LL = list(leaves) 

1881 

1882 string_op_leaves = [] 

1883 i = 0 

1884 while LL[i].type in self.STRING_OPERATORS + [token.NAME]: 

1885 prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip()) 

1886 string_op_leaves.append(prefix_leaf) 

1887 i += 1 

1888 return string_op_leaves 

1889 

1890 

1891class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): 

1892 """ 

1893 StringTransformer that wraps strings in parens and then splits at the LPAR. 

1894 

1895 Requirements: 

1896 All of the requirements listed in BaseStringSplitter's docstring in 

1897 addition to the requirements listed below: 

1898 

1899 * The line is a return/yield statement, which returns/yields a string. 

1900 OR 

1901 * The line is part of a ternary expression (e.g. `x = y if cond else 

1902 z`) such that the line starts with `else <string>`, where <string> is 

1903 some string. 

1904 OR 

1905 * The line is an assert statement, which ends with a string. 

1906 OR 

1907 * The line is an assignment statement (e.g. `x = <string>` or `x += 

1908 <string>`) such that the variable is being assigned the value of some 

1909 string. 

1910 OR 

1911 * The line is a dictionary key assignment where some valid key is being 

1912 assigned the value of some string. 

1913 OR 

1914 * The line is a lambda expression and the value is a string. 

1915 OR 

1916 * The line starts with an "atom" string that prefers to be wrapped in 

1917 parens. Wrapping is preferred when the string is an immediate child of 

1918 a list/set/tuple literal, AND the string is surrounded by commas (or is 

1919 the first/last child). 

1920 

1921 Transformations: 

1922 The chosen string is wrapped in parentheses and then split at the LPAR. 

1923 

1924 We then have one line which ends with an LPAR and another line that 

1925 starts with the chosen string. The latter line is then split again at 

1926 the RPAR. This results in the RPAR (and possibly a trailing comma) 

1927 being placed on its own line. 

1928 

1929 NOTE: If any leaves exist to the right of the chosen string (except 

1930 for a trailing comma, which would be placed after the RPAR), those 

1931 leaves are placed inside the parentheses. In effect, the chosen 

1932 string is not necessarily being "wrapped" by parentheses. We can, 

1933 however, count on the LPAR being placed directly before the chosen 

1934 string. 

1935 

1936 In other words, StringParenWrapper creates "atom" strings. These 

1937 can then be split again by StringSplitter, if necessary. 

1938 

1939 Collaborations: 

1940 In the event that a string line split by StringParenWrapper is 

1941 changed such that it no longer needs to be given its own line, 

1942 StringParenWrapper relies on StringParenStripper to clean up the 

1943 parentheses it created. 

1944 

1945 For "atom" strings that prefer to be wrapped in parens, it requires 

1946 StringSplitter to hold the split until the string is wrapped in parens. 

1947 """ 

1948 
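A before/after sketch (editor's addition) of the transformation described above:

```
# Before (too long for the line limit):
#     return "a very long string that will not fit on one line ..."
# After StringParenWrapper (conceptually):
#     return (
#         "a very long string that will not fit on one line ..."
#     )
# StringSplitter may then break the now-"atom" string further.
```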

1949 def do_splitter_match(self, line: Line) -> TMatchResult: 

1950 LL = line.leaves 

1951 

1952 if line.leaves[-1].type in OPENING_BRACKETS: 

1953 return TErr( 

1954 "Cannot wrap parens around a line that ends in an opening bracket." 

1955 ) 

1956 

1957 string_idx = ( 

1958 self._return_match(LL) 

1959 or self._else_match(LL) 

1960 or self._assert_match(LL) 

1961 or self._assign_match(LL) 

1962 or self._dict_or_lambda_match(LL) 

1963 or self._prefer_paren_wrap_match(LL) 

1964 ) 

1965 

1966 if string_idx is not None: 

1967 string_value = line.leaves[string_idx].value 

1968 # If the string has neither spaces nor East Asian stops... 

1969 if not any( 

1970 char == " " or char in SPLIT_SAFE_CHARS for char in string_value 

1971 ): 

1972 # And will still violate the line length limit when split... 

1973 max_string_width = self.line_length - ((line.depth + 1) * 4) 

1974 if str_width(string_value) > max_string_width: 

1975 # And has no associated custom splits... 

1976 if not self.has_custom_splits(string_value): 

1977 # Then we should NOT put this string on its own line. 

1978 return TErr( 

1979 "We do not wrap long strings in parentheses when the" 

1980 " resultant line would still be over the specified line" 

1981 " length and can't be split further by StringSplitter." 

1982 ) 

1983 return Ok([string_idx]) 

1984 

1985 return TErr("This line does not contain any non-atomic strings.") 

1986 

1987 @staticmethod 

1988 def _return_match(LL: list[Leaf]) -> Optional[int]: 

1989 """ 

1990 Returns: 

1991 string_idx such that @LL[string_idx] is equal to our target (i.e. 

1992 matched) string, if this line matches the return/yield statement 

1993 requirements listed in the 'Requirements' section of this class's 

1994 docstring. 

1995 OR 

1996 None, otherwise. 

1997 """ 

1998 # If this line is a part of a return/yield statement and the first leaf 

1999 # contains either the "return" or "yield" keywords... 

2000 if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[ 

2001 0 

2002 ].value in ["return", "yield"]: 

2003 is_valid_index = is_valid_index_factory(LL) 

2004 

2005 idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1 

2006 # The next visible leaf MUST contain a string... 

2007 if is_valid_index(idx) and LL[idx].type == token.STRING: 

2008 return idx 

2009 

2010 return None 

2011 

2012 @staticmethod 

2013 def _else_match(LL: list[Leaf]) -> Optional[int]: 

2014 """ 

2015 Returns: 

2016 string_idx such that @LL[string_idx] is equal to our target (i.e. 

2017 matched) string, if this line matches the ternary expression 

2018 requirements listed in the 'Requirements' section of this class's 

2019 docstring. 

2020 OR 

2021 None, otherwise. 

2022 """ 

2023 # If this line is a part of a ternary expression and the first leaf 

2024 # contains the "else" keyword... 

2025 if ( 

2026 parent_type(LL[0]) == syms.test 

2027 and LL[0].type == token.NAME 

2028 and LL[0].value == "else" 

2029 ): 

2030 is_valid_index = is_valid_index_factory(LL) 

2031 

2032 idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1 

2033 # The next visible leaf MUST contain a string... 

2034 if is_valid_index(idx) and LL[idx].type == token.STRING: 

2035 return idx 

2036 

2037 return None 

2038 

2039 @staticmethod 

2040 def _assert_match(LL: list[Leaf]) -> Optional[int]: 

2041 """ 

2042 Returns: 

2043 string_idx such that @LL[string_idx] is equal to our target (i.e. 

2044 matched) string, if this line matches the assert statement 

2045 requirements listed in the 'Requirements' section of this class's 

2046 docstring. 

2047 OR 

2048 None, otherwise. 

2049 """ 

2050 # If this line is a part of an assert statement and the first leaf 

2051 # contains the "assert" keyword... 

2052 if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert": 

2053 is_valid_index = is_valid_index_factory(LL) 

2054 

2055 for i, leaf in enumerate(LL): 

2056 # We MUST find a comma... 

2057 if leaf.type == token.COMMA: 

2058 idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1 

2059 

2060 # That comma MUST be followed by a string... 

2061 if is_valid_index(idx) and LL[idx].type == token.STRING: 

2062 string_idx = idx 

2063 

2064 # Skip the string trailer, if one exists. 

2065 string_parser = StringParser() 

2066 idx = string_parser.parse(LL, string_idx) 

2067 

2068 # But no more leaves are allowed... 

2069 if not is_valid_index(idx): 

2070 return string_idx 

2071 

2072 return None 

2073 

2074 @staticmethod 

2075 def _assign_match(LL: list[Leaf]) -> Optional[int]: 

2076 """ 

2077 Returns: 

2078 string_idx such that @LL[string_idx] is equal to our target (i.e. 

2079 matched) string, if this line matches the assignment statement 

2080 requirements listed in the 'Requirements' section of this class's 

2081 docstring. 

2082 OR 

2083 None, otherwise. 

2084 """ 

2085 # If this line is a part of an expression statement or is a function 

2086 # argument AND the first leaf contains a variable name... 

2087 if ( 

2088 parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power] 

2089 and LL[0].type == token.NAME 

2090 ): 

2091 is_valid_index = is_valid_index_factory(LL) 

2092 

2093 for i, leaf in enumerate(LL): 

2094 # We MUST find either an '=' or '+=' symbol... 

2095 if leaf.type in [token.EQUAL, token.PLUSEQUAL]: 

2096 idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1 

2097 

2098 # That symbol MUST be followed by a string... 

2099 if is_valid_index(idx) and LL[idx].type == token.STRING: 

2100 string_idx = idx 

2101 

2102 # Skip the string trailer, if one exists. 

2103 string_parser = StringParser() 

2104 idx = string_parser.parse(LL, string_idx) 

2105 

2106 # The next leaf MAY be a comma iff this line is a part 

2107 # of a function argument... 

2108 if ( 

2109 parent_type(LL[0]) == syms.argument 

2110 and is_valid_index(idx) 

2111 and LL[idx].type == token.COMMA 

2112 ): 

2113 idx += 1 

2114 

2115 # But no more leaves are allowed... 

2116 if not is_valid_index(idx): 

2117 return string_idx 

2118 

2119 return None 

2120 

2121 @staticmethod 

2122 def _dict_or_lambda_match(LL: list[Leaf]) -> Optional[int]: 

2123 """ 

2124 Returns: 

2125 string_idx such that @LL[string_idx] is equal to our target (i.e. 

2126 matched) string, if this line matches the dictionary key assignment 

2127 statement or lambda expression requirements listed in the 

2128 'Requirements' section of this class's docstring. 

2129 OR 

2130 None, otherwise. 

2131 """ 

2132 # If this line is a part of a dictionary key assignment or lambda expression... 

2133 parent_types = [parent_type(LL[0]), parent_type(LL[0].parent)] 

2134 if syms.dictsetmaker in parent_types or syms.lambdef in parent_types: 

2135 is_valid_index = is_valid_index_factory(LL) 

2136 

2137 for i, leaf in enumerate(LL): 

2138 # We MUST find a colon, it can either be dict's or lambda's colon... 

2139 if leaf.type == token.COLON and i < len(LL) - 1: 

2140 idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1 

2141 

2142 # That colon MUST be followed by a string... 

2143 if is_valid_index(idx) and LL[idx].type == token.STRING: 

2144 string_idx = idx 

2145 

2146 # Skip the string trailer, if one exists. 

2147 string_parser = StringParser() 

2148 idx = string_parser.parse(LL, string_idx) 

2149 

2150 # That string MAY be followed by a comma... 

2151 if is_valid_index(idx) and LL[idx].type == token.COMMA: 

2152 idx += 1 

2153 

2154 # But no more leaves are allowed... 

2155 if not is_valid_index(idx): 

2156 return string_idx 

2157 

2158 return None 

2159 
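Representative lines each matcher above accepts (editor's sketch; the strings are placeholders):

```
# return "long string ..."           -> _return_match
# x if cond else "long string ..."   -> _else_match (the `else <string>` part)
# assert cond, "long string ..."     -> _assert_match
# x = "long string ..."              -> _assign_match
# d = {"key": "long string ..."}     -> _dict_or_lambda_match (dict colon)
# f = lambda v: "long string ..."    -> _dict_or_lambda_match (lambda colon)
```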

2160 def do_transform( 

2161 self, line: Line, string_indices: list[int] 

2162 ) -> Iterator[TResult[Line]]: 

2163 LL = line.leaves 

2164 assert len(string_indices) == 1, ( 

2165 f"{self.__class__.__name__} should only find one match at a time, found" 

2166 f" {len(string_indices)}" 

2167 ) 

2168 string_idx = string_indices[0] 

2169 

2170 is_valid_index = is_valid_index_factory(LL) 

2171 insert_str_child = insert_str_child_factory(LL[string_idx]) 

2172 

2173 comma_idx = -1 

2174 ends_with_comma = False 

2175 if LL[comma_idx].type == token.COMMA: 

2176 ends_with_comma = True 

2177 

2178 leaves_to_steal_comments_from = [LL[string_idx]] 

2179 if ends_with_comma: 

2180 leaves_to_steal_comments_from.append(LL[comma_idx]) 

2181 

2182 # --- First Line 

2183 first_line = line.clone() 

2184 left_leaves = LL[:string_idx] 

2185 

2186 # We have to remember to account for (possibly invisible) LPAR and RPAR 

2187 # leaves that already wrapped the target string. If these leaves do 

2188 # exist, we will replace them with our own LPAR and RPAR leaves. 

2189 old_parens_exist = False 

2190 if left_leaves and left_leaves[-1].type == token.LPAR: 

2191 old_parens_exist = True 

2192 leaves_to_steal_comments_from.append(left_leaves[-1]) 

2193 left_leaves.pop() 

2194 

2195 append_leaves(first_line, line, left_leaves) 

2196 

2197 lpar_leaf = Leaf(token.LPAR, "(") 

2198 if old_parens_exist: 

2199 replace_child(LL[string_idx - 1], lpar_leaf) 

2200 else: 

2201 insert_str_child(lpar_leaf) 

2202 first_line.append(lpar_leaf) 

2203 

2204 # We move inline comments that were originally to the right of the 

2205 # target string onto the first line. They will now be shown to the 

2206 # right of the LPAR. 

2207 for leaf in leaves_to_steal_comments_from: 

2208 for comment_leaf in line.comments_after(leaf): 

2209 first_line.append(comment_leaf, preformatted=True) 

2210 

2211 yield Ok(first_line) 

2212 

2213 # --- Middle (String) Line 

2214 # We only need to yield one (possibly too long) string line, since the 

2215 # `StringSplitter` will break it down further if necessary. 

2216 string_value = LL[string_idx].value 

2217 string_line = Line( 

2218 mode=line.mode, 

2219 depth=line.depth + 1, 

2220 inside_brackets=True, 

2221 should_split_rhs=line.should_split_rhs, 

2222 magic_trailing_comma=line.magic_trailing_comma, 

2223 ) 

2224 string_leaf = Leaf(token.STRING, string_value) 

2225 insert_str_child(string_leaf) 

2226 string_line.append(string_leaf) 

2227 

2228 old_rpar_leaf = None 

2229 if is_valid_index(string_idx + 1): 

2230 right_leaves = LL[string_idx + 1 :] 

2231 if ends_with_comma: 

2232 right_leaves.pop() 

2233 

2234 if old_parens_exist: 

2235 assert right_leaves and right_leaves[-1].type == token.RPAR, ( 

2236 "Apparently, old parentheses do NOT exist?!" 

2237 f" (left_leaves={left_leaves}, right_leaves={right_leaves})" 

2238 ) 

2239 old_rpar_leaf = right_leaves.pop() 

2240 elif right_leaves and right_leaves[-1].type == token.RPAR: 

2241 # Special case for lambda expressions as dict's value, e.g.: 

2242 # my_dict = { 

2243 # "key": lambda x: f"formatted: {x}", 

2244 # } 

2245 # After wrapping the dict's value with parentheses, the string is 

2246 # followed by a RPAR but its opening bracket is lambda's, not 

2247 # the string's: 

2248 # "key": (lambda x: f"formatted: {x}"), 

2249 opening_bracket = right_leaves[-1].opening_bracket 

2250 if opening_bracket is not None and opening_bracket in left_leaves: 

2251 index = left_leaves.index(opening_bracket) 

2252 if ( 

2253 0 < index < len(left_leaves) - 1 

2254 and left_leaves[index - 1].type == token.COLON 

2255 and left_leaves[index + 1].value == "lambda" 

2256 ): 

2257 right_leaves.pop() 

2258 

2259 append_leaves(string_line, line, right_leaves) 

2260 

2261 yield Ok(string_line) 

2262 

2263 # --- Last Line 

2264 last_line = line.clone() 

2265 last_line.bracket_tracker = first_line.bracket_tracker 

2266 

2267 new_rpar_leaf = Leaf(token.RPAR, ")") 

2268 if old_rpar_leaf is not None: 

2269 replace_child(old_rpar_leaf, new_rpar_leaf) 

2270 else: 

2271 insert_str_child(new_rpar_leaf) 

2272 last_line.append(new_rpar_leaf) 

2273 

2274 # If the target string ended with a comma, we place this comma to the 

2275 # right of the RPAR on the last line. 

2276 if ends_with_comma: 

2277 comma_leaf = Leaf(token.COMMA, ",") 

2278 replace_child(LL[comma_idx], comma_leaf) 

2279 last_line.append(comma_leaf) 

2280 

2281 yield Ok(last_line) 

2282 
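Putting the three yields together (editor's sketch) for a call argument that ends with a comma:

```
# Line being transformed (a call argument):  xyz="long string ...",
# Yielded lines, in order:
#     xyz=(                  # first line: leaves left of the string + LPAR
#         "long string ..."  # string line; StringSplitter may split further
#     ),                     # last line: RPAR, then the stolen trailing comma
```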

2283 

2284class StringParser: 

2285 """ 

2286 A state machine that aids in parsing a string's "trailer", which can be 

2287 either non-existent, an old-style formatting sequence (e.g. `% varX` or `% 

2288 (varX, varY)`), or a method-call / attribute access (e.g. `.format(varX, 

2289 varY)`). 

2290 

2291 NOTE: A new StringParser object MUST be instantiated for each string 

2292 trailer we need to parse. 

2293 

2294 Examples: 

2295 We shall assume that `line` equals the `Line` object that corresponds 

2296 to the following line of python code: 

2297 ``` 

2298 x = "Some {}.".format("String") + some_other_string 

2299 ``` 

2300 

2301 Furthermore, we will assume that `string_idx` is some index such that: 

2302 ``` 

2303 assert line.leaves[string_idx].value == "Some {}." 

2304 ``` 

2305 

2306 The following code snippet then holds: 

2307 ``` 

2308 string_parser = StringParser() 

2309 idx = string_parser.parse(line.leaves, string_idx) 

2310 assert line.leaves[idx].type == token.PLUS 

2311 ``` 

2312 """ 

2313 

2314 DEFAULT_TOKEN: Final = 20210605 

2315 

2316 # String Parser States 

2317 START: Final = 1 

2318 DOT: Final = 2 

2319 NAME: Final = 3 

2320 PERCENT: Final = 4 

2321 SINGLE_FMT_ARG: Final = 5 

2322 LPAR: Final = 6 

2323 RPAR: Final = 7 

2324 DONE: Final = 8 

2325 

2326 # Lookup Table for Next State 

2327 _goto: Final[dict[tuple[ParserState, NodeType], ParserState]] = { 

2328 # A string trailer may start with '.' OR '%'. 

2329 (START, token.DOT): DOT, 

2330 (START, token.PERCENT): PERCENT, 

2331 (START, DEFAULT_TOKEN): DONE, 

2332 # A '.' MUST be followed by an attribute or method name. 

2333 (DOT, token.NAME): NAME, 

2334 # A method name MUST be followed by an '(', whereas an attribute name 

2335 # is the last symbol in the string trailer. 

2336 (NAME, token.LPAR): LPAR, 

2337 (NAME, DEFAULT_TOKEN): DONE, 

2338 # A '%' symbol can be followed by an '(' or a single argument (e.g. a 

2339 # string or variable name). 

2340 (PERCENT, token.LPAR): LPAR, 

2341 (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG, 

2342 # If a '%' symbol is followed by a single argument, that argument is 

2343 # the last leaf in the string trailer. 

2344 (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE, 

2345 # If present, a ')' symbol is the last symbol in a string trailer. 

2346 # (NOTE: LPARS and nested RPARS are not included in this lookup table, 

2347 # since they are treated as a special case by the parsing logic in this 

2348 # class's implementation.) 

2349 (RPAR, DEFAULT_TOKEN): DONE, 

2350 } 

2351 
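A state trace (editor's addition) through this table for the trailer in `"{}".format(x) + y`:

```
# Leaves after the string:  .   format   (   x   )   +
# States visited:
#   START --'.'-->  DOT --NAME--> NAME --'('--> LPAR
#   (remain in LPAR until the matching ')') --')'--> RPAR
#   RPAR --DEFAULT_TOKEN--> DONE, so '+' is not part of the trailer.
```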

2352 def __init__(self) -> None: 

2353 self._state = self.START 

2354 self._unmatched_lpars = 0 

2355 

2356 def parse(self, leaves: list[Leaf], string_idx: int) -> int: 

2357 """ 

2358 Pre-conditions: 

2359 * @leaves[@string_idx].type == token.STRING 

2360 

2361 Returns: 

2362 The index directly after the last leaf which is a part of the string 

2363 trailer, if a "trailer" exists. 

2364 OR 

2365 @string_idx + 1, if no string "trailer" exists. 

2366 """ 

2367 assert leaves[string_idx].type == token.STRING 

2368 

2369 idx = string_idx + 1 

2370 while idx < len(leaves) and self._next_state(leaves[idx]): 

2371 idx += 1 

2372 return idx 

2373 

2374 def _next_state(self, leaf: Leaf) -> bool: 

2375 """ 

2376 Pre-conditions: 

2377 * On the first call to this function, @leaf MUST be the leaf that 

2378 was directly after the string leaf in question (e.g. if our target 

2379 string is `line.leaves[i]`, then the first call to this method must 

2380 be passed `line.leaves[i + 1]`). 

2381 * On the next call to this function, the leaf parameter passed in 

2382 MUST be the leaf directly following @leaf. 

2383 

2384 Returns: 

2385 True iff @leaf is a part of the string's trailer. 

2386 """ 

2387 # We ignore empty LPAR or RPAR leaves. 

2388 if is_empty_par(leaf): 

2389 return True 

2390 

2391 next_token = leaf.type 

2392 if next_token == token.LPAR: 

2393 self._unmatched_lpars += 1 

2394 

2395 current_state = self._state 

2396 

2397 # The LPAR parser state is a special case. We will return True until we 

2398 # find the matching RPAR token. 

2399 if current_state == self.LPAR: 

2400 if next_token == token.RPAR: 

2401 self._unmatched_lpars -= 1 

2402 if self._unmatched_lpars == 0: 

2403 self._state = self.RPAR 

2404 # Otherwise, we use a lookup table to determine the next state. 

2405 else: 

2406 # If the lookup table matches the current state to the next 

2407 # token, we use the lookup table. 

2408 if (current_state, next_token) in self._goto: 

2409 self._state = self._goto[current_state, next_token] 

2410 else: 

2411 # Otherwise, we check if the current state was assigned a 

2412 # default. 

2413 if (current_state, self.DEFAULT_TOKEN) in self._goto: 

2414 self._state = self._goto[current_state, self.DEFAULT_TOKEN] 

2415 # If no default has been assigned, then this parser has a logic 

2416 # error. 

2417 else: 

2418 raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!") 

2419 

2420 if self._state == self.DONE: 

2421 return False 

2422 

2423 return True 

2424 

2425 

2426def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]: 

2427 """ 

2428 Factory for a convenience function that is used to orphan @string_leaf 

2429 and then insert multiple new leaves into the same part of the node 

2430 structure that @string_leaf had originally occupied. 

2431 

2432 Examples: 

2433 Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N = 

2434 string_leaf.parent`. Assume the node `N` has the following 

2435 original structure: 

2436 

2437 Node( 

2438 expr_stmt, [ 

2439 Leaf(NAME, 'x'), 

2440 Leaf(EQUAL, '='), 

2441 Leaf(STRING, '"foo"'), 

2442 ] 

2443 ) 

2444 

2445 We then run the code snippet shown below. 

2446 ``` 

2447 insert_str_child = insert_str_child_factory(string_leaf) 

2448 

2449 lpar = Leaf(token.LPAR, '(') 

2450 insert_str_child(lpar) 

2451 

2452 bar = Leaf(token.STRING, '"bar"') 

2453 insert_str_child(bar) 

2454 

2455 rpar = Leaf(token.RPAR, ')') 

2456 insert_str_child(rpar) 

2457 ``` 

2458 

2459 After which point, it follows that `string_leaf.parent is None` and 

2460 the node `N` now has the following structure: 

2461 

2462 Node( 

2463 expr_stmt, [ 

2464 Leaf(NAME, 'x'), 

2465 Leaf(EQUAL, '='), 

2466 Leaf(LPAR, '('), 

2467 Leaf(STRING, '"bar"'), 

2468 Leaf(RPAR, ')'), 

2469 ] 

2470 ) 

2471 """ 

2472 string_parent = string_leaf.parent 

2473 string_child_idx = string_leaf.remove() 

2474 

2475 def insert_str_child(child: LN) -> None: 

2476 nonlocal string_child_idx 

2477 

2478 assert string_parent is not None 

2479 assert string_child_idx is not None 

2480 

2481 string_parent.insert_child(string_child_idx, child) 

2482 string_child_idx += 1 

2483 

2484 return insert_str_child 

2485 

2486 

2487def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]: 

2488 """ 

2489 Examples: 

2490 ``` 

2491 my_list = [1, 2, 3] 

2492 

2493 is_valid_index = is_valid_index_factory(my_list) 

2494 

2495 assert is_valid_index(0) 

2496 assert is_valid_index(2) 

2497 

2498 assert not is_valid_index(3) 

2499 assert not is_valid_index(-1) 

2500 ``` 

2501 """ 

2502 

2503 def is_valid_index(idx: int) -> bool: 

2504 """ 

2505 Returns: 

2506 True iff @idx is non-negative AND seq[@idx] does NOT raise an 

2507 IndexError. 

2508 """ 

2509 return 0 <= idx < len(seq) 

2510 

2511 return is_valid_index