Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/cssselect/parser.py: 77%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

603 statements  

1""" 

2cssselect.parser 

3================ 

4 

5Tokenizer, parser and parsed objects for CSS selectors. 

6 

7 

8:copyright: (c) 2007-2012 Ian Bicking and contributors. 

9See AUTHORS for more details. 

10:license: BSD, see LICENSE for more details. 

11 

12""" 

13 

14from __future__ import annotations 

15 

16import operator 

17import re 

18import sys 

19from typing import TYPE_CHECKING, Literal, Optional, Protocol, Union, cast, overload 

20 

21if TYPE_CHECKING: 

22 from collections.abc import Iterable, Iterator, Sequence 

23 

24 # typing.Self requires Python 3.11 

25 from typing_extensions import Self 

26 

27 

def ascii_lower(string: str) -> str:
    """Return *string* with only its ASCII letters lower-cased.

    The text is round-tripped through UTF-8 bytes because ``bytes.lower()``
    only affects ASCII letters, so non-ASCII characters are left untouched.
    """
    encoded = string.encode("utf8")
    lowered = encoded.lower()
    return lowered.decode("utf8")

31 

32 

class SelectorError(Exception):
    """Common parent for :class:`SelectorSyntaxError` and
    :class:`ExpressionError`.

    You can just use ``except SelectorError:`` when calling
    :meth:`~GenericTranslator.css_to_xpath` and handle both exception types.

    """

41 

42 

class SelectorSyntaxError(SelectorError, SyntaxError):
    """Parsing a selector that does not match the grammar.

    Also derives from the built-in :class:`SyntaxError`.
    """

45 

46 

47#### Parsed objects 

48 

# Union of every node class that can appear in a parsed selector tree.
# The members are written as strings (forward references) because the
# classes are defined later in this module.
Tree = Union[
    "Element",
    "Hash",
    "Class",
    "Function",
    "Pseudo",
    "Attrib",
    "Negation",
    "Relation",
    "Matching",
    "SpecificityAdjustment",
    "CombinedSelector",
]
# A pseudo-element is either a :class:`FunctionalPseudoElement` or a plain
# identifier string.
PseudoElement = Union["FunctionalPseudoElement", str]

63 

64 

class Selector:
    """
    A single parsed selector: a tree of parsed objects plus an optional
    pseudo-element.

    :meth:`~GenericTranslator.selector_to_xpath` accepts this object,
    but ignores :attr:`pseudo_element`. It is the user’s responsibility
    to account for pseudo-elements and reject selectors with unknown
    or unsupported pseudo-elements.

    """

    def __init__(self, tree: Tree, pseudo_element: PseudoElement | None = None) -> None:
        self.parsed_tree = tree
        # Plain (non-functional) pseudo-element names are normalised to
        # ASCII lower case; ``None`` and functional ones are stored as given.
        keep_as_is = pseudo_element is None or isinstance(
            pseudo_element, FunctionalPseudoElement
        )
        if not keep_as_is:
            pseudo_element = ascii_lower(pseudo_element)
        #: A :class:`FunctionalPseudoElement`,
        #: or the identifier for the pseudo-element as a string,
        #: or ``None``.
        #:
        #: +-------------------------+----------------+--------------------------------+
        #: |                         | Selector       | Pseudo-element                 |
        #: +=========================+================+================================+
        #: | CSS3 syntax             | ``a::before``  | ``'before'``                   |
        #: +-------------------------+----------------+--------------------------------+
        #: | Older syntax            | ``a:before``   | ``'before'``                   |
        #: +-------------------------+----------------+--------------------------------+
        #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'``                   |
        #: | not in Selectors3       |                |                                |
        #: +-------------------------+----------------+--------------------------------+
        #: | Invalid pseudo-class    | ``li:marker``  | ``None``                       |
        #: +-------------------------+----------------+--------------------------------+
        #: | Functional              | ``a::foo(2)``  | ``FunctionalPseudoElement(…)`` |
        #: +-------------------------+----------------+--------------------------------+
        #:
        #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement
        self.pseudo_element = pseudo_element

    def __repr__(self) -> str:
        pseudo = self.pseudo_element
        if isinstance(pseudo, FunctionalPseudoElement):
            suffix = repr(pseudo)
        elif pseudo:
            suffix = f"::{pseudo}"
        else:
            suffix = ""
        return f"{self.__class__.__name__}[{self.parsed_tree!r}{suffix}]"

    def canonical(self) -> str:
        """Return a CSS representation for this selector (a string)"""
        pseudo = self.pseudo_element
        if isinstance(pseudo, FunctionalPseudoElement):
            suffix = f"::{pseudo.canonical()}"
        elif pseudo:
            suffix = f"::{pseudo}"
        else:
            suffix = ""
        css = f"{self.parsed_tree.canonical()}{suffix}"
        # Drop leading universal selectors unless "*" is the whole output.
        return css.lstrip("*") if len(css) > 1 else css

    def specificity(self) -> tuple[int, int, int]:
        """Return the specificity_ of this selector as a tuple of 3 integers.

        A pseudo-element, when present, adds one to the last component.

        .. _specificity: http://www.w3.org/TR/selectors/#specificity

        """
        ids, classes, elements = self.parsed_tree.specificity()
        return ids, classes, elements + (1 if self.pseudo_element else 0)

136 

137 

class Class:
    """
    Represents selector.class_name
    """

    def __init__(self, selector: Tree, class_name: str) -> None:
        self.selector = selector
        self.class_name = class_name

    def __repr__(self) -> str:
        own_name = self.__class__.__name__
        return f"{own_name}[{self.selector!r}.{self.class_name}]"

    def canonical(self) -> str:
        """Return the CSS text: the wrapped selector followed by ``.class_name``."""
        prefix = self.selector.canonical()
        return f"{prefix}.{self.class_name}"

    def specificity(self) -> tuple[int, int, int]:
        """Return the wrapped selector's specificity with the middle component + 1."""
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements

157 

158 

class FunctionalPseudoElement:
    """
    Represents selector::name(arguments)

    .. attribute:: name

        The name (identifier) of the pseudo-element, as a string.
        It is stored ASCII-lower-cased.

    .. attribute:: arguments

        The arguments of the pseudo-element, as a list of tokens.

        **Note:** tokens are not part of the public API,
        and may change between cssselect versions.
        Use at your own risks.

    """

    def __init__(self, name: str, arguments: Sequence[Token]):
        self.name = ascii_lower(name)
        self.arguments = arguments

    def __repr__(self) -> str:
        values = [argument.value for argument in self.arguments]
        return f"{self.__class__.__name__}[::{self.name}({values!r})]"

    def argument_types(self) -> list[str]:
        """Return the ``type`` of every argument token, in order."""
        return [argument.type for argument in self.arguments]

    def canonical(self) -> str:
        """Return ``name(args)`` where args is the tokens' CSS text joined with no separator."""
        pieces = [argument.css() for argument in self.arguments]
        return f"{self.name}({''.join(pieces)})"

191 

192 

class Function:
    """
    Represents selector:name(expr)

    The name is stored ASCII-lower-cased; the arguments are kept as tokens.
    """

    def __init__(self, selector: Tree, name: str, arguments: Sequence[Token]) -> None:
        self.selector = selector
        self.name = ascii_lower(name)
        self.arguments = arguments

    def __repr__(self) -> str:
        values = [argument.value for argument in self.arguments]
        return f"{self.__class__.__name__}[{self.selector!r}:{self.name}({values!r})]"

    def argument_types(self) -> list[str]:
        """Return the ``type`` of every argument token, in order."""
        return [argument.type for argument in self.arguments]

    def canonical(self) -> str:
        """Return the CSS text ``selector:name(args)``."""
        pieces = [argument.css() for argument in self.arguments]
        return f"{self.selector.canonical()}:{self.name}({''.join(pieces)})"

    def specificity(self) -> tuple[int, int, int]:
        """Return the wrapped selector's specificity with the middle component + 1."""
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements

218 

219 

class Pseudo:
    """
    Represents selector:ident

    The identifier is stored ASCII-lower-cased.
    """

    def __init__(self, selector: Tree, ident: str) -> None:
        self.selector = selector
        self.ident = ascii_lower(ident)

    def __repr__(self) -> str:
        own_name = self.__class__.__name__
        return f"{own_name}[{self.selector!r}:{self.ident}]"

    def canonical(self) -> str:
        """Return the CSS text ``selector:ident``."""
        prefix = self.selector.canonical()
        return f"{prefix}:{self.ident}"

    def specificity(self) -> tuple[int, int, int]:
        """Return the wrapped selector's specificity with the middle component + 1."""
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements

239 

240 

class Negation:
    """
    Represents selector:not(subselector)
    """

    def __init__(self, selector: Tree, subselector: Tree) -> None:
        self.selector = selector
        self.subselector = subselector

    def __repr__(self) -> str:
        own_name = self.__class__.__name__
        return f"{own_name}[{self.selector!r}:not({self.subselector!r})]"

    def canonical(self) -> str:
        """Return the CSS text ``selector:not(subselector)``."""
        inner = self.subselector.canonical()
        # Drop leading universal selectors unless "*" is the whole argument.
        if len(inner) > 1:
            inner = inner.lstrip("*")
        return f"{self.selector.canonical()}:not({inner})"

    def specificity(self) -> tuple[int, int, int]:
        """Return the component-wise sum of both sides' specificities."""
        outer = self.selector.specificity()
        inner = self.subselector.specificity()
        return outer[0] + inner[0], outer[1] + inner[1], outer[2] + inner[2]

263 

264 

class Relation:
    """
    Represents selector:has(subselector)

    The combinator token is stored, but neither ``repr()`` nor
    :meth:`canonical` include it in their output.
    """

    def __init__(self, selector: Tree, combinator: Token, subselector: Selector):
        self.selector = selector
        self.combinator = combinator
        self.subselector = subselector

    def __repr__(self) -> str:
        own_name = self.__class__.__name__
        return f"{own_name}[{self.selector!r}:has({self.subselector!r})]"

    def canonical(self) -> str:
        """Return the CSS text ``selector:has(subselector)``."""
        # The subselector may be a sequence or a single object; indexing a
        # non-subscriptable object raises TypeError and selects the fallback.
        try:
            inner = self.subselector[0].canonical()  # type: ignore[index]
        except TypeError:
            inner = self.subselector.canonical()
        if len(inner) > 1:
            inner = inner.lstrip("*")
        return f"{self.selector.canonical()}:has({inner})"

    def specificity(self) -> tuple[int, int, int]:
        """Return the component-wise sum of both sides' specificities."""
        outer = self.selector.specificity()
        try:
            inner = self.subselector[-1].specificity()  # type: ignore[index]
        except TypeError:
            inner = self.subselector.specificity()
        ids, classes, elements = inner
        return outer[0] + ids, outer[1] + classes, outer[2] + elements

294 

295 

class Matching:
    """
    Represents selector:is(selector_list)
    """

    def __init__(self, selector: Tree, selector_list: Iterable[Tree]):
        self.selector = selector
        self.selector_list = selector_list

    def __repr__(self) -> str:
        args_str = ", ".join(repr(s) for s in self.selector_list)
        return f"{self.__class__.__name__}[{self.selector!r}:is({args_str})]"

    def canonical(self) -> str:
        """Return the CSS text ``selector:is(arg, arg, ...)``.

        Leading universal selectors are dropped from each argument, except
        when the argument is exactly ``*`` (otherwise it would become an
        empty, invalid argument).
        """
        selector_arguments = []
        for s in self.selector_list:
            selarg = s.canonical()
            if len(selarg) > 1:
                selarg = selarg.lstrip("*")
            selector_arguments.append(selarg)
        args_str = ", ".join(selector_arguments)
        return f"{self.selector.canonical()}:is({args_str})"

    def specificity(self) -> tuple[int, int, int]:
        """Return the specificity of ``selector:is(...)``.

        ``:is()`` contributes the specificity of its most specific argument;
        that is added to the specificity of the selector it is attached to,
        in the same way as :class:`Negation` and :class:`Relation` do.
        An empty argument list contributes ``(0, 0, 0)``.
        """
        a1, b1, c1 = self.selector.specificity()
        a2, b2, c2 = max(
            (x.specificity() for x in self.selector_list), default=(0, 0, 0)
        )
        return a1 + a2, b1 + b2, c1 + c2

319 

320 

class SpecificityAdjustment:
    """
    Represents selector:where(selector_list)
    Same as selector:is(selector_list), but the pseudo-class itself
    always contributes a specificity of 0
    """

    def __init__(self, selector: Tree, selector_list: list[Tree]):
        self.selector = selector
        self.selector_list = selector_list

    def __repr__(self) -> str:
        args_str = ", ".join(repr(s) for s in self.selector_list)
        return f"{self.__class__.__name__}[{self.selector!r}:where({args_str})]"

    def canonical(self) -> str:
        """Return the CSS text ``selector:where(arg, arg, ...)``.

        Leading universal selectors are dropped from each argument, except
        when the argument is exactly ``*`` (otherwise it would become an
        empty, invalid argument).
        """
        selector_arguments = []
        for s in self.selector_list:
            selarg = s.canonical()
            if len(selarg) > 1:
                selarg = selarg.lstrip("*")
            selector_arguments.append(selarg)
        args_str = ", ".join(selector_arguments)
        return f"{self.selector.canonical()}:where({args_str})"

    def specificity(self) -> tuple[int, int, int]:
        """Return the specificity of ``selector:where(...)``.

        The ``:where()`` arguments contribute nothing, so the result is the
        specificity of the selector the pseudo-class is attached to.
        """
        a, b, c = self.selector.specificity()
        return a, b, c

345 

346 

class Attrib:
    """
    Represents selector[namespace|attrib operator value]

    For the ``"exists"`` operator there is no value and ``value`` is ``None``.
    """

    @overload
    def __init__(
        self,
        selector: Tree,
        namespace: str | None,
        attrib: str,
        operator: Literal["exists"],
        value: None,
    ) -> None: ...

    @overload
    def __init__(
        self,
        selector: Tree,
        namespace: str | None,
        attrib: str,
        operator: str,
        value: Token,
    ) -> None: ...

    def __init__(
        self,
        selector: Tree,
        namespace: str | None,
        attrib: str,
        operator: str,
        value: Token | None,
    ) -> None:
        self.selector = selector
        self.namespace = namespace
        self.attrib = attrib
        self.operator = operator
        self.value = value

    def __repr__(self) -> str:
        own_name = self.__class__.__name__
        if self.namespace:
            qualified = f"{self.namespace}|{self.attrib}"
        else:
            qualified = self.attrib
        if self.operator == "exists":
            return f"{own_name}[{self.selector!r}[{qualified}]]"
        assert self.value is not None
        return f"{own_name}[{self.selector!r}[{qualified} {self.operator} {self.value.value!r}]]"

    def canonical(self) -> str:
        """Return the CSS text ``selector[attrib]`` or ``selector[attrib<op><value>]``."""
        if self.namespace:
            qualified = f"{self.namespace}|{self.attrib}"
        else:
            qualified = self.attrib

        if self.operator != "exists":
            assert self.value is not None
            inside = f"{qualified}{self.operator}{self.value.css()}"
        else:
            inside = qualified

        return f"{self.selector.canonical()}[{inside}]"

    def specificity(self) -> tuple[int, int, int]:
        """Return the wrapped selector's specificity with the middle component + 1."""
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements

408 

409 

class Element:
    """
    Represents namespace|element

    `None` is for the universal selector '*'

    """

    def __init__(
        self, namespace: str | None = None, element: str | None = None
    ) -> None:
        self.namespace = namespace
        self.element = element

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}[{self.canonical()}]"

    def canonical(self) -> str:
        """Return ``element`` (or ``*``), prefixed with ``namespace|`` when a namespace is set."""
        local_name = self.element or "*"
        return f"{self.namespace}|{local_name}" if self.namespace else local_name

    def specificity(self) -> tuple[int, int, int]:
        """Return ``(0, 0, 1)`` for a named element and ``(0, 0, 0)`` otherwise."""
        return (0, 0, 1) if self.element else (0, 0, 0)

437 

438 

class Hash:
    """
    Represents selector#id
    """

    def __init__(self, selector: Tree, id: str) -> None:
        self.selector = selector
        self.id = id

    def __repr__(self) -> str:
        own_name = self.__class__.__name__
        return f"{own_name}[{self.selector!r}#{self.id}]"

    def canonical(self) -> str:
        """Return the CSS text ``selector#id``."""
        prefix = self.selector.canonical()
        return f"{prefix}#{self.id}"

    def specificity(self) -> tuple[int, int, int]:
        """Return the wrapped selector's specificity with the first component + 1."""
        ids, classes, elements = self.selector.specificity()
        return ids + 1, classes, elements

458 

459 

class CombinedSelector:
    """
    Represents two selectors joined by a combinator string
    (the descendant combinator is stored as a single space).
    """

    def __init__(self, selector: Tree, combinator: str, subselector: Tree) -> None:
        assert selector is not None
        self.selector = selector
        self.combinator = combinator
        self.subselector = subselector

    def __repr__(self) -> str:
        # A bare space would be unreadable in the output, so it is spelled out.
        shown = self.combinator if self.combinator != " " else "<followed>"
        own_name = self.__class__.__name__
        return f"{own_name}[{self.selector!r} {shown} {self.subselector!r}]"

    def canonical(self) -> str:
        """Return ``left <combinator> right`` with spaces around the combinator."""
        right = self.subselector.canonical()
        # Drop leading universal selectors unless "*" is the whole right side.
        if len(right) > 1:
            right = right.lstrip("*")
        left = self.selector.canonical()
        return f"{left} {self.combinator} {right}"

    def specificity(self) -> tuple[int, int, int]:
        """Return the component-wise sum of both sides' specificities."""
        left = self.selector.specificity()
        right = self.subselector.specificity()
        return left[0] + right[0], left[1] + right[1], left[2] + right[2]

483 

484 

485#### Parser 

486 

# Pre-compiled patterns for the fast paths in parse().  Each pattern is
# anchored at both ends and tolerates only surrounding CSS whitespace.

# foo (ASCII letters only)
_el_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$")

# foo#bar or #bar (group 1 is the possibly empty element name, group 2 the id)
_id_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$")

# foo.bar or .bar (group 1 is the possibly empty element name, group 2 the
# class name, which must start with an ASCII letter)
_class_re = re.compile(
    r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$"
)

497 

498 

def parse(css: str) -> list[Selector]:
    """Parse a CSS *group of selectors*.

    If you don't care about pseudo-elements or selector specificity,
    you can skip this and use :meth:`~GenericTranslator.css_to_xpath`.

    :param css:
        A *group of selectors* as a string.
    :raises:
        :class:`SelectorSyntaxError` on invalid selectors.
    :returns:
        A list of parsed :class:`Selector` objects, one for each
        selector in the comma-separated group.

    """
    # Fast paths: a bare element name, then ``[el]#id``, then ``[el].class``.
    bare_element = _el_re.match(css)
    if bare_element is not None:
        return [Selector(Element(element=bare_element.group(1)))]
    for pattern, node_type in ((_id_re, Hash), (_class_re, Class)):
        found = pattern.match(css)
        if found is not None:
            # An empty element name means the universal selector.
            owner = Element(element=found.group(1) or None)
            return [Selector(node_type(owner, found.group(2)))]

    # General case: tokenize and run the full parser.
    token_stream = TokenStream(tokenize(css))
    token_stream.source = css
    return list(parse_selector_group(token_stream))

530 

531 

532# except SelectorSyntaxError: 

533# e = sys.exc_info()[1] 

534# message = "%s at %s -> %r" % ( 

535# e, stream.used, stream.peek()) 

536# e.msg = message 

537# e.args = tuple([message]) 

538# raise 

539 

540 

def parse_selector_group(stream: TokenStream) -> Iterator[Selector]:
    """Yield one :class:`Selector` per comma-separated selector in *stream*.

    Parsing stops at the first token following a selector that is not a comma.
    """
    stream.skip_whitespace()
    while True:
        tree, pseudo_element = parse_selector(stream)
        yield Selector(tree, pseudo_element)
        if stream.peek() != ("DELIM", ","):
            return
        # Consume the comma and any whitespace before the next selector.
        stream.next()
        stream.skip_whitespace()

550 

551 

def parse_selector(stream: TokenStream) -> tuple[Tree, PseudoElement | None]:
    """Parse one selector: simple selectors chained by combinators.

    :returns:
        ``(tree, pseudo_element)``; the pseudo-element is the one reported
        for the last simple selector, or ``None``.
    :raises:
        :class:`SelectorSyntaxError` if a pseudo-element is followed by
        more selector content.
    """
    tree, pseudo_element = parse_simple_selector(stream)
    while True:
        stream.skip_whitespace()
        upcoming = stream.peek()
        if upcoming in (("EOF", None), ("DELIM", ",")):
            return tree, pseudo_element
        if pseudo_element:
            raise SelectorSyntaxError(
                f"Got pseudo-element ::{pseudo_element} not at the end of a selector"
            )
        # By exclusion, anything other than an explicit combinator means the
        # previous simple selector ended at whitespace (descendant combinator).
        combinator = " "
        if upcoming.is_delim("+", ">", "~"):
            combinator = cast("str", stream.next().value)
            stream.skip_whitespace()
        right_side, pseudo_element = parse_simple_selector(stream)
        tree = CombinedSelector(tree, combinator, right_side)

574 

575 

def parse_simple_selector(
    stream: TokenStream, inside_negation: bool = False
) -> tuple[Tree, PseudoElement | None]:
    """Parse one sequence of simple selectors (no combinators).

    :param stream:
        The token stream to consume from.
    :param inside_negation:
        When true, a closing ``)`` also terminates the sequence and a
        nested ``:not()`` is rejected.
    :returns:
        ``(tree, pseudo_element)``; *pseudo_element* is ``None`` when the
        sequence has none.
    :raises:
        :class:`SelectorSyntaxError` on unexpected tokens, when anything
        follows a pseudo-element, or when no token was consumed at all.
    """
    stream.skip_whitespace()
    # Remembered so that "nothing was consumed" can be detected at the end.
    selector_start = len(stream.used)
    peek = stream.peek()
    # Optional leading type selector: ``el``, ``*``, ``ns|el`` or ``ns|*``.
    if peek.type == "IDENT" or peek == ("DELIM", "*"):
        if peek.type == "IDENT":
            namespace = stream.next().value
        else:
            stream.next()
            namespace = None
        if stream.peek() == ("DELIM", "|"):
            stream.next()
            element = stream.next_ident_or_star()
        else:
            # No ``|`` follows: what was read is the element name itself.
            element = namespace
            namespace = None
    else:
        element = namespace = None
    result: Tree = Element(namespace, element)
    pseudo_element: PseudoElement | None = None
    while 1:
        peek = stream.peek()
        # Whitespace, EOF, a comma or a combinator ends the sequence; so does
        # ``)`` when parsing a function argument.
        if (
            peek.type in ("S", "EOF")
            or peek.is_delim(",", "+", ">", "~")
            or (inside_negation and peek == ("DELIM", ")"))
        ):
            break
        # Reaching this point means more selector content follows.
        if pseudo_element:
            raise SelectorSyntaxError(
                f"Got pseudo-element ::{pseudo_element} not at the end of a selector"
            )
        if peek.type == "HASH":
            result = Hash(result, cast("str", stream.next().value))
        elif peek == ("DELIM", "."):
            stream.next()
            result = Class(result, stream.next_ident())
        elif peek == ("DELIM", "|"):
            # ``|ident``: the tree built so far is replaced by a new Element
            # whose namespace is None.
            stream.next()
            result = Element(None, stream.next_ident())
        elif peek == ("DELIM", "["):
            stream.next()
            result = parse_attrib(result, stream)
        elif peek == ("DELIM", ":"):
            stream.next()
            if stream.peek() == ("DELIM", ":"):
                # ``::name`` or ``::name(args)``: a pseudo-element.
                stream.next()
                pseudo_element = stream.next_ident()
                if stream.peek() == ("DELIM", "("):
                    stream.next()
                    pseudo_element = FunctionalPseudoElement(
                        pseudo_element, parse_arguments(stream)
                    )
                continue
            ident = stream.next_ident()
            if ident.lower() in ("first-line", "first-letter", "before", "after"):
                # Special case: CSS 2.1 pseudo-elements can have a single ':'
                # Any new pseudo-element must have two.
                pseudo_element = str(ident)
                continue
            if stream.peek() != ("DELIM", "("):
                # Plain pseudo-class without arguments.
                result = Pseudo(result, ident)
                # A bare ``:scope`` is accepted only as the first item of a
                # selector: at the very start of the stream (optionally after
                # one whitespace token) or right after a comma (optionally
                # followed by one whitespace token).  The check looks at the
                # tokens consumed so far, whose last two are ``:`` and the
                # identifier.
                if repr(result) == "Pseudo[Element[*]:scope]" and not (
                    len(stream.used) == 2
                    or (len(stream.used) == 3 and stream.used[0].type == "S")
                    or (len(stream.used) >= 3 and stream.used[-3].is_delim(","))
                    or (
                        len(stream.used) >= 4
                        and stream.used[-3].type == "S"
                        and stream.used[-4].is_delim(",")
                    )
                ):
                    raise SelectorSyntaxError(
                        'Got immediate child pseudo-element ":scope" '
                        "not at the start of a selector"
                    )
                continue
            # Functional pseudo-class: consume ``(`` and dispatch on the name.
            stream.next()
            stream.skip_whitespace()
            if ident.lower() == "not":
                if inside_negation:
                    raise SelectorSyntaxError("Got nested :not()")
                argument, argument_pseudo_element = parse_simple_selector(
                    stream, inside_negation=True
                )
                # The token after the argument must be the closing ``)``.
                next = stream.next()
                if argument_pseudo_element:
                    raise SelectorSyntaxError(
                        f"Got pseudo-element ::{argument_pseudo_element} inside :not() at {next.pos}"
                    )
                if next != ("DELIM", ")"):
                    raise SelectorSyntaxError(f"Expected ')', got {next}")
                result = Negation(result, argument)
            elif ident.lower() == "has":
                combinator, arguments = parse_relative_selector(stream)
                result = Relation(result, combinator, arguments)

            elif ident.lower() in ("matches", "is"):
                selectors = parse_simple_selector_arguments(stream)
                result = Matching(result, selectors)
            elif ident.lower() == "where":
                selectors = parse_simple_selector_arguments(stream)
                result = SpecificityAdjustment(result, selectors)
            else:
                # Any other name is kept as a generic function with raw
                # token arguments.
                result = Function(result, ident, parse_arguments(stream))
        else:
            raise SelectorSyntaxError(f"Expected selector, got {peek}")
    # No token was consumed: there was no selector at this position.
    if len(stream.used) == selector_start:
        raise SelectorSyntaxError(f"Expected selector, got {stream.peek()}")
    return result, pseudo_element

688 

689 

def parse_arguments(stream: TokenStream) -> list[Token]:
    """Collect the argument tokens of a function up to the closing ``)``.

    Accepted tokens are identifiers, strings, numbers and the ``+`` / ``-``
    delimiters; whitespace between them is skipped.

    :raises:
        :class:`SelectorSyntaxError` on any other token.
    """
    collected: list[Token] = []
    while True:
        stream.skip_whitespace()
        token = stream.next()
        if token == ("DELIM", ")"):
            return collected
        is_sign = token in (("DELIM", "+"), ("DELIM", "-"))
        if token.type in ("IDENT", "STRING", "NUMBER") or is_sign:
            collected.append(token)
        else:
            raise SelectorSyntaxError(f"Expected an argument, got {token}")

704 

705 

def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]:
    """Parse the argument of ``:has()`` up to its closing ``)``.

    An optional leading combinator token is read first; without one, a
    synthetic ``DELIM`` token holding a single space is used.  The remaining
    tokens (identifiers, strings, numbers, ``.`` and ``*``) have their values
    concatenated, and that text is parsed again with :func:`parse`.

    :returns:
        ``(combinator_token, selector)`` where *selector* is the first
        selector produced by the re-parse.
    :raises:
        :class:`SelectorSyntaxError` on any other token.
    """
    stream.skip_whitespace()
    token = stream.next()

    if token in (("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")):
        combinator = token
        stream.skip_whitespace()
        token = stream.next()
    else:
        combinator = Token("DELIM", " ", pos=0)

    pieces: list[str] = []
    while True:
        if token == ("DELIM", ")"):
            parsed = parse("".join(pieces))
            return combinator, parsed[0]
        accepted = token.type in ("IDENT", "STRING", "NUMBER") or token in (
            ("DELIM", "."),
            ("DELIM", "*"),
        )
        if not accepted:
            raise SelectorSyntaxError(f"Expected an argument, got {token}")
        pieces.append(cast("str", token.value))
        token = stream.next()

730 

731 

def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]:
    """Parse the comma-separated arguments of ``:is()`` / ``:where()``.

    Each argument is one sequence of simple selectors; parsing ends at the
    closing ``)``.

    :returns:
        The parsed trees, in source order.
    :raises:
        :class:`SelectorSyntaxError` if an argument carries a
        pseudo-element, or if an argument is followed by anything other
        than ``,`` or ``)`` (including the end of input).
    """
    arguments = []
    while 1:
        result, pseudo_element = parse_simple_selector(stream, True)
        if pseudo_element:
            raise SelectorSyntaxError(
                f"Got pseudo-element ::{pseudo_element} inside function"
            )
        stream.skip_whitespace()
        next = stream.next()
        if next == ("DELIM", ","):
            # Only the comma is consumed here.  Discarding one more token
            # (as was done previously) swallowed the first token of the next
            # argument whenever no whitespace followed the comma.  Whitespace
            # after the comma is skipped by parse_simple_selector() itself.
            arguments.append(result)
        elif next == ("DELIM", ")"):
            arguments.append(result)
            break
        else:
            # Also covers EOF: an unterminated argument list is a syntax
            # error rather than an attempt to read past the end of input.
            raise SelectorSyntaxError(f"Expected an argument, got {next}")
    return arguments

752 

753 

def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib:
    """Parse an attribute selector; the opening ``[`` is already consumed.

    Handles ``[attr]``, ``[ns|attr]``, ``[*|attr]`` and the operator forms
    ``=``, ``^=``, ``$=``, ``*=``, ``~=``, ``|=`` and ``!=`` followed by an
    identifier or string value.

    :raises:
        :class:`SelectorSyntaxError` on malformed input.
    """
    stream.skip_whitespace()
    attrib = stream.next_ident_or_star()
    # A star is only valid as a namespace wildcard, so ``|`` must follow it.
    if attrib is None and stream.peek() != ("DELIM", "|"):
        raise SelectorSyntaxError(f"Expected '|', got {stream.peek()}")
    namespace: str | None = None
    op: str | None = None
    if stream.peek() == ("DELIM", "|"):
        stream.next()
        if stream.peek() == ("DELIM", "="):
            # ``attr|=value``: the bar belongs to the operator.
            stream.next()
            op = "|="
        else:
            # ``ns|attr``: what was read first is the namespace.
            namespace, attrib = attrib, stream.next_ident()
    if op is None:
        stream.skip_whitespace()
        token = stream.next()
        if token == ("DELIM", "]"):
            return Attrib(selector, namespace, cast("str", attrib), "exists", None)
        if token == ("DELIM", "="):
            op = "="
        elif token.is_delim("^", "$", "*", "~", "|", "!") and (
            stream.peek() == ("DELIM", "=")
        ):
            op = cast("str", token.value) + "="
            stream.next()
        else:
            raise SelectorSyntaxError(f"Operator expected, got {token}")
    stream.skip_whitespace()
    value = stream.next()
    if value.type not in ("IDENT", "STRING"):
        raise SelectorSyntaxError(f"Expected string or ident, got {value}")
    stream.skip_whitespace()
    closing = stream.next()
    if closing != ("DELIM", "]"):
        raise SelectorSyntaxError(f"Expected ']', got {closing}")
    return Attrib(selector, namespace, cast("str", attrib), op, value)

796 

797 

def parse_series(tokens: Iterable[Token]) -> tuple[int, int]:
    """
    Parses the arguments for :nth-child() and friends.

    :param tokens:
        The argument tokens.  Any iterable is accepted, including a
        one-shot iterator.
    :raises:
        :class:`ValueError` if a string token is present or the text is
        not a valid ``an+b`` expression.
    :returns:
        ``(a, b)``

    """
    # Materialise once: the tokens are scanned twice below, and a one-shot
    # iterator would be exhausted after the first pass.
    tokens = list(tokens)
    for token in tokens:
        if token.type == "STRING":
            raise ValueError("String tokens not allowed in series.")
    s = "".join(cast("str", token.value) for token in tokens).strip()
    if s == "odd":
        return 2, 1
    if s == "even":
        return 2, 0
    if s == "n":
        return 1, 0
    if "n" not in s:
        # Just b
        return 0, int(s)
    a, b = s.split("n", 1)
    a_as_int: int
    if not a:
        a_as_int = 1
    elif a in {"-", "+"}:
        # A bare sign stands for -1 / +1.
        a_as_int = int(a + "1")
    else:
        a_as_int = int(a)
    b_as_int = int(b) if b else 0
    return a_as_int, b_as_int

829 

830 

831#### Token objects 

832 

833 

class Token(tuple[str, Optional[str]]):  # noqa: SLOT001
    """A ``(type, value)`` pair that also records its source position.

    Being a tuple subclass, a token compares equal to a plain
    ``(type, value)`` tuple; ``pos`` is a separate attribute.
    """

    pos: int

    @overload
    def __new__(
        cls,
        type_: Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"],
        value: str,
        pos: int,
    ) -> Self: ...

    @overload
    def __new__(cls, type_: Literal["EOF"], value: None, pos: int) -> Self: ...

    def __new__(cls, type_: str, value: str | None, pos: int) -> Self:
        token = tuple.__new__(cls, (type_, value))
        token.pos = pos
        return token

    def __repr__(self) -> str:
        return f"<{self.type} '{self.value}' at {self.pos}>"

    def is_delim(self, *values: str) -> bool:
        """Return whether this is a ``DELIM`` token whose value is one of *values*."""
        if self.type != "DELIM":
            return False
        return self.value in values

    @property
    def type(self) -> str:
        return self[0]

    @property
    def value(self) -> str | None:
        return self[1]

    def css(self) -> str:
        """Return the token's CSS text; string tokens are rendered with ``repr()``."""
        return repr(self.value) if self.type == "STRING" else cast("str", self.value)

871 

872 

class EOFToken(Token):
    """The end-of-input token: type ``"EOF"`` with a ``None`` value."""

    def __new__(cls, pos: int) -> Self:
        return super().__new__(cls, "EOF", None, pos)

    def __repr__(self) -> str:
        kind, position = self.type, self.pos
        return f"<{kind} at {position}>"

879 

880 

881#### Tokenizer 

882 

883 

class TokenMacros:
    """Regex fragments that :func:`_compile` interpolates into tokenizer
    patterns through ``%(name)s`` placeholders."""

    # A backslash, one to six hex digits, then one optional whitespace
    # terminator.
    unicode_escape = r"\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?"
    # A unicode escape, or a backslash before any character that is neither
    # a newline character nor a hex digit.
    escape = unicode_escape + r"|\\[^\n\r\f0-9a-f]"
    # Inside strings a backslash followed by a newline is accepted as well.
    string_escape = r"\\(?:\n|\r\n|\r|\f)|" + escape
    # Any character outside the range octal 0-177.
    nonascii = r"[^\0-\177]"
    # Alternatives for one name character and for the first name character.
    nmchar = f"[_a-z0-9-]|{escape}|{nonascii}"
    nmstart = f"[_a-z]|{escape}|{nonascii}"

891 

892 

class MatchFunc(Protocol):
    """Call signature of the bound ``match`` method returned by :func:`_compile`."""

    def __call__(
        self, string: str, pos: int = ..., endpos: int = ...
    ) -> re.Match[str] | None: ...

897 

898 

def _compile(pattern: str) -> MatchFunc:
    """Expand the ``%(name)s`` macros in *pattern* and return the bound
    ``match`` method of the case-insensitive compiled regex."""
    expanded = pattern % vars(TokenMacros)
    regex = re.compile(expanded, re.IGNORECASE)
    return regex.match

901 

902 

# Tokenizer matchers: bound ``match`` methods of case-insensitive patterns
# built by _compile().
_match_whitespace = _compile(r"[ \t\r\n\f]+")
_match_number = _compile(r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)")
_match_hash = _compile("#(?:%(nmchar)s)+")
_match_ident = _compile("-?(?:%(nmstart)s)(?:%(nmchar)s)*")
# String bodies, keyed by the opening quote character.  The patterns match
# the content only; the closing quote is checked by the tokenizer.
_match_string_by_quote = {
    "'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"),
    '"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'),
}

# Bound ``sub`` methods used to resolve escape sequences.
_sub_simple_escape = re.compile(r"\\(.)").sub
_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.IGNORECASE).sub
_sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub

# Same as r'\1', but faster on CPython
_replace_simple = operator.methodcaller("group", 1)

918 

919 

def _replace_unicode(match: re.Match[str]) -> str:
    """Return the character for a matched unicode escape.

    Group 1 of *match* holds the hexadecimal code point.  Code points above
    ``sys.maxunicode`` and surrogate code points (U+D800 to U+DFFF) are
    replaced by U+FFFD.  A lone surrogate cannot be encoded as UTF-8, so
    letting one through would make :func:`ascii_lower` fail later with
    ``UnicodeEncodeError``.
    """
    codepoint = int(match.group(1), 16)
    if codepoint > sys.maxunicode or 0xD800 <= codepoint <= 0xDFFF:
        codepoint = 0xFFFD
    return chr(codepoint)

925 

926 

def unescape_ident(value: str) -> str:
    """Resolve the escapes in an identifier: unicode escapes first, then
    simple backslash escapes."""
    without_unicode_escapes = _sub_unicode_escape(_replace_unicode, value)
    return _sub_simple_escape(_replace_simple, without_unicode_escapes)

930 

931 

def tokenize(s: str) -> Iterator[Token]:
    """Yield the tokens of the selector string *s*, ending with an EOF token.

    At each position the candidates are tried in a fixed order: whitespace,
    identifier, hash, quoted string, number, comment, and finally a
    single-character ``DELIM`` token.  Comments produce no token.

    :raises:
        :class:`SelectorSyntaxError` for an unclosed or invalid string.
    """
    pos = 0
    len_s = len(s)
    while pos < len_s:
        match = _match_whitespace(s, pos=pos)
        if match:
            # A whole run of whitespace becomes one "S" token holding a
            # single space.
            yield Token("S", " ", pos)
            pos = match.end()
            continue

        match = _match_ident(s, pos=pos)
        if match:
            # Unicode escapes are resolved before simple backslash escapes.
            value = _sub_simple_escape(
                _replace_simple, _sub_unicode_escape(_replace_unicode, match.group())
            )
            yield Token("IDENT", value, pos)
            pos = match.end()
            continue

        match = _match_hash(s, pos=pos)
        if match:
            # [1:] drops the leading '#'.
            value = _sub_simple_escape(
                _replace_simple,
                _sub_unicode_escape(_replace_unicode, match.group()[1:]),
            )
            yield Token("HASH", value, pos)
            pos = match.end()
            continue

        quote = s[pos]
        if quote in _match_string_by_quote:
            # Match the string body, starting just after the opening quote.
            match = _match_string_by_quote[quote](s, pos=pos + 1)
            assert match, "Should have found at least an empty match"
            end_pos = match.end()
            if end_pos == len_s:
                raise SelectorSyntaxError(f"Unclosed string at {pos}")
            if s[end_pos] != quote:
                raise SelectorSyntaxError(f"Invalid string at {pos}")
            # Escaped newlines are removed first, then unicode escapes and
            # simple escapes are resolved.
            value = _sub_simple_escape(
                _replace_simple,
                _sub_unicode_escape(
                    _replace_unicode, _sub_newline_escape("", match.group())
                ),
            )
            yield Token("STRING", value, pos)
            # Step over the closing quote.
            pos = end_pos + 1
            continue

        match = _match_number(s, pos=pos)
        if match:
            value = match.group()
            yield Token("NUMBER", value, pos)
            pos = match.end()
            continue

        pos2 = pos + 2
        if s[pos:pos2] == "/*":
            # Skip the comment; an unterminated one swallows the rest of
            # the input.
            pos = s.find("*/", pos2)
            if pos == -1:
                pos = len_s
            else:
                pos += 2
            continue

        # Anything else is a one-character delimiter.
        yield Token("DELIM", s[pos], pos)
        pos += 1

    assert pos == len_s
    yield EOFToken(pos)

1001 

1002 

class TokenStream:
    """Token iterator with one token of look-ahead.

    Every token handed out by :meth:`next` is also appended to ``used``;
    a token that has only been peeked at is not.
    """

    def __init__(self, tokens: Iterable[Token], source: str | None = None) -> None:
        self.used: list[Token] = []
        self.tokens = iter(tokens)
        self.source = source
        self.peeked: Token | None = None
        self._peeking = False
        self.next_token = self.tokens.__next__

    def next(self) -> Token:
        """Consume and return the next token, recording it in ``used``."""
        if not self._peeking:
            token = self.next_token()
        else:
            # Hand out the token that peek() already fetched.
            self._peeking = False
            assert self.peeked is not None
            token = self.peeked
        self.used.append(token)
        return token

    def peek(self) -> Token:
        """Return the next token without consuming it."""
        if not self._peeking:
            self.peeked = self.next_token()
            self._peeking = True
        assert self.peeked is not None
        return self.peeked

    def next_ident(self) -> str:
        """Consume the next token and return its value; it must be an identifier."""
        token = self.next()
        if token.type != "IDENT":
            raise SelectorSyntaxError(f"Expected ident, got {token}")
        return cast("str", token.value)

    def next_ident_or_star(self) -> str | None:
        """Consume an identifier (returning its value) or a ``*`` (returning ``None``)."""
        token = self.next()
        if token.type == "IDENT":
            return token.value
        if token == ("DELIM", "*"):
            return None
        raise SelectorSyntaxError(f"Expected ident or '*', got {token}")

    def skip_whitespace(self) -> None:
        """Consume the next token if it is a whitespace token."""
        if self.peek().type == "S":
            self.next()