Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/cssselect/parser.py: 80%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

595 statements  

1# -*- coding: utf-8 -*- 

2""" 

3 cssselect.parser 

4 ================ 

5 

6 Tokenizer, parser and parsed objects for CSS selectors. 

7 

8 

9 :copyright: (c) 2007-2012 Ian Bicking and contributors. 

10 See AUTHORS for more details. 

11 :license: BSD, see LICENSE for more details. 

12 

13""" 

14 

15import operator 

16import re 

17import sys 

18import typing 

19from typing import Iterable, Iterator, List, Optional, Sequence, Tuple, Union 

20 

21 

def ascii_lower(string: str) -> str:
    """Return *string* with only its ASCII letters lower-cased.

    The text is round-tripped through UTF-8 because ``bytes.lower()`` only
    touches the ASCII range; every non-ASCII character comes back unchanged.
    """
    encoded = string.encode("utf8")
    lowered = encoded.lower()
    return lowered.decode("utf8")

25 

26 

class SelectorError(Exception):
    """Base class shared by :class:`SelectorSyntaxError` and
    :class:`ExpressionError`.

    Catching ``SelectorError`` around a call to
    :meth:`~GenericTranslator.css_to_xpath` handles both exception types
    with a single ``except`` clause.

    """

35 

36 

class SelectorSyntaxError(SelectorError, SyntaxError):
    """Raised when the text being parsed does not match the selector grammar."""

39 

40 

41#### Parsed objects 

42 

# Any node that may appear in a parsed selector tree.  The members are
# forward references because the classes are defined further down.
Tree = Union[
    "Element",
    "Hash",
    "Class",
    "Function",
    "Pseudo",
    "Attrib",
    "Negation",
    "Relation",
    "Matching",
    "SpecificityAdjustment",
    "CombinedSelector",
]

# A pseudo-element is either a functional one (``::name(args)``) or just the
# identifier as a plain string.
PseudoElement = Union["FunctionalPseudoElement", str]

57 

58 

class Selector:
    """
    A single parsed selector: a tree of parsed objects plus an optional
    pseudo-element.

    :meth:`~GenericTranslator.selector_to_xpath` accepts this object,
    but ignores :attr:`pseudo_element`. It is the user’s responsibility
    to account for pseudo-elements and reject selectors with unknown
    or unsupported pseudo-elements.

    """

    def __init__(
        self, tree: Tree, pseudo_element: Optional[PseudoElement] = None
    ) -> None:
        self.parsed_tree = tree
        # Plain-string pseudo-element names are normalised to ASCII lower
        # case; functional pseudo-elements are stored as given.
        is_plain_name = pseudo_element is not None and not isinstance(
            pseudo_element, FunctionalPseudoElement
        )
        if is_plain_name:
            pseudo_element = ascii_lower(pseudo_element)
        #: A :class:`FunctionalPseudoElement`,
        #: or the identifier for the pseudo-element as a string,
        #: or ``None``.
        #:
        #: +-------------------------+----------------+--------------------------------+
        #: |                         | Selector       | Pseudo-element                 |
        #: +=========================+================+================================+
        #: | CSS3 syntax             | ``a::before``  | ``'before'``                   |
        #: +-------------------------+----------------+--------------------------------+
        #: | Older syntax            | ``a:before``   | ``'before'``                   |
        #: +-------------------------+----------------+--------------------------------+
        #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'``                   |
        #: | not in Selectors3       |                |                                |
        #: +-------------------------+----------------+--------------------------------+
        #: | Invalid pseudo-class    | ``li:marker``  | ``None``                       |
        #: +-------------------------+----------------+--------------------------------+
        #: | Functional              | ``a::foo(2)``  | ``FunctionalPseudoElement(…)`` |
        #: +-------------------------+----------------+--------------------------------+
        #:
        #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement
        self.pseudo_element = pseudo_element

    def __repr__(self) -> str:
        pseudo = self.pseudo_element
        if isinstance(pseudo, FunctionalPseudoElement):
            suffix = repr(pseudo)
        elif pseudo:
            suffix = "::%s" % pseudo
        else:
            suffix = ""
        return "%s[%r%s]" % (self.__class__.__name__, self.parsed_tree, suffix)

    def canonical(self) -> str:
        """Return a CSS representation for this selector (a string)"""
        pseudo = self.pseudo_element
        if isinstance(pseudo, FunctionalPseudoElement):
            suffix = "::%s" % pseudo.canonical()
        elif pseudo:
            suffix = "::%s" % pseudo
        else:
            suffix = ""
        css = "%s%s" % (self.parsed_tree.canonical(), suffix)
        # Drop the implicit leading universal selector unless it is the
        # whole output.
        if len(css) > 1:
            css = css.lstrip("*")
        return css

    def specificity(self) -> Tuple[int, int, int]:
        """Return the specificity_ of this selector as a tuple of 3 integers.

        A pseudo-element, when present, adds one to the last component.

        .. _specificity: http://www.w3.org/TR/selectors/#specificity

        """
        ids, classes, elements = self.parsed_tree.specificity()
        if self.pseudo_element:
            elements += 1
        return ids, classes, elements

132 

133 

class Class:
    """
    Represents selector.class_name
    """

    def __init__(self, selector: Tree, class_name: str) -> None:
        self.selector = selector
        self.class_name = class_name

    def __repr__(self) -> str:
        type_name = self.__class__.__name__
        return "%s[%r.%s]" % (type_name, self.selector, self.class_name)

    def canonical(self) -> str:
        prefix = self.selector.canonical()
        return "%s.%s" % (prefix, self.class_name)

    def specificity(self) -> Tuple[int, int, int]:
        # A class selector adds one to the middle component.
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements

153 

154 

class FunctionalPseudoElement:
    """
    Represents selector::name(arguments)

    .. attribute:: name

        The name (identifier) of the pseudo-element, as a string.

    .. attribute:: arguments

        The arguments of the pseudo-element, as a list of tokens.

        **Note:** tokens are not part of the public API,
        and may change between cssselect versions.
        Use at your own risks.

    """

    def __init__(self, name: str, arguments: Sequence["Token"]):
        # The name is normalised to ASCII lower case.
        self.name = ascii_lower(name)
        self.arguments = arguments

    def __repr__(self) -> str:
        values = [argument.value for argument in self.arguments]
        return "%s[::%s(%r)]" % (self.__class__.__name__, self.name, values)

    def argument_types(self) -> List[str]:
        """Return the token type of every argument, in order."""
        return [argument.type for argument in self.arguments]

    def canonical(self) -> str:
        """Return ``name(args)`` with the arguments' CSS text concatenated."""
        rendered = [argument.css() for argument in self.arguments]
        return "%s(%s)" % (self.name, "".join(rendered))

190 

191 

class Function:
    """
    Represents selector:name(expr)
    """

    def __init__(self, selector: Tree, name: str, arguments: Sequence["Token"]) -> None:
        self.selector = selector
        # The function name is normalised to ASCII lower case.
        self.name = ascii_lower(name)
        self.arguments = arguments

    def __repr__(self) -> str:
        values = [argument.value for argument in self.arguments]
        return "%s[%r:%s(%r)]" % (
            self.__class__.__name__,
            self.selector,
            self.name,
            values,
        )

    def argument_types(self) -> List[str]:
        """Return the token type of every argument, in order."""
        return [argument.type for argument in self.arguments]

    def canonical(self) -> str:
        rendered = "".join([argument.css() for argument in self.arguments])
        return "%s:%s(%s)" % (self.selector.canonical(), self.name, rendered)

    def specificity(self) -> Tuple[int, int, int]:
        # A functional pseudo-class adds one to the middle component.
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements

221 

222 

class Pseudo:
    """
    Represents selector:ident
    """

    def __init__(self, selector: Tree, ident: str) -> None:
        self.selector = selector
        # The identifier is normalised to ASCII lower case.
        self.ident = ascii_lower(ident)

    def __repr__(self) -> str:
        type_name = self.__class__.__name__
        return "%s[%r:%s]" % (type_name, self.selector, self.ident)

    def canonical(self) -> str:
        prefix = self.selector.canonical()
        return "%s:%s" % (prefix, self.ident)

    def specificity(self) -> Tuple[int, int, int]:
        # A pseudo-class adds one to the middle component.
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements

242 

243 

class Negation:
    """
    Represents selector:not(subselector)
    """

    def __init__(self, selector: Tree, subselector: Tree) -> None:
        self.selector = selector
        self.subselector = subselector

    def __repr__(self) -> str:
        type_name = self.__class__.__name__
        return "%s[%r:not(%r)]" % (type_name, self.selector, self.subselector)

    def canonical(self) -> str:
        inner = self.subselector.canonical()
        # Drop the implicit leading ``*`` unless it is the whole argument.
        if len(inner) > 1:
            inner = inner.lstrip("*")
        return "%s:not(%s)" % (self.selector.canonical(), inner)

    def specificity(self) -> Tuple[int, int, int]:
        # Component-wise sum of the outer selector and the negated argument.
        outer = self.selector.specificity()
        inner = self.subselector.specificity()
        a1, b1, c1 = outer
        a2, b2, c2 = inner
        return a1 + a2, b1 + b2, c1 + c2

270 

271 

class Relation:
    """
    Represents selector:has(subselector)
    """

    def __init__(self, selector: Tree, combinator: "Token", subselector: Selector):
        self.selector = selector
        self.combinator = combinator
        self.subselector = subselector

    def __repr__(self) -> str:
        type_name = self.__class__.__name__
        return "%s[%r:has(%r)]" % (type_name, self.selector, self.subselector)

    def canonical(self) -> str:
        # ``subselector`` may be an indexable sequence or a single object;
        # a TypeError from the indexed attempt selects the single-object path.
        try:
            inner = self.subselector[0].canonical()  # type: ignore
        except TypeError:
            inner = self.subselector.canonical()
        # Drop the implicit leading ``*`` unless it is the whole argument.
        if len(inner) > 1:
            inner = inner.lstrip("*")
        return "%s:has(%s)" % (self.selector.canonical(), inner)

    def specificity(self) -> Tuple[int, int, int]:
        a1, b1, c1 = self.selector.specificity()
        # Same sequence-or-single fallback as canonical(), but the indexed
        # path reads the last item rather than the first.
        try:
            inner = self.subselector[-1].specificity()  # type: ignore
        except TypeError:
            inner = self.subselector.specificity()
        a2, b2, c2 = inner
        return a1 + a2, b1 + b2, c1 + c2

305 

306 

class Matching:
    """
    Represents selector:is(selector_list)
    """

    def __init__(self, selector: Tree, selector_list: Iterable[Tree]):
        self.selector = selector
        self.selector_list = selector_list

    def __repr__(self) -> str:
        return "%s[%r:is(%s)]" % (
            self.__class__.__name__,
            self.selector,
            ", ".join(map(repr, self.selector_list)),
        )

    def canonical(self) -> str:
        """Return the CSS text ``<selector>:is(<arg>, <arg>, ...)``."""
        selector_arguments = []
        for s in self.selector_list:
            selarg = s.canonical()
            # Drop the implicit leading ``*`` of an argument, but keep a
            # lone ``*`` so that ``:is(*)`` is not rendered as ``:is()``.
            if len(selarg) > 1:
                selarg = selarg.lstrip("*")
            selector_arguments.append(selarg)
        return "%s:is(%s)" % (
            self.selector.canonical(),
            ", ".join(selector_arguments),
        )

    def specificity(self) -> Tuple[int, int, int]:
        """Return the specificity of the selector including its ``:is()``.

        The pseudo-class counts as its most specific argument, and that is
        added to the specificity of the selector it is attached to.  An
        empty argument list contributes ``(0, 0, 0)``.
        """
        a1, b1, c1 = self.selector.specificity()
        a2, b2, c2 = max(
            (x.specificity() for x in self.selector_list), default=(0, 0, 0)
        )
        return a1 + a2, b1 + b2, c1 + c2

335 

336 

class SpecificityAdjustment:
    """
    Represents selector:where(selector_list)
    Same as selector:is(selector_list), but the pseudo-class itself
    contributes no specificity.
    """

    def __init__(self, selector: Tree, selector_list: List[Tree]):
        self.selector = selector
        self.selector_list = selector_list

    def __repr__(self) -> str:
        return "%s[%r:where(%s)]" % (
            self.__class__.__name__,
            self.selector,
            ", ".join(map(repr, self.selector_list)),
        )

    def canonical(self) -> str:
        """Return the CSS text ``<selector>:where(<arg>, <arg>, ...)``."""
        selector_arguments = []
        for s in self.selector_list:
            selarg = s.canonical()
            # Drop the implicit leading ``*`` of an argument, but keep a
            # lone ``*`` so that ``:where(*)`` is not rendered as ``:where()``.
            if len(selarg) > 1:
                selarg = selarg.lstrip("*")
            selector_arguments.append(selarg)
        return "%s:where(%s)" % (
            self.selector.canonical(),
            ", ".join(selector_arguments),
        )

    def specificity(self) -> Tuple[int, int, int]:
        """Return the specificity of the selector ``:where()`` is attached to.

        The arguments of ``:where()`` add nothing, so the result is exactly
        the specificity of ``self.selector``.
        """
        a, b, c = self.selector.specificity()
        return a, b, c

366 

367 

class Attrib:
    """
    Represents selector[namespace|attrib operator value]
    """

    @typing.overload
    def __init__(
        self,
        selector: Tree,
        namespace: Optional[str],
        attrib: str,
        operator: 'typing.Literal["exists"]',
        value: None,
    ) -> None: ...

    @typing.overload
    def __init__(
        self,
        selector: Tree,
        namespace: Optional[str],
        attrib: str,
        operator: str,
        value: "Token",
    ) -> None: ...

    def __init__(
        self,
        selector: Tree,
        namespace: Optional[str],
        attrib: str,
        operator: str,
        value: Optional["Token"],
    ) -> None:
        self.selector = selector
        self.namespace = namespace
        self.attrib = attrib
        # ``"exists"`` marks a bare ``[attrib]`` test, in which case
        # ``value`` is ``None``.
        self.operator = operator
        self.value = value

    def _qualified_name(self) -> str:
        # Attribute name, prefixed with ``namespace|`` when a namespace is set.
        if not self.namespace:
            return self.attrib
        return "%s|%s" % (self.namespace, self.attrib)

    def __repr__(self) -> str:
        attrib = self._qualified_name()
        type_name = self.__class__.__name__
        if self.operator == "exists":
            return "%s[%r[%s]]" % (type_name, self.selector, attrib)
        return "%s[%r[%s %s %r]]" % (
            type_name,
            self.selector,
            attrib,
            self.operator,
            typing.cast("Token", self.value).value,
        )

    def canonical(self) -> str:
        attrib = self._qualified_name()
        if self.operator == "exists":
            test = attrib
        else:
            value_css = typing.cast("Token", self.value).css()
            test = "%s%s%s" % (attrib, self.operator, value_css)
        return "%s[%s]" % (self.selector.canonical(), test)

    def specificity(self) -> Tuple[int, int, int]:
        # An attribute selector adds one to the middle component.
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements

444 

445 

class Element:
    """
    Represents namespace|element

    `None` is for the universal selector '*'

    """

    def __init__(
        self, namespace: Optional[str] = None, element: Optional[str] = None
    ) -> None:
        self.namespace = namespace
        self.element = element

    def __repr__(self) -> str:
        return "%s[%s]" % (self.__class__.__name__, self.canonical())

    def canonical(self) -> str:
        # A missing element name is rendered as the universal selector.
        name = self.element or "*"
        if not self.namespace:
            return name
        return "%s|%s" % (self.namespace, name)

    def specificity(self) -> Tuple[int, int, int]:
        # A named element counts once in the last component; the universal
        # selector counts for nothing.
        return (0, 0, 1) if self.element else (0, 0, 0)

474 

475 

class Hash:
    """
    Represents selector#id
    """

    def __init__(self, selector: Tree, id: str) -> None:
        self.selector = selector
        self.id = id

    def __repr__(self) -> str:
        type_name = self.__class__.__name__
        return "%s[%r#%s]" % (type_name, self.selector, self.id)

    def canonical(self) -> str:
        prefix = self.selector.canonical()
        return "%s#%s" % (prefix, self.id)

    def specificity(self) -> Tuple[int, int, int]:
        # An ID selector adds one to the first component.
        ids, classes, elements = self.selector.specificity()
        return ids + 1, classes, elements

495 

496 

class CombinedSelector:
    """
    Represents two selectors joined by a combinator
    (``selector combinator subselector``).
    """

    def __init__(self, selector: Tree, combinator: str, subselector: Tree) -> None:
        assert selector is not None
        self.selector = selector
        self.combinator = combinator
        self.subselector = subselector

    def __repr__(self) -> str:
        # The whitespace combinator is spelled out so it stays visible.
        shown = "<followed>" if self.combinator == " " else self.combinator
        return "%s[%r %s %r]" % (
            self.__class__.__name__,
            self.selector,
            shown,
            self.subselector,
        )

    def canonical(self) -> str:
        right = self.subselector.canonical()
        # Drop the implicit leading ``*`` unless it is the whole operand.
        if len(right) > 1:
            right = right.lstrip("*")
        return "%s %s %s" % (self.selector.canonical(), self.combinator, right)

    def specificity(self) -> Tuple[int, int, int]:
        # Component-wise sum of both operands.
        left = self.selector.specificity()
        right = self.subselector.specificity()
        a1, b1, c1 = left
        a2, b2, c2 = right
        return a1 + a2, b1 + b2, c1 + c2

526 

527 

528#### Parser 

529 

530# foo 

531_el_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$") 

532 

533# foo#bar or #bar 

534_id_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$") 

535 

536# foo.bar or .bar 

537_class_re = re.compile( 

538 r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$" 

539) 

540 

541 

def parse(css: str) -> List[Selector]:
    """Parse a CSS *group of selectors*.

    If you don't care about pseudo-elements or selector specificity,
    you can skip this and use :meth:`~GenericTranslator.css_to_xpath`.

    :param css:
        A *group of selectors* as a string.
    :raises:
        :class:`SelectorSyntaxError` on invalid selectors.
    :returns:
        A list of parsed :class:`Selector` objects, one for each
        selector in the comma-separated group.

    """
    # Fast path for simple cases: a bare element name, then ``elem#id`` and
    # ``elem.class`` (element optional), handled without tokenizing.
    simple = _el_re.match(css)
    if simple:
        return [Selector(Element(element=simple.group(1)))]
    for pattern, node_type in ((_id_re, Hash), (_class_re, Class)):
        simple = pattern.match(css)
        if simple is not None:
            tag = simple.group(1) or None
            return [Selector(node_type(Element(element=tag), simple.group(2)))]

    # General case: tokenize and run the full parser.
    stream = TokenStream(tokenize(css))
    stream.source = css
    return list(parse_selector_group(stream))

573 

574 

575# except SelectorSyntaxError: 

576# e = sys.exc_info()[1] 

577# message = "%s at %s -> %r" % ( 

578# e, stream.used, stream.peek()) 

579# e.msg = message 

580# e.args = tuple([message]) 

581# raise 

582 

583 

def parse_selector_group(stream: "TokenStream") -> Iterator[Selector]:
    """Yield one :class:`Selector` per comma-separated selector in *stream*."""
    stream.skip_whitespace()
    more = True
    while more:
        yield Selector(*parse_selector(stream))
        # Continue only when a comma follows the selector just parsed.
        more = stream.peek() == ("DELIM", ",")
        if more:
            stream.next()
            stream.skip_whitespace()

593 

594 

def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement]]:
    """Parse one selector: simple selectors joined by combinators.

    Parsing stops at end of input or at a ``,``.

    :returns: ``(tree, pseudo_element)``
    :raises: :class:`SelectorSyntaxError` if a pseudo-element is followed
        by anything other than the end of the selector.
    """
    tree, pseudo_element = parse_simple_selector(stream)
    while True:
        stream.skip_whitespace()
        upcoming = stream.peek()
        if upcoming in (("EOF", None), ("DELIM", ",")):
            return tree, pseudo_element
        if pseudo_element:
            raise SelectorSyntaxError(
                "Got pseudo-element ::%s not at the end of a selector" % pseudo_element
            )
        # By exclusion, when no explicit combinator follows, the last
        # parse_simple_selector() ended at peek == ' '
        combinator = " "
        if upcoming.is_delim("+", ">", "~"):
            # A combinator
            combinator = typing.cast(str, stream.next().value)
            stream.skip_whitespace()
        right, pseudo_element = parse_simple_selector(stream)
        tree = CombinedSelector(tree, combinator, right)

617 

618 

def parse_simple_selector(
    stream: "TokenStream", inside_negation: bool = False
) -> Tuple[Tree, Optional[PseudoElement]]:
    """Parse one sequence of simple selectors (no combinators).

    :param stream: the token stream to consume from.
    :param inside_negation: when true, a ``)`` ends the selector and a
        nested ``:not()`` is rejected.
    :returns: ``(tree, pseudo_element)``
    :raises: :class:`SelectorSyntaxError` on invalid input, including when
        no token at all was consumed.
    """
    stream.skip_whitespace()
    selector_start = len(stream.used)

    # Optional leading type selector: ``elem``, ``*``, ``ns|elem`` or ``ns|*``.
    namespace = element = None
    first = stream.peek()
    if first.type == "IDENT" or first == ("DELIM", "*"):
        consumed = stream.next()
        leading = consumed.value if first.type == "IDENT" else None
        if stream.peek() == ("DELIM", "|"):
            # What was read so far was the namespace prefix.
            stream.next()
            namespace = leading
            element = stream.next_ident_or_star()
        else:
            element = leading

    tree: Tree = Element(namespace, element)
    pseudo_element: Optional[PseudoElement] = None
    while True:
        token = stream.peek()
        at_end = (
            token.type in ("S", "EOF")
            or token.is_delim(",", "+", ">", "~")
            or (inside_negation and token == ("DELIM", ")"))
        )
        if at_end:
            break
        if pseudo_element:
            raise SelectorSyntaxError(
                "Got pseudo-element ::%s not at the end of a selector" % pseudo_element
            )
        if token.type == "HASH":
            tree = Hash(tree, typing.cast(str, stream.next().value))
            continue
        if token == ("DELIM", "."):
            stream.next()
            tree = Class(tree, stream.next_ident())
            continue
        if token == ("DELIM", "|"):
            stream.next()
            tree = Element(None, stream.next_ident())
            continue
        if token == ("DELIM", "["):
            stream.next()
            tree = parse_attrib(tree, stream)
            continue
        if token != ("DELIM", ":"):
            raise SelectorSyntaxError("Expected selector, got %s" % (token,))

        # From here on: a pseudo-class or pseudo-element.
        stream.next()
        if stream.peek() == ("DELIM", ":"):
            # ``::name`` or ``::name(args)``
            stream.next()
            pseudo_element = stream.next_ident()
            if stream.peek() == ("DELIM", "("):
                stream.next()
                pseudo_element = FunctionalPseudoElement(
                    pseudo_element, parse_arguments(stream)
                )
            continue

        ident = stream.next_ident()
        lowered = ident.lower()
        if lowered in ("first-line", "first-letter", "before", "after"):
            # Special case: CSS 2.1 pseudo-elements can have a single ':'
            # Any new pseudo-element must have two.
            pseudo_element = str(ident)
            continue

        if stream.peek() != ("DELIM", "("):
            # Non-functional pseudo-class.
            tree = Pseudo(tree, ident)
            if repr(tree) == "Pseudo[Element[*]:scope]":
                # A bare ``:scope`` is accepted only as the first thing in a
                # selector: at the start of the input (optionally after
                # whitespace) or right after a comma (optionally followed
                # by whitespace).
                used = stream.used
                at_selector_start = (
                    len(used) == 2
                    or (len(used) == 3 and used[0].type == "S")
                    or (len(used) >= 3 and used[-3].is_delim(","))
                    or (
                        len(used) >= 4
                        and used[-3].type == "S"
                        and used[-4].is_delim(",")
                    )
                )
                if not at_selector_start:
                    raise SelectorSyntaxError(
                        'Got immediate child pseudo-element ":scope" '
                        "not at the start of a selector"
                    )
            continue

        # Functional pseudo-class: consume the '(' and dispatch on the name.
        stream.next()
        stream.skip_whitespace()
        if lowered == "not":
            if inside_negation:
                raise SelectorSyntaxError("Got nested :not()")
            argument, argument_pseudo_element = parse_simple_selector(
                stream, inside_negation=True
            )
            closing = stream.next()
            if argument_pseudo_element:
                raise SelectorSyntaxError(
                    "Got pseudo-element ::%s inside :not() at %s"
                    % (argument_pseudo_element, closing.pos)
                )
            if closing != ("DELIM", ")"):
                raise SelectorSyntaxError("Expected ')', got %s" % (closing,))
            tree = Negation(tree, argument)
        elif lowered == "has":
            combinator, arguments = parse_relative_selector(stream)
            tree = Relation(tree, combinator, arguments)
        elif lowered in ("matches", "is"):
            selectors = parse_simple_selector_arguments(stream)
            tree = Matching(tree, selectors)
        elif lowered == "where":
            selectors = parse_simple_selector_arguments(stream)
            tree = SpecificityAdjustment(tree, selectors)
        else:
            tree = Function(tree, ident, parse_arguments(stream))

    if len(stream.used) == selector_start:
        raise SelectorSyntaxError("Expected selector, got %s" % (stream.peek(),))
    return tree, pseudo_element

733 

734 

def parse_arguments(stream: "TokenStream") -> List["Token"]:
    """Collect the argument tokens of a functional pseudo, up to its ``)``.

    Accepted arguments are IDENT, STRING and NUMBER tokens and the ``+`` and
    ``-`` delimiters; whitespace between them is skipped.

    :raises: :class:`SelectorSyntaxError` on any other token.
    """
    collected: List["Token"] = []
    while True:
        stream.skip_whitespace()
        token = stream.next()
        if token == ("DELIM", ")"):
            return collected
        is_argument = token.type in ("IDENT", "STRING", "NUMBER") or token in [
            ("DELIM", "+"),
            ("DELIM", "-"),
        ]
        if not is_argument:
            raise SelectorSyntaxError("Expected an argument, got %s" % (token,))
        collected.append(token)

749 

750 

def parse_relative_selector(stream: "TokenStream") -> Tuple["Token", Selector]:
    """Parse the argument of ``:has(...)``, up to and including its ``)``.

    An optional leading ``+``, ``-``, ``>`` or ``~`` delimiter is taken as
    the combinator; otherwise a synthetic ``" "`` delimiter token is used.
    The remaining tokens are concatenated and re-parsed with :func:`parse`.

    :returns: ``(combinator_token, selector)`` where ``selector`` is the
        first selector of the re-parsed text.
    :raises: :class:`SelectorSyntaxError` on an unsupported token.
    """
    stream.skip_whitespace()
    token = stream.next()

    if token in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]:
        combinator = token
        stream.skip_whitespace()
        token = stream.next()
    else:
        combinator = Token("DELIM", " ", pos=0)

    pieces: List[str] = []
    while True:
        if token == ("DELIM", ")"):
            parsed = parse("".join(pieces))
            return combinator, parsed[0]
        is_part = token.type in ("IDENT", "STRING", "NUMBER") or token in [
            ("DELIM", "."),
            ("DELIM", "*"),
        ]
        if not is_part:
            raise SelectorSyntaxError("Expected an argument, got %s" % (token,))
        pieces.append(typing.cast(str, token.value))
        token = stream.next()

775 

776 

def parse_simple_selector_arguments(stream: "TokenStream") -> List[Tree]:
    """Parse the comma-separated arguments of ``:is()`` / ``:where()``.

    Consumes tokens up to and including the closing ``)``.

    :returns: the parsed argument trees, in source order.
    :raises: :class:`SelectorSyntaxError` if an argument carries a
        pseudo-element, or if an argument is followed by anything other
        than ``,`` or ``)`` (including the end of input).
    """
    arguments = []
    while True:
        result, pseudo_element = parse_simple_selector(stream, True)
        if pseudo_element:
            raise SelectorSyntaxError(
                "Got pseudo-element ::%s inside function" % (pseudo_element,)
            )
        stream.skip_whitespace()
        token = stream.next()
        if token == ("DELIM", ","):
            # Only optional whitespace may be dropped after the comma; the
            # token that follows starts the next argument, so ``:is(a,b)``
            # parses the same as ``:is(a, b)``.
            stream.skip_whitespace()
            arguments.append(result)
        elif token == ("DELIM", ")"):
            arguments.append(result)
            break
        else:
            # Includes EOF: an unterminated argument list is a syntax error.
            raise SelectorSyntaxError("Expected an argument, got %s" % (token,))
    return arguments

797 

798 

def parse_attrib(selector: Tree, stream: "TokenStream") -> Attrib:
    """Parse an attribute selector; the opening ``[`` is already consumed.

    Handles ``[attr]``, ``[ns|attr]``, ``[*|attr]`` and the operator forms
    ``=``, ``^=``, ``$=``, ``*=``, ``~=``, ``|=`` and ``!=`` followed by an
    IDENT or STRING value.

    :returns: an :class:`Attrib` wrapping *selector*.
    :raises: :class:`SelectorSyntaxError` on malformed input.
    """
    stream.skip_whitespace()
    attrib = stream.next_ident_or_star()
    # ``*`` (returned as None) is only valid as a namespace prefix.
    if attrib is None and stream.peek() != ("DELIM", "|"):
        raise SelectorSyntaxError("Expected '|', got %s" % (stream.peek(),))

    namespace: Optional[str] = None
    op: Optional[str] = None
    if stream.peek() == ("DELIM", "|"):
        stream.next()
        if stream.peek() == ("DELIM", "="):
            # The ``|`` was the first half of the ``|=`` operator.
            stream.next()
            op = "|="
        else:
            # The ``|`` separated a namespace from the attribute name.
            namespace = attrib
            attrib = stream.next_ident()

    if op is None:
        stream.skip_whitespace()
        token = stream.next()
        if token == ("DELIM", "]"):
            return Attrib(selector, namespace, typing.cast(str, attrib), "exists", None)
        if token == ("DELIM", "="):
            op = "="
        elif token.is_delim("^", "$", "*", "~", "|", "!") and (
            stream.peek() == ("DELIM", "=")
        ):
            op = typing.cast(str, token.value) + "="
            stream.next()
        else:
            raise SelectorSyntaxError("Operator expected, got %s" % (token,))

    stream.skip_whitespace()
    value = stream.next()
    if value.type not in ("IDENT", "STRING"):
        raise SelectorSyntaxError("Expected string or ident, got %s" % (value,))

    stream.skip_whitespace()
    closing = stream.next()
    if closing != ("DELIM", "]"):
        raise SelectorSyntaxError("Expected ']', got %s" % (closing,))
    return Attrib(selector, namespace, typing.cast(str, attrib), op, value)

841 

842 

def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]:
    """
    Parses the arguments for :nth-child() and friends.

    :param tokens: the argument tokens; any iterable is accepted,
        including a one-shot iterator.
    :raises: ``ValueError`` if a STRING token is present or the joined
        text is not a valid ``an+b`` expression.
    :returns: ``(a, b)``

    """
    # Materialize first: the tokens are walked twice below, which would
    # silently yield an empty series for a one-shot iterator.
    tokens = list(tokens)
    if any(token.type == "STRING" for token in tokens):
        raise ValueError("String tokens not allowed in series.")
    s = "".join(typing.cast(str, token.value) for token in tokens).strip()
    if s == "odd":
        return 2, 1
    elif s == "even":
        return 2, 0
    elif s == "n":
        return 1, 0
    if "n" not in s:
        # Just b
        return 0, int(s)
    a, b = s.split("n", 1)
    a_as_int: int
    if not a:
        a_as_int = 1
    elif a == "-" or a == "+":
        # A bare sign stands for -1 / +1.
        a_as_int = int(a + "1")
    else:
        a_as_int = int(a)
    b_as_int: int
    if not b:
        b_as_int = 0
    else:
        b_as_int = int(b)
    return a_as_int, b_as_int

878 

879 

880#### Token objects 

881 

882 

class Token(Tuple[str, Optional[str]]):
    """A ``(type, value)`` tuple that also records its source position.

    ``pos`` is a plain attribute, not a tuple item, so it takes no part in
    equality: a token compares equal to the bare ``(type, value)`` tuple.
    """

    @typing.overload
    def __new__(
        cls,
        type_: 'typing.Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"]',
        value: str,
        pos: int,
    ) -> "Token": ...

    @typing.overload
    def __new__(
        cls, type_: 'typing.Literal["EOF"]', value: None, pos: int
    ) -> "Token": ...

    def __new__(cls, type_: str, value: Optional[str], pos: int) -> "Token":
        token = tuple.__new__(cls, (type_, value))
        token.pos = pos
        return token

    def __repr__(self) -> str:
        return "<%s '%s' at %i>" % (self.type, self.value, self.pos)

    def is_delim(self, *values: str) -> bool:
        """Return whether this is a DELIM token whose value is in *values*."""
        if self.type != "DELIM":
            return False
        return self.value in values

    pos: int

    @property
    def type(self) -> str:
        return self[0]

    @property
    def value(self) -> Optional[str]:
        return self[1]

    def css(self) -> str:
        """Return the token as CSS text; STRING values go through ``repr``."""
        if self.type == "STRING":
            return repr(self.value)
        return typing.cast(str, self.value)

923 

924 

class EOFToken(Token):
    """The end-of-input token: type ``"EOF"`` with a ``None`` value."""

    def __new__(cls, pos: int) -> "EOFToken":
        token = Token.__new__(cls, "EOF", None, pos)
        return typing.cast("EOFToken", token)

    def __repr__(self) -> str:
        return "<%s at %i>" % (self.type, self.pos)

931 

932 

933#### Tokenizer 

934 

935 

class TokenMacros:
    """Regex fragments that token patterns reference as ``%(name)s``."""

    # Backslash, 1-6 hex digits, then one optional whitespace terminator.
    unicode_escape = r"\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?"
    # A unicode escape, or a backslash before any character that is neither
    # a newline nor a hex digit.
    escape = "|".join((unicode_escape, r"\\[^\n\r\f0-9a-f]"))
    # Inside strings a backslash-newline is also accepted.
    string_escape = "|".join((r"\\(?:\n|\r\n|\r|\f)", escape))
    nonascii = r"[^\0-\177]"
    # Characters allowed inside, and at the start of, a name.
    nmchar = "|".join(("[_a-z0-9-]", escape, nonascii))
    nmstart = "|".join(("[_a-z]", escape, nonascii))

943 

944 

if typing.TYPE_CHECKING:

    class MatchFunc(typing.Protocol):
        """Static-typing shape of a compiled pattern's bound ``match`` method."""

        def __call__(
            self, string: str, pos: int = ..., endpos: int = ...
        ) -> Optional["re.Match[str]"]: ...

951 

952 

def _compile(pattern: str) -> "MatchFunc":
    """Expand ``%(macro)s`` references in *pattern* from :class:`TokenMacros`
    and return the ``match`` method of the case-insensitive compiled regex.
    """
    expanded = pattern % vars(TokenMacros)
    compiled = re.compile(expanded, re.IGNORECASE)
    return compiled.match

955 

956 

# Token matchers: each is the bound ``match`` method of a compiled pattern
# and is called by tokenize() as ``matcher(s, pos=...)``.
_match_whitespace = _compile(r"[ \t\r\n\f]+")
_match_number = _compile(r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)")
_match_hash = _compile("#(?:%(nmchar)s)+")
_match_ident = _compile("-?(?:%(nmstart)s)(?:%(nmchar)s)*")
# String bodies, keyed by the opening quote character; the pattern stops
# before the closing quote.
_match_string_by_quote = {
    "'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"),
    '"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'),
}

# Substitution helpers used to resolve escapes in identifiers and strings.
_sub_simple_escape = re.compile(r"\\(.)").sub
_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub
_sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub

# Same as r'\1', but faster on CPython
_replace_simple = operator.methodcaller("group", 1)

972 

973 

974def _replace_unicode(match: "re.Match[str]") -> str: 

975 codepoint = int(match.group(1), 16) 

976 if codepoint > sys.maxunicode: 

977 codepoint = 0xFFFD 

978 return chr(codepoint) 

979 

980 

def unescape_ident(value: str) -> str:
    """Resolve the escapes in an identifier.

    Hexadecimal escapes are replaced first, then every remaining
    backslash-character pair is replaced by the character itself.
    """
    without_unicode = _sub_unicode_escape(_replace_unicode, value)
    return _sub_simple_escape(_replace_simple, without_unicode)

985 

986 

def tokenize(s: str) -> Iterator[Token]:
    """Yield the tokens of *s*, ending with an :class:`EOFToken`.

    At each position the candidates are tried in this order: whitespace,
    identifier, hash, quoted string, number, ``/* ... */`` comment, and
    finally a one-character DELIM.  A run of whitespace becomes a single
    ``S`` token whose value is ``" "``; comments produce no token, and an
    unterminated comment swallows the rest of the input.

    :raises: :class:`SelectorSyntaxError` for an unclosed or invalid string.
    """
    pos = 0
    len_s = len(s)
    while pos < len_s:
        found = _match_whitespace(s, pos=pos)
        if found:
            yield Token("S", " ", pos)
            pos = found.end()
            continue

        found = _match_ident(s, pos=pos)
        if found:
            text = _sub_unicode_escape(_replace_unicode, found.group())
            text = _sub_simple_escape(_replace_simple, text)
            yield Token("IDENT", text, pos)
            pos = found.end()
            continue

        found = _match_hash(s, pos=pos)
        if found:
            # Strip the leading '#' before resolving escapes.
            text = _sub_unicode_escape(_replace_unicode, found.group()[1:])
            text = _sub_simple_escape(_replace_simple, text)
            yield Token("HASH", text, pos)
            pos = found.end()
            continue

        quote = s[pos]
        if quote in _match_string_by_quote:
            found = _match_string_by_quote[quote](s, pos=pos + 1)
            assert found, "Should have found at least an empty match"
            end_pos = found.end()
            if end_pos == len_s:
                raise SelectorSyntaxError("Unclosed string at %s" % pos)
            if s[end_pos] != quote:
                raise SelectorSyntaxError("Invalid string at %s" % pos)
            # Escaped newlines vanish, then the other escapes are resolved.
            text = _sub_newline_escape("", found.group())
            text = _sub_unicode_escape(_replace_unicode, text)
            text = _sub_simple_escape(_replace_simple, text)
            yield Token("STRING", text, pos)
            pos = end_pos + 1
            continue

        found = _match_number(s, pos=pos)
        if found:
            yield Token("NUMBER", found.group(), pos)
            pos = found.end()
            continue

        if s[pos : pos + 2] == "/*":
            close = s.find("*/", pos + 2)
            # No terminator: the comment runs to the end of the input.
            pos = len_s if close == -1 else close + 2
            continue

        yield Token("DELIM", s[pos], pos)
        pos += 1

    assert pos == len_s
    yield EOFToken(pos)

1056 

1057 

1058class TokenStream: 

1059 def __init__(self, tokens: Iterable[Token], source: Optional[str] = None) -> None: 

1060 self.used: List[Token] = [] 

1061 self.tokens = iter(tokens) 

1062 self.source = source 

1063 self.peeked: Optional[Token] = None 

1064 self._peeking = False 

1065 self.next_token = self.tokens.__next__ 

1066 

1067 def next(self) -> Token: 

1068 if self._peeking: 

1069 self._peeking = False 

1070 self.used.append(typing.cast(Token, self.peeked)) 

1071 return typing.cast(Token, self.peeked) 

1072 else: 

1073 next = self.next_token() 

1074 self.used.append(next) 

1075 return next 

1076 

1077 def peek(self) -> Token: 

1078 if not self._peeking: 

1079 self.peeked = self.next_token() 

1080 self._peeking = True 

1081 return typing.cast(Token, self.peeked) 

1082 

1083 def next_ident(self) -> str: 

1084 next = self.next() 

1085 if next.type != "IDENT": 

1086 raise SelectorSyntaxError("Expected ident, got %s" % (next,)) 

1087 return typing.cast(str, next.value) 

1088 

1089 def next_ident_or_star(self) -> Optional[str]: 

1090 next = self.next() 

1091 if next.type == "IDENT": 

1092 return next.value 

1093 elif next == ("DELIM", "*"): 

1094 return None 

1095 else: 

1096 raise SelectorSyntaxError("Expected ident or '*', got %s" % (next,)) 

1097 

1098 def skip_whitespace(self) -> None: 

1099 peek = self.peek() 

1100 if peek.type == "S": 

1101 self.next()