Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/cssselect/parser.py: 80%

600 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-09 06:19 +0000

1# -*- coding: utf-8 -*- 

2""" 

3 cssselect.parser 

4 ================ 

5 

6 Tokenizer, parser and parsed objects for CSS selectors. 

7 

8 

9 :copyright: (c) 2007-2012 Ian Bicking and contributors. 

10 See AUTHORS for more details. 

11 :license: BSD, see LICENSE for more details. 

12 

13""" 

14 

15import sys 

16import re 

17import operator 

18import typing 

19from typing import Iterable, Iterator, List, Optional, Sequence, Tuple, Union 

20 

21 

def ascii_lower(string: str) -> str:
    """Return *string* with only its ASCII letters lower-cased.

    The text is round-tripped through UTF-8 bytes because ``bytes.lower()``
    only touches ASCII letters, whereas ``str.lower()`` would also change
    non-ASCII characters.
    """
    encoded = string.encode("utf8")
    return encoded.lower().decode("utf8")

25 

26 

class SelectorError(Exception):
    """Common parent for :class:`SelectorSyntaxError` and
    :class:`ExpressionError`.

    You can just use ``except SelectorError:`` when calling
    :meth:`~GenericTranslator.css_to_xpath` and handle both exception types.

    """

35 

36 

class SelectorSyntaxError(SelectorError, SyntaxError):
    """Raised when the selector being parsed does not match the grammar."""

39 

40 

41#### Parsed objects 

42 

# Any node that can appear in a parsed selector tree.  The members are
# forward references (strings) to the node classes defined in this module.
Tree = Union[
    "Element",
    "Hash",
    "Class",
    "Function",
    "Pseudo",
    "Attrib",
    "Negation",
    "Relation",
    "Matching",
    "SpecificityAdjustment",
    "CombinedSelector",
]
# A pseudo-element is either a plain identifier string or, when it takes
# arguments, a FunctionalPseudoElement.
PseudoElement = Union["FunctionalPseudoElement", str]

57 

58 

class Selector:
    """
    A parsed selector: a tree of parsed objects plus an optional
    pseudo-element.

    :meth:`~GenericTranslator.selector_to_xpath` accepts this object,
    but ignores :attr:`pseudo_element`. It is the user's responsibility
    to account for pseudo-elements and reject selectors with unknown
    or unsupported pseudo-elements.

    """

    def __init__(self, tree: Tree, pseudo_element: Optional[PseudoElement] = None) -> None:
        self.parsed_tree = tree
        # Plain identifier names are stored ASCII-lower-cased; ``None`` and
        # functional pseudo-elements are stored untouched.
        is_plain_name = not (
            pseudo_element is None or isinstance(pseudo_element, FunctionalPseudoElement)
        )
        if is_plain_name:
            pseudo_element = ascii_lower(pseudo_element)
        #: A :class:`FunctionalPseudoElement`,
        #: or the identifier for the pseudo-element as a string,
        #: or ``None``.
        #:
        #: +-------------------------+----------------+--------------------------------+
        #: |                         | Selector       | Pseudo-element                 |
        #: +=========================+================+================================+
        #: | CSS3 syntax             | ``a::before``  | ``'before'``                   |
        #: +-------------------------+----------------+--------------------------------+
        #: | Older syntax            | ``a:before``   | ``'before'``                   |
        #: +-------------------------+----------------+--------------------------------+
        #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'``                   |
        #: | not in Selectors3       |                |                                |
        #: +-------------------------+----------------+--------------------------------+
        #: | Invalid pseudo-class    | ``li:marker``  | ``None``                       |
        #: +-------------------------+----------------+--------------------------------+
        #: | Functional              | ``a::foo(2)``  | ``FunctionalPseudoElement(…)`` |
        #: +-------------------------+----------------+--------------------------------+
        #:
        #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement
        self.pseudo_element = pseudo_element

    def __repr__(self) -> str:
        pseudo = self.pseudo_element
        if isinstance(pseudo, FunctionalPseudoElement):
            suffix = repr(pseudo)
        elif pseudo:
            suffix = "::%s" % pseudo
        else:
            suffix = ""
        return "%s[%r%s]" % (type(self).__name__, self.parsed_tree, suffix)

    def canonical(self) -> str:
        """Return a CSS representation for this selector (a string)"""
        pseudo = self.pseudo_element
        if isinstance(pseudo, FunctionalPseudoElement):
            suffix = "::%s" % pseudo.canonical()
        elif pseudo:
            suffix = "::%s" % pseudo
        else:
            suffix = ""
        css = "%s%s" % (self.parsed_tree.canonical(), suffix)
        # Drop the implicit leading universal selector, except when "*" is
        # the entire selector.
        return css.lstrip("*") if len(css) > 1 else css

    def specificity(self) -> Tuple[int, int, int]:
        """Return the specificity_ of this selector as a tuple of 3 integers.

        A pseudo-element, when present, adds one to the last component.

        .. _specificity: http://www.w3.org/TR/selectors/#specificity

        """
        ids, classes, elements = self.parsed_tree.specificity()
        if self.pseudo_element:
            elements += 1
        return ids, classes, elements

128 

129 

class Class:
    """
    A class test applied to another selector: ``selector.class_name``.
    """

    def __init__(self, selector: Tree, class_name: str) -> None:
        self.selector = selector
        self.class_name = class_name

    def __repr__(self) -> str:
        return f"{type(self).__name__}[{self.selector!r}.{self.class_name}]"

    def canonical(self) -> str:
        """Return the CSS text ``<selector>.<class_name>``."""
        return f"{self.selector.canonical()}.{self.class_name}"

    def specificity(self) -> Tuple[int, int, int]:
        """Return the wrapped selector's specificity with one more class-level point."""
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements

149 

150 

class FunctionalPseudoElement:
    """
    A pseudo-element that takes arguments: ``selector::name(arguments)``.

    .. attribute:: name

        The name (identifier) of the pseudo-element, as a string.
        It is stored ASCII-lower-cased.

    .. attribute:: arguments

        The arguments of the pseudo-element, as a list of tokens.

        **Note:** tokens are not part of the public API,
        and may change between cssselect versions.
        Use at your own risks.

    """

    def __init__(self, name: str, arguments: Sequence["Token"]):
        self.name = ascii_lower(name)
        self.arguments = arguments

    def __repr__(self) -> str:
        argument_values = [argument.value for argument in self.arguments]
        return f"{type(self).__name__}[::{self.name}({argument_values!r})]"

    def argument_types(self) -> List[str]:
        """Return the token type of every argument, in order."""
        return [argument.type for argument in self.arguments]

    def canonical(self) -> str:
        """Return the CSS text ``name(args)`` with the argument tokens concatenated."""
        joined_arguments = "".join(argument.css() for argument in self.arguments)
        return f"{self.name}({joined_arguments})"

186 

187 

class Function:
    """
    A functional pseudo-class applied to a selector: ``selector:name(expr)``.

    The name is stored ASCII-lower-cased; the arguments are kept as tokens.
    """

    def __init__(self, selector: Tree, name: str, arguments: Sequence["Token"]) -> None:
        self.selector = selector
        self.name = ascii_lower(name)
        self.arguments = arguments

    def __repr__(self) -> str:
        argument_values = [argument.value for argument in self.arguments]
        return f"{type(self).__name__}[{self.selector!r}:{self.name}({argument_values!r})]"

    def argument_types(self) -> List[str]:
        """Return the token type of every argument, in order."""
        return [argument.type for argument in self.arguments]

    def canonical(self) -> str:
        """Return the CSS text ``<selector>:name(args)``."""
        joined_arguments = "".join(argument.css() for argument in self.arguments)
        return f"{self.selector.canonical()}:{self.name}({joined_arguments})"

    def specificity(self) -> Tuple[int, int, int]:
        """Return the wrapped selector's specificity with one more class-level point."""
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements

217 

218 

class Pseudo:
    """
    A non-functional pseudo-class applied to a selector: ``selector:ident``.

    The identifier is stored ASCII-lower-cased.
    """

    def __init__(self, selector: Tree, ident: str) -> None:
        self.selector = selector
        self.ident = ascii_lower(ident)

    def __repr__(self) -> str:
        return f"{type(self).__name__}[{self.selector!r}:{self.ident}]"

    def canonical(self) -> str:
        """Return the CSS text ``<selector>:<ident>``."""
        return f"{self.selector.canonical()}:{self.ident}"

    def specificity(self) -> Tuple[int, int, int]:
        """Return the wrapped selector's specificity with one more class-level point."""
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements

238 

239 

class Negation:
    """
    A negation applied to a selector: ``selector:not(subselector)``.
    """

    def __init__(self, selector: Tree, subselector: Tree) -> None:
        self.selector = selector
        self.subselector = subselector

    def __repr__(self) -> str:
        return f"{type(self).__name__}[{self.selector!r}:not({self.subselector!r})]"

    def canonical(self) -> str:
        """Return the CSS text ``<selector>:not(<subselector>)``."""
        inner = self.subselector.canonical()
        # Drop the implicit leading "*" of the argument unless it is all there is.
        if len(inner) > 1:
            inner = inner.lstrip("*")
        return f"{self.selector.canonical()}:not({inner})"

    def specificity(self) -> Tuple[int, int, int]:
        """Return the component-wise sum of both selectors' specificities."""
        outer_ids, outer_classes, outer_elements = self.selector.specificity()
        inner_ids, inner_classes, inner_elements = self.subselector.specificity()
        return (
            outer_ids + inner_ids,
            outer_classes + inner_classes,
            outer_elements + inner_elements,
        )

262 

263 

class Relation:
    """
    A relational pseudo-class applied to a selector:
    ``selector:has(subselector)``.

    The combinator token is stored, but neither ``__repr__`` nor
    ``canonical()`` includes it in its output.
    """

    def __init__(self, selector: Tree, combinator: "Token", subselector: Selector):
        self.selector = selector
        self.combinator = combinator
        self.subselector = subselector

    def __repr__(self) -> str:
        return f"{type(self).__name__}[{self.selector!r}:has({self.subselector!r})]"

    def canonical(self) -> str:
        """Return the CSS text ``<selector>:has(<subselector>)``."""
        # A subscriptable subselector contributes its first item; anything
        # that raises TypeError on indexing is used directly.
        try:
            inner = self.subselector[0].canonical()  # type: ignore
        except TypeError:
            inner = self.subselector.canonical()
        if len(inner) > 1:
            inner = inner.lstrip("*")
        return f"{self.selector.canonical()}:has({inner})"

    def specificity(self) -> Tuple[int, int, int]:
        """Return the component-wise sum of both selectors' specificities."""
        outer_ids, outer_classes, outer_elements = self.selector.specificity()
        # A subscriptable subselector contributes its last item here.
        try:
            inner_ids, inner_classes, inner_elements = self.subselector[-1].specificity()  # type: ignore
        except TypeError:
            inner_ids, inner_classes, inner_elements = self.subselector.specificity()
        return (
            outer_ids + inner_ids,
            outer_classes + inner_classes,
            outer_elements + inner_elements,
        )

297 

298 

class Matching:
    """
    Represents selector:is(selector_list)

    ``selector`` is the selector the pseudo-class is attached to and
    ``selector_list`` holds the parsed arguments of ``:is()``.
    """

    def __init__(self, selector: Tree, selector_list: Iterable[Tree]):
        self.selector = selector
        self.selector_list = selector_list

    def __repr__(self) -> str:
        return "%s[%r:is(%s)]" % (
            self.__class__.__name__,
            self.selector,
            ", ".join(map(repr, self.selector_list)),
        )

    def canonical(self) -> str:
        """Return the CSS text ``<selector>:is(<arg>, <arg>, ...)``."""
        selector_arguments = []
        for s in self.selector_list:
            selarg = s.canonical()
            # Drop the implicit leading "*", but keep a bare universal
            # selector: stripping it would leave an empty argument.
            if len(selarg) > 1:
                selarg = selarg.lstrip("*")
            selector_arguments.append(selarg)
        return "%s:is(%s)" % (self.selector.canonical(), ", ".join(selector_arguments))

    def specificity(self) -> Tuple[int, int, int]:
        """Return the specificity of ``selector`` plus that of the most
        specific argument.

        ``:is()`` only replaces its *own* contribution by that of its most
        specific argument; the selector it is attached to still counts.
        An empty argument list contributes nothing.
        """
        a1, b1, c1 = self.selector.specificity()
        a2, b2, c2 = max(
            (x.specificity() for x in self.selector_list), default=(0, 0, 0)
        )
        return a1 + a2, b1 + b2, c1 + c2

324 

325 

class SpecificityAdjustment:
    """
    Represents selector:where(selector_list)
    Same as selector:is(selector_list), but the pseudo-class itself always
    contributes a specificity of 0.
    """

    def __init__(self, selector: Tree, selector_list: List[Tree]):
        self.selector = selector
        self.selector_list = selector_list

    def __repr__(self) -> str:
        return "%s[%r:where(%s)]" % (
            self.__class__.__name__,
            self.selector,
            ", ".join(map(repr, self.selector_list)),
        )

    def canonical(self) -> str:
        """Return the CSS text ``<selector>:where(<arg>, <arg>, ...)``."""
        selector_arguments = []
        for s in self.selector_list:
            selarg = s.canonical()
            # Drop the implicit leading "*", but keep a bare universal
            # selector: stripping it would leave an empty argument.
            if len(selarg) > 1:
                selarg = selarg.lstrip("*")
            selector_arguments.append(selarg)
        return "%s:where(%s)" % (
            self.selector.canonical(),
            ", ".join(selector_arguments),
        )

    def specificity(self) -> Tuple[int, int, int]:
        """Return the specificity of ``selector`` alone.

        The ``:where()`` arguments never add specificity, but the selector
        the pseudo-class is attached to still counts.
        """
        a, b, c = self.selector.specificity()
        return a, b, c

355 

356 

class Attrib:
    """
    An attribute test applied to a selector:
    ``selector[namespace|attrib operator value]``.

    The operator ``"exists"`` stands for a bare ``[attrib]`` test, in which
    case ``value`` is ``None``; otherwise ``value`` is a token.
    """

    @typing.overload
    def __init__(
        self,
        selector: Tree,
        namespace: Optional[str],
        attrib: str,
        operator: 'typing.Literal["exists"]',
        value: None,
    ) -> None:
        ...

    @typing.overload
    def __init__(
        self, selector: Tree, namespace: Optional[str], attrib: str, operator: str, value: "Token"
    ) -> None:
        ...

    def __init__(
        self,
        selector: Tree,
        namespace: Optional[str],
        attrib: str,
        operator: str,
        value: Optional["Token"],
    ) -> None:
        self.selector = selector
        self.namespace = namespace
        self.attrib = attrib
        self.operator = operator
        self.value = value

    def __repr__(self) -> str:
        qualified = f"{self.namespace}|{self.attrib}" if self.namespace else self.attrib
        class_name = type(self).__name__
        if self.operator == "exists":
            return "%s[%r[%s]]" % (class_name, self.selector, qualified)
        return "%s[%r[%s %s %r]]" % (
            class_name,
            self.selector,
            qualified,
            self.operator,
            typing.cast("Token", self.value).value,
        )

    def canonical(self) -> str:
        """Return the CSS text ``<selector>[<attribute test>]``."""
        qualified = f"{self.namespace}|{self.attrib}" if self.namespace else self.attrib
        if self.operator == "exists":
            test = qualified
        else:
            value_css = typing.cast("Token", self.value).css()
            test = f"{qualified}{self.operator}{value_css}"
        return f"{self.selector.canonical()}[{test}]"

    def specificity(self) -> Tuple[int, int, int]:
        """Return the wrapped selector's specificity with one more class-level point."""
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements

426 

427 

class Element:
    """
    An element name test, optionally namespaced: ``namespace|element``.

    An ``element`` of `None` stands for the universal selector '*'.

    """

    def __init__(self, namespace: Optional[str] = None, element: Optional[str] = None) -> None:
        self.namespace = namespace
        self.element = element

    def __repr__(self) -> str:
        return f"{type(self).__name__}[{self.canonical()}]"

    def canonical(self) -> str:
        """Return the CSS text: the element name (or ``*``), namespace-prefixed if any."""
        local_name = self.element or "*"
        if not self.namespace:
            return local_name
        return f"{self.namespace}|{local_name}"

    def specificity(self) -> Tuple[int, int, int]:
        """Return ``(0, 0, 1)`` for a named element and ``(0, 0, 0)`` for ``*``."""
        return (0, 0, 1) if self.element else (0, 0, 0)

454 

455 

class Hash:
    """
    An ID test applied to a selector: ``selector#id``.
    """

    def __init__(self, selector: Tree, id: str) -> None:
        self.selector = selector
        self.id = id

    def __repr__(self) -> str:
        return f"{type(self).__name__}[{self.selector!r}#{self.id}]"

    def canonical(self) -> str:
        """Return the CSS text ``<selector>#<id>``."""
        return f"{self.selector.canonical()}#{self.id}"

    def specificity(self) -> Tuple[int, int, int]:
        """Return the wrapped selector's specificity with one more ID-level point."""
        ids, classes, elements = self.selector.specificity()
        return ids + 1, classes, elements

475 

476 

class CombinedSelector:
    """
    Two selectors joined by a combinator: ``selector combinator subselector``.

    The combinator is stored as a string; a single space is shown as
    ``<followed>`` in ``repr()``.
    """

    def __init__(self, selector: Tree, combinator: str, subselector: Tree) -> None:
        assert selector is not None
        self.selector = selector
        self.combinator = combinator
        self.subselector = subselector

    def __repr__(self) -> str:
        shown_combinator = "<followed>" if self.combinator == " " else self.combinator
        return "%s[%r %s %r]" % (
            type(self).__name__,
            self.selector,
            shown_combinator,
            self.subselector,
        )

    def canonical(self) -> str:
        """Return the CSS text ``<selector> <combinator> <subselector>``."""
        right = self.subselector.canonical()
        # Drop the implicit leading "*" of the right-hand side unless it is
        # all there is.
        if len(right) > 1:
            right = right.lstrip("*")
        return f"{self.selector.canonical()} {self.combinator} {right}"

    def specificity(self) -> Tuple[int, int, int]:
        """Return the component-wise sum of both selectors' specificities."""
        left_ids, left_classes, left_elements = self.selector.specificity()
        right_ids, right_classes, right_elements = self.subselector.specificity()
        return (
            left_ids + right_ids,
            left_classes + right_classes,
            left_elements + right_elements,
        )

501 

502 

503#### Parser 

504 

# Patterns for the fast path in parse(); each tolerates leading and
# trailing whitespace around the selector.

# A bare element name made of ASCII letters: foo
_el_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$")

# An ID with an optional element name in front: foo#bar or #bar
_id_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$")

# A class with an optional element name in front: foo.bar or .bar
_class_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$")

513 

514 

def parse(css: str) -> List[Selector]:
    """Parse a CSS *group of selectors*.

    Callers that need neither pseudo-elements nor selector specificity
    can go straight to :meth:`~GenericTranslator.css_to_xpath` instead.

    :param css:
        A *group of selectors* as a string.
    :raises:
        :class:`SelectorSyntaxError` on invalid selectors.
    :returns:
        A list of parsed :class:`Selector` objects, one for each
        selector in the comma-separated group.

    """
    # Fast path: a bare element name needs no tokenizing at all.
    simple = _el_re.match(css)
    if simple is not None:
        return [Selector(Element(element=simple.group(1)))]

    # Fast path: "foo#bar" / "#bar", then "foo.bar" / ".bar".
    for pattern, node_class in ((_id_re, Hash), (_class_re, Class)):
        simple = pattern.match(css)
        if simple is not None:
            element_name, name = simple.groups()
            return [Selector(node_class(Element(element=element_name or None), name))]

    # General case: tokenize and run the full parser.
    stream = TokenStream(tokenize(css))
    stream.source = css
    return list(parse_selector_group(stream))

544 

545 

546# except SelectorSyntaxError: 

547# e = sys.exc_info()[1] 

548# message = "%s at %s -> %r" % ( 

549# e, stream.used, stream.peek()) 

550# e.msg = message 

551# e.args = tuple([message]) 

552# raise 

553 

554 

def parse_selector_group(stream: "TokenStream") -> Iterator[Selector]:
    """Yield one :class:`Selector` per comma-separated selector in *stream*.

    Parsing stops at the first selector that is not followed by a comma.
    """
    stream.skip_whitespace()
    while True:
        tree, pseudo_element = parse_selector(stream)
        yield Selector(tree, pseudo_element)
        if stream.peek() != ("DELIM", ","):
            return
        # Consume the comma and any whitespace before the next selector.
        stream.next()
        stream.skip_whitespace()

564 

565 

def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement]]:
    """Parse one selector: simple selectors chained by combinators.

    :returns:
        ``(tree, pseudo_element)``; parsing ends at EOF or at a comma,
        neither of which is consumed.
    :raises:
        :class:`SelectorSyntaxError` if a pseudo-element is followed by
        anything else in the selector.
    """
    tree, pseudo_element = parse_simple_selector(stream)
    while True:
        stream.skip_whitespace()
        upcoming = stream.peek()
        if upcoming == ("EOF", None) or upcoming == ("DELIM", ","):
            return tree, pseudo_element
        if pseudo_element:
            raise SelectorSyntaxError(
                "Got pseudo-element ::%s not at the end of a selector" % pseudo_element
            )
        if upcoming.is_delim("+", ">", "~"):
            # An explicit combinator
            combinator = typing.cast(str, stream.next().value)
            stream.skip_whitespace()
        else:
            # Nothing explicit, so the previous simple selector ended at
            # whitespace: this is the descendant combinator.
            combinator = " "
        right_hand_side, pseudo_element = parse_simple_selector(stream)
        tree = CombinedSelector(tree, combinator, right_hand_side)

588 

589 

def parse_simple_selector(
    stream: "TokenStream", inside_negation: bool = False
) -> Tuple[Tree, Optional[PseudoElement]]:
    """Parse one simple selector sequence (no combinators) from *stream*.

    :param stream:
        The token stream, positioned at the start of the selector.
    :param inside_negation:
        When true, a closing ``)`` also terminates the selector and a
        nested ``:not()`` is rejected.
    :returns:
        ``(tree, pseudo_element)`` where *pseudo_element* is ``None``,
        an identifier string or a :class:`FunctionalPseudoElement`.
    :raises:
        :class:`SelectorSyntaxError` if no token was consumed, if an
        unexpected token is met, or if anything follows a pseudo-element.
    """
    stream.skip_whitespace()
    # Remembered so an empty selector can be detected at the end.
    selector_start = len(stream.used)
    peek = stream.peek()
    # Optional leading type selector: "ident", "*", "ns|ident" or "ns|*".
    if peek.type == "IDENT" or peek == ("DELIM", "*"):
        if peek.type == "IDENT":
            namespace = stream.next().value
        else:
            stream.next()
            namespace = None
        if stream.peek() == ("DELIM", "|"):
            stream.next()
            element = stream.next_ident_or_star()
        else:
            # No "|" follows: what was read is the element name itself.
            element = namespace
            namespace = None
    else:
        element = namespace = None
    result: Tree = Element(namespace, element)
    pseudo_element: Optional[PseudoElement] = None
    while 1:
        peek = stream.peek()
        # Whitespace, EOF, a comma or a combinator ends the simple selector;
        # so does ")" when parsing the argument of a functional pseudo-class.
        if (
            peek.type in ("S", "EOF")
            or peek.is_delim(",", "+", ">", "~")
            or (inside_negation and peek == ("DELIM", ")"))
        ):
            break
        if pseudo_element:
            raise SelectorSyntaxError(
                "Got pseudo-element ::%s not at the end of a selector" % pseudo_element
            )
        if peek.type == "HASH":
            result = Hash(result, typing.cast(str, stream.next().value))
        elif peek == ("DELIM", "."):
            stream.next()
            result = Class(result, stream.next_ident())
        elif peek == ("DELIM", "|"):
            stream.next()
            # NOTE(review): this replaces whatever was built so far with a
            # fresh element that has no namespace.
            result = Element(None, stream.next_ident())
        elif peek == ("DELIM", "["):
            stream.next()
            result = parse_attrib(result, stream)
        elif peek == ("DELIM", ":"):
            stream.next()
            if stream.peek() == ("DELIM", ":"):
                # "::name" or "::name(args)" -- a pseudo-element.
                stream.next()
                pseudo_element = stream.next_ident()
                if stream.peek() == ("DELIM", "("):
                    stream.next()
                    pseudo_element = FunctionalPseudoElement(
                        pseudo_element, parse_arguments(stream)
                    )
                continue
            ident = stream.next_ident()
            if ident.lower() in ("first-line", "first-letter", "before", "after"):
                # Special case: CSS 2.1 pseudo-elements can have a single ':'
                # Any new pseudo-element must have two.
                pseudo_element = str(ident)
                continue
            if stream.peek() != ("DELIM", "("):
                # A plain (non-functional) pseudo-class.
                result = Pseudo(result, ident)
                if repr(result) == "Pseudo[Element[*]:scope]":
                    # A bare ":scope" is only accepted when the tokens used
                    # so far show that it opens the selector (optionally
                    # after one whitespace token) or directly follows a
                    # comma (optionally with one whitespace token between).
                    if not (
                        len(stream.used) == 2
                        or (len(stream.used) == 3 and stream.used[0].type == "S")
                        or (len(stream.used) >= 3 and stream.used[-3].is_delim(","))
                        or (
                            len(stream.used) >= 4
                            and stream.used[-3].type == "S"
                            and stream.used[-4].is_delim(",")
                        )
                    ):
                        raise SelectorSyntaxError(
                            'Got immediate child pseudo-element ":scope" '
                            "not at the start of a selector"
                        )
                continue
            # Functional pseudo-class: consume "(" and dispatch on the name.
            stream.next()
            stream.skip_whitespace()
            if ident.lower() == "not":
                if inside_negation:
                    raise SelectorSyntaxError("Got nested :not()")
                argument, argument_pseudo_element = parse_simple_selector(
                    stream, inside_negation=True
                )
                # Expected to be the closing ")".
                next = stream.next()
                if argument_pseudo_element:
                    raise SelectorSyntaxError(
                        "Got pseudo-element ::%s inside :not() at %s"
                        % (argument_pseudo_element, next.pos)
                    )
                if next != ("DELIM", ")"):
                    raise SelectorSyntaxError("Expected ')', got %s" % (next,))
                result = Negation(result, argument)
            elif ident.lower() == "has":
                combinator, arguments = parse_relative_selector(stream)
                result = Relation(result, combinator, arguments)

            elif ident.lower() in ("matches", "is"):
                selectors = parse_simple_selector_arguments(stream)
                result = Matching(result, selectors)
            elif ident.lower() == "where":
                selectors = parse_simple_selector_arguments(stream)
                result = SpecificityAdjustment(result, selectors)
            else:
                # Any other name is kept as a generic function with raw
                # token arguments.
                result = Function(result, ident, parse_arguments(stream))
        else:
            raise SelectorSyntaxError("Expected selector, got %s" % (peek,))
    # Nothing was consumed at all: there was no selector here.
    if len(stream.used) == selector_start:
        raise SelectorSyntaxError("Expected selector, got %s" % (stream.peek(),))
    return result, pseudo_element

704 

705 

def parse_arguments(stream: "TokenStream") -> List["Token"]:
    """Collect the argument tokens of a function up to the closing ``)``.

    Whitespace between arguments is skipped.  Accepted arguments are
    IDENT, STRING and NUMBER tokens plus the ``+`` and ``-`` delimiters.

    :returns: the argument tokens, in order; the ``)`` is consumed.
    :raises: :class:`SelectorSyntaxError` on any other token.
    """
    collected: List["Token"] = []
    while True:
        stream.skip_whitespace()
        token = stream.next()
        if token == ("DELIM", ")"):
            return collected
        is_value = token.type in ("IDENT", "STRING", "NUMBER")
        is_sign = token == ("DELIM", "+") or token == ("DELIM", "-")
        if not (is_value or is_sign):
            raise SelectorSyntaxError("Expected an argument, got %s" % (token,))
        collected.append(token)

717 

718 

def parse_relative_selector(stream: "TokenStream") -> Tuple["Token", Selector]:
    """Parse the argument of ``:has()`` up to the closing ``)``.

    An optional leading ``+``, ``-``, ``>`` or ``~`` delimiter is taken as
    the combinator; otherwise a synthetic space token is used.  The text of
    the remaining tokens is concatenated and parsed again with
    :func:`parse`.

    :returns: ``(combinator, selector)`` where *selector* is the first
        selector parsed from the concatenated text.
    :raises: :class:`SelectorSyntaxError` on a token other than IDENT,
        STRING, NUMBER, ``.`` or ``*`` before the closing ``)``.
    """
    stream.skip_whitespace()
    token = stream.next()

    if token.is_delim("+", "-", ">", "~"):
        combinator = token
        stream.skip_whitespace()
        token = stream.next()
    else:
        combinator = Token("DELIM", " ", pos=0)

    pieces = []
    while True:
        if token == ("DELIM", ")"):
            return combinator, parse("".join(pieces))[0]
        is_value = token.type in ("IDENT", "STRING", "NUMBER")
        if not (is_value or token.is_delim(".", "*")):
            raise SelectorSyntaxError("Expected an argument, got %s" % (token,))
        pieces.append(typing.cast(str, token.value))
        token = stream.next()

740 

741 

def parse_simple_selector_arguments(stream: "TokenStream") -> List[Tree]:
    """Parse a comma-separated list of simple selectors up to the closing ``)``.

    Used for the arguments of ``:is()`` / ``:matches()`` and ``:where()``.

    :param stream:
        The token stream, positioned just after the opening ``(``.
    :returns:
        The parsed selector trees, in source order; the ``)`` is consumed.
    :raises:
        :class:`SelectorSyntaxError` if an argument carries a
        pseudo-element, or if an argument is followed by anything other
        than ``,`` or ``)`` (including the end of the input).
    """
    arguments = []
    while True:
        result, pseudo_element = parse_simple_selector(stream, True)
        if pseudo_element:
            raise SelectorSyntaxError(
                "Got pseudo-element ::%s inside function" % (pseudo_element,)
            )
        stream.skip_whitespace()
        token = stream.next()
        if token == ("DELIM", ","):
            # Only the comma itself is consumed here.  Consuming one more
            # token unconditionally would drop the first token of the next
            # argument whenever no whitespace follows the comma, e.g. the
            # "b" of ":is(a,b)".
            stream.skip_whitespace()
            arguments.append(result)
        elif token == ("DELIM", ")"):
            arguments.append(result)
            break
        else:
            # This also covers EOF: an unterminated argument list is a
            # syntax error rather than an exhausted-iterator failure.
            raise SelectorSyntaxError("Expected an argument, got %s" % (token,))
    return arguments

762 

763 

def parse_attrib(selector: Tree, stream: "TokenStream") -> Attrib:
    """Parse an attribute test; the opening ``[`` has already been consumed.

    :param selector: the tree the attribute test applies to.
    :param stream: the token stream, positioned just after ``[``.
    :returns: an :class:`Attrib` wrapping *selector*; the ``]`` is consumed.
    :raises: :class:`SelectorSyntaxError` on malformed input.
    """
    stream.skip_whitespace()
    attrib = stream.next_ident_or_star()
    has_pipe = stream.peek() == ("DELIM", "|")
    # A "*" is only meaningful as a namespace, so a "|" must follow it.
    if attrib is None and not has_pipe:
        raise SelectorSyntaxError("Expected '|', got %s" % (stream.peek(),))

    namespace: Optional[str] = None
    op: Optional[str] = None
    if has_pipe:
        stream.next()
        if stream.peek() == ("DELIM", "="):
            # "attr|=value": the pipe belonged to the operator.
            stream.next()
            op = "|="
        else:
            # "ns|attr": what was read first is the namespace.
            namespace, attrib = attrib, stream.next_ident()

    if op is None:
        stream.skip_whitespace()
        token = stream.next()
        if token == ("DELIM", "]"):
            return Attrib(selector, namespace, typing.cast(str, attrib), "exists", None)
        if token == ("DELIM", "="):
            op = "="
        elif token.is_delim("^", "$", "*", "~", "|", "!") and stream.peek() == ("DELIM", "="):
            op = typing.cast(str, token.value) + "="
            stream.next()
        else:
            raise SelectorSyntaxError("Operator expected, got %s" % (token,))

    stream.skip_whitespace()
    value = stream.next()
    if value.type != "IDENT" and value.type != "STRING":
        raise SelectorSyntaxError("Expected string or ident, got %s" % (value,))
    stream.skip_whitespace()
    closing = stream.next()
    if closing != ("DELIM", "]"):
        raise SelectorSyntaxError("Expected ']', got %s" % (closing,))
    return Attrib(selector, namespace, typing.cast(str, attrib), op, value)

804 

805 

def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]:
    """
    Parses the arguments for :nth-child() and friends.

    :param tokens:
        The argument tokens.  Any iterable is accepted, including a
        one-shot iterator.
    :raises ValueError:
        If a STRING token is present, or if the text is not a valid
        ``an+b`` series.
    :returns: ``(a, b)``

    """
    # The tokens are walked twice (validation, then joining), so a one-shot
    # iterator has to be materialized first or the second pass sees nothing.
    tokens = list(tokens)
    if any(token.type == "STRING" for token in tokens):
        raise ValueError("String tokens not allowed in series.")
    s = "".join(typing.cast(str, token.value) for token in tokens).strip()
    if s == "odd":
        return 2, 1
    elif s == "even":
        return 2, 0
    elif s == "n":
        return 1, 0
    if "n" not in s:
        # Just b
        return 0, int(s)
    a, b = s.split("n", 1)
    a_as_int: int
    if not a:
        a_as_int = 1
    elif a == "-" or a == "+":
        # A bare sign stands for -1 / +1.
        a_as_int = int(a + "1")
    else:
        a_as_int = int(a)
    b_as_int: int
    if not b:
        b_as_int = 0
    else:
        b_as_int = int(b)
    return a_as_int, b_as_int

841 

842 

843#### Token objects 

844 

845 

class Token(Tuple[str, Optional[str]]):
    """A ``(type, value)`` tuple that also remembers its source position.

    Because it is a tuple, a token compares equal to a plain
    ``(type, value)`` tuple; ``pos`` takes no part in comparisons.
    """

    pos: int

    @typing.overload
    def __new__(
        cls,
        type_: 'typing.Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"]',
        value: str,
        pos: int,
    ) -> "Token":
        ...

    @typing.overload
    def __new__(cls, type_: 'typing.Literal["EOF"]', value: None, pos: int) -> "Token":
        ...

    def __new__(cls, type_: str, value: Optional[str], pos: int) -> "Token":
        token = tuple.__new__(cls, (type_, value))
        token.pos = pos
        return token

    def __repr__(self) -> str:
        return "<%s '%s' at %i>" % (self.type, self.value, self.pos)

    def is_delim(self, *values: str) -> bool:
        """Return whether this is a DELIM token whose value is one of *values*."""
        if self.type != "DELIM":
            return False
        return self.value in values

    @property
    def type(self) -> str:
        return self[0]

    @property
    def value(self) -> Optional[str]:
        return self[1]

    def css(self) -> str:
        """Return the token as CSS text; STRING values are rendered with ``repr()``."""
        return repr(self.value) if self.type == "STRING" else typing.cast(str, self.value)

886 

887 

class EOFToken(Token):
    """The end-of-input token: type ``"EOF"`` with a value of ``None``."""

    def __new__(cls, pos: int) -> "EOFToken":
        eof = Token.__new__(cls, "EOF", None, pos)
        return typing.cast("EOFToken", eof)

    def __repr__(self) -> str:
        return "<%s at %i>" % (self.type, self.pos)

894 

895 

896#### Tokenizer 

897 

898 

class TokenMacros:
    # Regular-expression fragments that _compile() substitutes into token
    # patterns through %(name)s placeholders.

    # A backslash, one to six hex digits, then optionally one whitespace
    # character (or a CR LF pair).  The digits are captured.
    unicode_escape = r"\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?"
    # Either a unicode escape, or a backslash followed by any character
    # that is neither a newline character nor a hex digit.
    escape = unicode_escape + r"|\\[^\n\r\f0-9a-f]"
    # Inside strings, a backslash followed by a newline is allowed as well.
    string_escape = r"\\(?:\n|\r\n|\r|\f)|" + escape
    # Any character outside the range \0-\177 (octal), i.e. non-ASCII.
    nonascii = r"[^\0-\177]"
    # One character (or escape) allowed inside a name.
    nmchar = "[_a-z0-9-]|%s|%s" % (escape, nonascii)
    # One character (or escape) allowed at the start of a name.
    nmstart = "[_a-z]|%s|%s" % (escape, nonascii)

906 

907 

# Only defined for static type checkers; never evaluated at run time.
if typing.TYPE_CHECKING:

    class MatchFunc(typing.Protocol):
        """Call signature of the matcher functions returned by ``_compile()``."""

        def __call__(
            self, string: str, pos: int = ..., endpos: int = ...
        ) -> Optional["re.Match[str]"]:
            ...

915 

916 

def _compile(pattern: str) -> "MatchFunc":
    """Expand the ``%(name)s`` macros in *pattern* from :class:`TokenMacros`
    and return the ``match`` method of the case-insensitive compiled regex.
    """
    expanded = pattern % vars(TokenMacros)
    compiled = re.compile(expanded, re.IGNORECASE)
    return compiled.match

919 

920 

# Matchers used by tokenize(); each is the bound ``match`` method of a
# case-insensitive compiled pattern.
_match_whitespace = _compile(r"[ \t\r\n\f]+")
_match_number = _compile(r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)")
_match_hash = _compile("#(?:%(nmchar)s)+")
_match_ident = _compile("-?(?:%(nmstart)s)(?:%(nmchar)s)*")
# Matchers for the body of a string, keyed by its quote character.
# Both can match the empty string.
_match_string_by_quote = {
    "'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"),
    '"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'),
}

# Substitution functions used to resolve escapes in token values.
_sub_simple_escape = re.compile(r"\\(.)").sub
_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub
_sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub

# Same as r'\1', but faster on CPython
_replace_simple = operator.methodcaller("group", 1)

936 

937 

938def _replace_unicode(match: "re.Match[str]") -> str: 

939 codepoint = int(match.group(1), 16) 

940 if codepoint > sys.maxunicode: 

941 codepoint = 0xFFFD 

942 return chr(codepoint) 

943 

944 

def unescape_ident(value: str) -> str:
    """Return *value* with CSS escapes resolved.

    Unicode (hexadecimal) escapes are replaced first, then every remaining
    backslash escape is replaced by the character that follows the backslash.
    """
    without_unicode_escapes = _sub_unicode_escape(_replace_unicode, value)
    return _sub_simple_escape(_replace_simple, without_unicode_escapes)

949 

950 

def tokenize(s: str) -> Iterator[Token]:
    """Split the selector text *s* into tokens.

    Yields S, IDENT, HASH, STRING, NUMBER and DELIM tokens, each carrying
    the position in *s* where it starts, followed by one final
    :class:`EOFToken`.  Comments (``/* ... */``) produce no token.

    :raises:
        :class:`SelectorSyntaxError` for an unclosed or invalid string.
    """
    pos = 0
    len_s = len(s)
    while pos < len_s:
        # A whole run of whitespace becomes a single S token whose value is
        # always one space.
        match = _match_whitespace(s, pos=pos)
        if match:
            yield Token("S", " ", pos)
            pos = match.end()
            continue

        # Identifiers are tried before numbers.  Unicode escapes are
        # resolved first, then simple backslash escapes.
        match = _match_ident(s, pos=pos)
        if match:
            value = _sub_simple_escape(
                _replace_simple, _sub_unicode_escape(_replace_unicode, match.group())
            )
            yield Token("IDENT", value, pos)
            pos = match.end()
            continue

        match = _match_hash(s, pos=pos)
        if match:
            # [1:] drops the leading "#" from the token value.
            value = _sub_simple_escape(
                _replace_simple, _sub_unicode_escape(_replace_unicode, match.group()[1:])
            )
            yield Token("HASH", value, pos)
            pos = match.end()
            continue

        quote = s[pos]
        if quote in _match_string_by_quote:
            # Match the string body, starting just after the opening quote.
            match = _match_string_by_quote[quote](s, pos=pos + 1)
            assert match, "Should have found at least an empty match"
            end_pos = match.end()
            # The body ran to the end of the input: no closing quote.
            if end_pos == len_s:
                raise SelectorSyntaxError("Unclosed string at %s" % pos)
            # The body stopped at something other than the closing quote.
            if s[end_pos] != quote:
                raise SelectorSyntaxError("Invalid string at %s" % pos)
            # Escaped newlines are removed before the other escapes are
            # resolved.
            value = _sub_simple_escape(
                _replace_simple,
                _sub_unicode_escape(_replace_unicode, _sub_newline_escape("", match.group())),
            )
            yield Token("STRING", value, pos)
            # Skip the closing quote.
            pos = end_pos + 1
            continue

        match = _match_number(s, pos=pos)
        if match:
            value = match.group()
            yield Token("NUMBER", value, pos)
            pos = match.end()
            continue

        # Skip a comment; an unterminated one swallows the rest of the input.
        pos2 = pos + 2
        if s[pos:pos2] == "/*":
            pos = s.find("*/", pos2)
            if pos == -1:
                pos = len_s
            else:
                pos += 2
            continue

        # Anything else is a one-character delimiter.
        yield Token("DELIM", s[pos], pos)
        pos += 1

    assert pos == len_s
    yield EOFToken(pos)

1017 

1018 

class TokenStream:
    """An iterator over tokens with one token of look-ahead.

    Every token handed out by :meth:`next` is also appended to ``used``.
    """

    def __init__(self, tokens: Iterable[Token], source: Optional[str] = None) -> None:
        self.used: List[Token] = []
        self.tokens = iter(tokens)
        self.source = source
        self.peeked: Optional[Token] = None
        self._peeking = False
        self.next_token = self.tokens.__next__

    def next(self) -> Token:
        """Consume and return the next token, recording it in ``used``."""
        if self._peeking:
            # Hand out the token that peek() already pulled from the iterator.
            self._peeking = False
            token = typing.cast(Token, self.peeked)
        else:
            token = self.next_token()
        self.used.append(token)
        return token

    def peek(self) -> Token:
        """Return the next token without consuming it."""
        if not self._peeking:
            self.peeked = self.next_token()
            self._peeking = True
        return typing.cast(Token, self.peeked)

    def next_ident(self) -> str:
        """Consume the next token and return its value; it must be an IDENT."""
        token = self.next()
        if token.type != "IDENT":
            raise SelectorSyntaxError("Expected ident, got %s" % (token,))
        return typing.cast(str, token.value)

    def next_ident_or_star(self) -> Optional[str]:
        """Consume the next token; return an IDENT's value, or ``None`` for ``*``."""
        token = self.next()
        if token.type == "IDENT":
            return token.value
        if token == ("DELIM", "*"):
            return None
        raise SelectorSyntaxError("Expected ident or '*', got %s" % (token,))

    def skip_whitespace(self) -> None:
        """Consume the next token if it is whitespace."""
        if self.peek().type == "S":
            self.next()