Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/cssselect/parser.py: 80%
600 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-09 06:19 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-09 06:19 +0000
1# -*- coding: utf-8 -*-
2"""
3 cssselect.parser
4 ================
6 Tokenizer, parser and parsed objects for CSS selectors.
9 :copyright: (c) 2007-2012 Ian Bicking and contributors.
10 See AUTHORS for more details.
11 :license: BSD, see LICENSE for more details.
13"""
15import sys
16import re
17import operator
18import typing
19from typing import Iterable, Iterator, List, Optional, Sequence, Tuple, Union
def ascii_lower(string: str) -> str:
    """Return *string* with only its ASCII letters lower-cased.

    The text is round-tripped through UTF-8 bytes because ``bytes.lower()``
    only touches ASCII letters, whereas ``str.lower()`` would also fold
    non-ASCII characters.
    """
    encoded = string.encode("utf8")
    lowered = encoded.lower()
    return lowered.decode("utf8")
class SelectorError(Exception):
    """Base class shared by :class:`SelectorSyntaxError` and
    :class:`ExpressionError`.

    Catching ``SelectorError`` around a call to
    :meth:`~GenericTranslator.css_to_xpath` handles both exception types
    at once.

    """
class SelectorSyntaxError(SelectorError, SyntaxError):
    """Raised when a selector being parsed does not match the grammar.

    It derives from both :class:`SelectorError` and the built-in
    :class:`SyntaxError`, so either can be used to catch it.
    """
#### Parsed objects

# Any node that can appear in a parsed selector tree.  The members are
# written as strings (forward references) because the classes themselves
# are defined further down in this module.
Tree = Union[
    "Element",
    "Hash",
    "Class",
    "Function",
    "Pseudo",
    "Attrib",
    "Negation",
    "Relation",
    "Matching",
    "SpecificityAdjustment",
    "CombinedSelector",
]

# A pseudo-element is either a plain identifier or a functional one.
PseudoElement = Union["FunctionalPseudoElement", str]
class Selector:
    """
    A single parsed selector: a tree of parsed objects plus an optional
    pseudo-element.

    :meth:`~GenericTranslator.selector_to_xpath` accepts this object,
    but ignores :attr:`pseudo_element`. It is the user’s responsibility
    to account for pseudo-elements and reject selectors with unknown
    or unsupported pseudo-elements.

    """

    def __init__(self, tree: "Tree", pseudo_element: "Optional[PseudoElement]" = None) -> None:
        self.parsed_tree = tree
        # Plain (string) pseudo-element names are normalised to ASCII
        # lower-case; functional ones are stored as given.
        is_plain_name = pseudo_element is not None and not isinstance(
            pseudo_element, FunctionalPseudoElement
        )
        if is_plain_name:
            pseudo_element = ascii_lower(typing.cast(str, pseudo_element))
        #: A :class:`FunctionalPseudoElement`,
        #: or the identifier for the pseudo-element as a string,
        #: or ``None``.
        #:
        #: +-------------------------+----------------+--------------------------------+
        #: |                         | Selector       | Pseudo-element                 |
        #: +=========================+================+================================+
        #: | CSS3 syntax             | ``a::before``  | ``'before'``                   |
        #: +-------------------------+----------------+--------------------------------+
        #: | Older syntax            | ``a:before``   | ``'before'``                   |
        #: +-------------------------+----------------+--------------------------------+
        #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'``                   |
        #: | not in Selectors3       |                |                                |
        #: +-------------------------+----------------+--------------------------------+
        #: | Invalid pseudo-class    | ``li:marker``  | ``None``                       |
        #: +-------------------------+----------------+--------------------------------+
        #: | Functional              | ``a::foo(2)``  | ``FunctionalPseudoElement(…)`` |
        #: +-------------------------+----------------+--------------------------------+
        #:
        #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement
        self.pseudo_element = pseudo_element

    def __repr__(self) -> str:
        pseudo = self.pseudo_element
        if isinstance(pseudo, FunctionalPseudoElement):
            suffix = repr(pseudo)
        elif pseudo:
            suffix = "::%s" % pseudo
        else:
            suffix = ""
        return "%s[%r%s]" % (self.__class__.__name__, self.parsed_tree, suffix)

    def canonical(self) -> str:
        """Return a CSS representation for this selector (a string)"""
        pseudo = self.pseudo_element
        if isinstance(pseudo, FunctionalPseudoElement):
            suffix = "::%s" % pseudo.canonical()
        elif pseudo:
            suffix = "::%s" % pseudo
        else:
            suffix = ""
        css = "%s%s" % (self.parsed_tree.canonical(), suffix)
        # Drop the implicit leading universal selector, but keep a lone "*".
        return css.lstrip("*") if len(css) > 1 else css

    def specificity(self) -> Tuple[int, int, int]:
        """Return the specificity_ of this selector as a tuple of 3 integers.

        A pseudo-element, when present, adds one to the last component.

        .. _specificity: http://www.w3.org/TR/selectors/#specificity

        """
        ids, classes, elements = self.parsed_tree.specificity()
        if self.pseudo_element:
            elements += 1
        return ids, classes, elements
class Class:
    """
    Represents selector.class_name
    """

    def __init__(self, selector: "Tree", class_name: str) -> None:
        self.selector = selector
        self.class_name = class_name

    def __repr__(self) -> str:
        own_name = self.__class__.__name__
        return "%s[%r.%s]" % (own_name, self.selector, self.class_name)

    def canonical(self) -> str:
        """Return the CSS text: the inner selector followed by ``.class_name``."""
        inner = self.selector.canonical()
        return "%s.%s" % (inner, self.class_name)

    def specificity(self) -> Tuple[int, int, int]:
        """Return the inner selector's specificity with the middle component + 1."""
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements
class FunctionalPseudoElement:
    """
    Represents selector::name(arguments)

    .. attribute:: name

        The name (identifier) of the pseudo-element, as a string
        (ASCII lower-cased on construction).

    .. attribute:: arguments

        The arguments of the pseudo-element, as a list of tokens.

        **Note:** tokens are not part of the public API,
        and may change between cssselect versions.
        Use at your own risks.

    """

    def __init__(self, name: str, arguments: Sequence["Token"]):
        self.name = ascii_lower(name)
        self.arguments = arguments

    def __repr__(self) -> str:
        values = [argument.value for argument in self.arguments]
        return "%s[::%s(%r)]" % (self.__class__.__name__, self.name, values)

    def argument_types(self) -> List[str]:
        """Return the ``type`` of each argument token, in order."""
        kinds = []
        for argument in self.arguments:
            kinds.append(argument.type)
        return kinds

    def canonical(self) -> str:
        """Return ``name(args)`` with the arguments' CSS text concatenated."""
        pieces = [argument.css() for argument in self.arguments]
        return "%s(%s)" % (self.name, "".join(pieces))
class Function:
    """
    Represents selector:name(expr)
    """

    def __init__(self, selector: "Tree", name: str, arguments: Sequence["Token"]) -> None:
        self.selector = selector
        # Function names are stored ASCII lower-cased.
        self.name = ascii_lower(name)
        self.arguments = arguments

    def __repr__(self) -> str:
        values = [argument.value for argument in self.arguments]
        return "%s[%r:%s(%r)]" % (self.__class__.__name__, self.selector, self.name, values)

    def argument_types(self) -> List[str]:
        """Return the ``type`` of each argument token, in order."""
        kinds = []
        for argument in self.arguments:
            kinds.append(argument.type)
        return kinds

    def canonical(self) -> str:
        """Return the CSS text ``selector:name(args)``."""
        pieces = [argument.css() for argument in self.arguments]
        inner = self.selector.canonical()
        return "%s:%s(%s)" % (inner, self.name, "".join(pieces))

    def specificity(self) -> Tuple[int, int, int]:
        """Return the inner selector's specificity with the middle component + 1."""
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements
class Pseudo:
    """
    Represents selector:ident
    """

    def __init__(self, selector: "Tree", ident: str) -> None:
        self.selector = selector
        # The pseudo-class name is stored ASCII lower-cased.
        self.ident = ascii_lower(ident)

    def __repr__(self) -> str:
        own_name = self.__class__.__name__
        return "%s[%r:%s]" % (own_name, self.selector, self.ident)

    def canonical(self) -> str:
        """Return the CSS text ``selector:ident``."""
        inner = self.selector.canonical()
        return "%s:%s" % (inner, self.ident)

    def specificity(self) -> Tuple[int, int, int]:
        """Return the inner selector's specificity with the middle component + 1."""
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements
class Negation:
    """
    Represents selector:not(subselector)
    """

    def __init__(self, selector: "Tree", subselector: "Tree") -> None:
        self.selector = selector
        self.subselector = subselector

    def __repr__(self) -> str:
        own_name = self.__class__.__name__
        return "%s[%r:not(%r)]" % (own_name, self.selector, self.subselector)

    def canonical(self) -> str:
        """Return the CSS text ``selector:not(subselector)``."""
        argument = self.subselector.canonical()
        # Drop the implicit leading "*" of the argument, but keep a lone "*".
        if len(argument) > 1:
            argument = argument.lstrip("*")
        return "%s:not(%s)" % (self.selector.canonical(), argument)

    def specificity(self) -> Tuple[int, int, int]:
        """Return the component-wise sum of both selectors' specificities."""
        outer_ids, outer_classes, outer_elements = self.selector.specificity()
        inner_ids, inner_classes, inner_elements = self.subselector.specificity()
        return (
            outer_ids + inner_ids,
            outer_classes + inner_classes,
            outer_elements + inner_elements,
        )
class Relation:
    """
    Represents selector:has(subselector)
    """

    def __init__(self, selector: "Tree", combinator: "Token", subselector: "Selector"):
        self.selector = selector
        self.combinator = combinator
        self.subselector = subselector

    def __repr__(self) -> str:
        own_name = self.__class__.__name__
        return "%s[%r:has(%r)]" % (own_name, self.selector, self.subselector)

    def canonical(self) -> str:
        """Return the CSS text ``selector:has(subselector)``.

        The combinator is not part of the output.
        """
        # The subselector may be a sequence (use its first item) or a single
        # object; a TypeError from the indexed attempt selects the latter.
        try:
            argument = self.subselector[0].canonical()  # type: ignore
        except TypeError:
            argument = self.subselector.canonical()
        # Drop the implicit leading "*" of the argument, but keep a lone "*".
        if len(argument) > 1:
            argument = argument.lstrip("*")
        return "%s:has(%s)" % (self.selector.canonical(), argument)

    def specificity(self) -> Tuple[int, int, int]:
        """Return the component-wise sum of both selectors' specificities."""
        outer_ids, outer_classes, outer_elements = self.selector.specificity()
        # Same sequence-or-single fallback as canonical(), but using the
        # last item of a sequence.
        try:
            inner_ids, inner_classes, inner_elements = self.subselector[-1].specificity()  # type: ignore
        except TypeError:
            inner_ids, inner_classes, inner_elements = self.subselector.specificity()
        return (
            outer_ids + inner_ids,
            outer_classes + inner_classes,
            outer_elements + inner_elements,
        )
class Matching:
    """
    Represents selector:is(selector_list)
    """

    def __init__(self, selector: "Tree", selector_list: "Iterable[Tree]"):
        self.selector = selector
        self.selector_list = selector_list

    def __repr__(self) -> str:
        return "%s[%r:is(%s)]" % (
            self.__class__.__name__,
            self.selector,
            ", ".join(map(repr, self.selector_list)),
        )

    def canonical(self) -> str:
        """Return the CSS text ``selector:is(arg1, arg2, ...)``."""
        selector_arguments = []
        for s in self.selector_list:
            selarg = s.canonical()
            # Drop the implicit leading "*" ("*.foo" -> ".foo") but keep a
            # lone "*" so that ``:is(*)`` does not degrade to ``:is()``.
            if len(selarg) > 1:
                selarg = selarg.lstrip("*")
            selector_arguments.append(selarg)
        return "%s:is(%s)" % (self.selector.canonical(), ", ".join(selector_arguments))

    def specificity(self) -> Tuple[int, int, int]:
        """Return the specificity as a tuple of 3 integers.

        ``:is()`` counts as its most specific argument; that is added to
        the specificity of the selector it is attached to, in the same way
        :class:`Negation` adds its argument to its host selector.  An empty
        argument list contributes ``(0, 0, 0)``.
        """
        a1, b1, c1 = self.selector.specificity()
        a2, b2, c2 = max((x.specificity() for x in self.selector_list), default=(0, 0, 0))
        return a1 + a2, b1 + b2, c1 + c2
class SpecificityAdjustment:
    """
    Represents selector:where(selector_list)
    Same as selector:is(selector_list), but the pseudo-class itself
    contributes no specificity.
    """

    def __init__(self, selector: "Tree", selector_list: "List[Tree]"):
        self.selector = selector
        self.selector_list = selector_list

    def __repr__(self) -> str:
        return "%s[%r:where(%s)]" % (
            self.__class__.__name__,
            self.selector,
            ", ".join(map(repr, self.selector_list)),
        )

    def canonical(self) -> str:
        """Return the CSS text ``selector:where(arg1, arg2, ...)``."""
        selector_arguments = []
        for s in self.selector_list:
            selarg = s.canonical()
            # Drop the implicit leading "*" ("*.foo" -> ".foo") but keep a
            # lone "*" so that ``:where(*)`` does not degrade to ``:where()``.
            if len(selarg) > 1:
                selarg = selarg.lstrip("*")
            selector_arguments.append(selarg)
        return "%s:where(%s)" % (
            self.selector.canonical(),
            ", ".join(selector_arguments),
        )

    def specificity(self) -> Tuple[int, int, int]:
        """Return the specificity as a tuple of 3 integers.

        The ``:where()`` arguments add nothing, so the result is exactly
        the specificity of the selector the pseudo-class is attached to.
        """
        a, b, c = self.selector.specificity()
        return a, b, c
class Attrib:
    """
    Represents selector[namespace|attrib operator value]
    """

    @typing.overload
    def __init__(
        self,
        selector: "Tree",
        namespace: Optional[str],
        attrib: str,
        operator: 'typing.Literal["exists"]',
        value: None,
    ) -> None:
        ...

    @typing.overload
    def __init__(
        self, selector: "Tree", namespace: Optional[str], attrib: str, operator: str, value: "Token"
    ) -> None:
        ...

    def __init__(
        self,
        selector: "Tree",
        namespace: Optional[str],
        attrib: str,
        operator: str,
        value: Optional["Token"],
    ) -> None:
        self.selector = selector
        self.namespace = namespace
        self.attrib = attrib
        self.operator = operator
        # ``None`` when the operator is "exists" (see the overloads above).
        self.value = value

    def __repr__(self) -> str:
        qualified = "%s|%s" % (self.namespace, self.attrib) if self.namespace else self.attrib
        own_name = self.__class__.__name__
        if self.operator == "exists":
            return "%s[%r[%s]]" % (own_name, self.selector, qualified)
        token = typing.cast("Token", self.value)
        return "%s[%r[%s %s %r]]" % (
            own_name,
            self.selector,
            qualified,
            self.operator,
            token.value,
        )

    def canonical(self) -> str:
        """Return the CSS text ``selector[attrib]`` or ``selector[attrib<op><value>]``."""
        qualified = "%s|%s" % (self.namespace, self.attrib) if self.namespace else self.attrib

        if self.operator == "exists":
            inside = qualified
        else:
            token = typing.cast("Token", self.value)
            inside = "%s%s%s" % (qualified, self.operator, token.css())

        return "%s[%s]" % (self.selector.canonical(), inside)

    def specificity(self) -> Tuple[int, int, int]:
        """Return the inner selector's specificity with the middle component + 1."""
        ids, classes, elements = self.selector.specificity()
        return ids, classes + 1, elements
class Element:
    """
    Represents namespace|element

    `None` is for the universal selector '*'

    """

    def __init__(self, namespace: Optional[str] = None, element: Optional[str] = None) -> None:
        self.namespace = namespace
        self.element = element

    def __repr__(self) -> str:
        return "%s[%s]" % (self.__class__.__name__, self.canonical())

    def canonical(self) -> str:
        """Return ``element`` or ``namespace|element``, using ``*`` for a missing element."""
        name = self.element or "*"
        if not self.namespace:
            return name
        return "%s|%s" % (self.namespace, name)

    def specificity(self) -> Tuple[int, int, int]:
        """Return ``(0, 0, 1)`` for a named element and ``(0, 0, 0)`` otherwise."""
        return (0, 0, 1) if self.element else (0, 0, 0)
class Hash:
    """
    Represents selector#id
    """

    def __init__(self, selector: "Tree", id: str) -> None:
        self.selector = selector
        self.id = id

    def __repr__(self) -> str:
        own_name = self.__class__.__name__
        return "%s[%r#%s]" % (own_name, self.selector, self.id)

    def canonical(self) -> str:
        """Return the CSS text ``selector#id``."""
        inner = self.selector.canonical()
        return "%s#%s" % (inner, self.id)

    def specificity(self) -> Tuple[int, int, int]:
        """Return the inner selector's specificity with the first component + 1."""
        ids, classes, elements = self.selector.specificity()
        return ids + 1, classes, elements
class CombinedSelector:
    """Represents two selectors joined by a combinator.

    The combinator is stored as a string; a single space is shown as
    ``<followed>`` in :meth:`__repr__`.
    """

    def __init__(self, selector: "Tree", combinator: str, subselector: "Tree") -> None:
        assert selector is not None
        self.selector = selector
        self.combinator = combinator
        self.subselector = subselector

    def __repr__(self) -> str:
        shown = "<followed>" if self.combinator == " " else self.combinator
        own_name = self.__class__.__name__
        return "%s[%r %s %r]" % (own_name, self.selector, shown, self.subselector)

    def canonical(self) -> str:
        """Return ``left combinator right`` separated by single spaces."""
        right = self.subselector.canonical()
        # Drop the implicit leading "*" of the right side, but keep a lone "*".
        if len(right) > 1:
            right = right.lstrip("*")
        return "%s %s %s" % (self.selector.canonical(), self.combinator, right)

    def specificity(self) -> Tuple[int, int, int]:
        """Return the component-wise sum of both sides' specificities."""
        left_ids, left_classes, left_elements = self.selector.specificity()
        right_ids, right_classes, right_elements = self.subselector.specificity()
        return (
            left_ids + right_ids,
            left_classes + right_classes,
            left_elements + right_elements,
        )
503#### Parser
505# foo
506_el_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$")
508# foo#bar or #bar
509_id_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$")
511# foo.bar or .bar
512_class_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$")
def parse(css: str) -> List[Selector]:
    """Parse a CSS *group of selectors*.

    If you don't care about pseudo-elements or selector specificity,
    you can skip this and use :meth:`~GenericTranslator.css_to_xpath`.

    :param css:
        A *group of selectors* as a string.
    :raises:
        :class:`SelectorSyntaxError` on invalid selectors.
    :returns:
        A list of parsed :class:`Selector` objects, one for each
        selector in the comma-separated group.

    """
    # Fast paths: a bare element, an ID or a class (each with an optional
    # element name) are recognised by regex without running the tokenizer.
    element_match = _el_re.match(css)
    if element_match is not None:
        return [Selector(Element(element=element_match.group(1)))]

    id_match = _id_re.match(css)
    if id_match is not None:
        tag, ident = id_match.group(1), id_match.group(2)
        return [Selector(Hash(Element(element=tag or None), ident))]

    class_match = _class_re.match(css)
    if class_match is not None:
        tag, class_name = class_match.group(1), class_match.group(2)
        return [Selector(Class(Element(element=tag or None), class_name))]

    # General case: tokenize, then parse the comma-separated group.
    stream = TokenStream(tokenize(css), source=css)
    return list(parse_selector_group(stream))
546# except SelectorSyntaxError:
547# e = sys.exc_info()[1]
548# message = "%s at %s -> %r" % (
549# e, stream.used, stream.peek())
550# e.msg = message
551# e.args = tuple([message])
552# raise
def parse_selector_group(stream: "TokenStream") -> Iterator[Selector]:
    """Yield one :class:`Selector` per comma-separated selector in *stream*.

    Parsing is lazy: each selector is parsed when the generator is advanced.
    """
    stream.skip_whitespace()
    while True:
        tree, pseudo_element = parse_selector(stream)
        yield Selector(tree, pseudo_element)
        # Anything other than a comma ends the group.
        if stream.peek() != ("DELIM", ","):
            return
        stream.next()
        stream.skip_whitespace()
def parse_selector(stream: "TokenStream") -> Tuple[Tree, Optional[PseudoElement]]:
    """Parse simple selectors joined by combinators, up to EOF or a comma.

    :returns: ``(tree, pseudo_element)``; the pseudo-element, if any, is the
        one attached to the last simple selector.
    :raises: :class:`SelectorSyntaxError` when a pseudo-element is followed
        by more selector content.
    """
    result, pseudo_element = parse_simple_selector(stream)
    while True:
        stream.skip_whitespace()
        upcoming = stream.peek()
        if upcoming in (("EOF", None), ("DELIM", ",")):
            return result, pseudo_element
        if pseudo_element:
            raise SelectorSyntaxError(
                "Got pseudo-element ::%s not at the end of a selector" % pseudo_element
            )
        if upcoming.is_delim("+", ">", "~"):
            # An explicit combinator
            combinator = typing.cast(str, stream.next().value)
            stream.skip_whitespace()
        else:
            # By exclusion, the last parse_simple_selector() ended
            # at whitespace: this is the descendant combinator.
            combinator = " "
        right_side, pseudo_element = parse_simple_selector(stream)
        result = CombinedSelector(result, combinator, right_side)
def parse_simple_selector(
    stream: "TokenStream", inside_negation: bool = False
) -> Tuple[Tree, Optional[PseudoElement]]:
    """Parse one simple selector: an optional element followed by any number
    of ``#id``, ``.class``, ``[attrib]`` and ``:pseudo`` parts.

    :param inside_negation:
        When true, a closing ``)`` ends the selector and a nested ``:not()``
        is rejected.
    :returns: ``(tree, pseudo_element)``.
    :raises: :class:`SelectorSyntaxError` on unexpected tokens or when no
        token was consumed at all.
    """
    stream.skip_whitespace()
    selector_start = len(stream.used)

    # Optional leading ``element``, ``*``, ``ns|element`` or ``ns|*``.
    namespace = element = None
    first = stream.peek()
    if first.type == "IDENT" or first == ("DELIM", "*"):
        consumed = stream.next()
        # "*" stands for "any", which is represented by None.
        name = consumed.value if first.type == "IDENT" else None
        if stream.peek() == ("DELIM", "|"):
            stream.next()
            namespace, element = name, stream.next_ident_or_star()
        else:
            element = name

    result: Tree = Element(namespace, element)
    pseudo_element: Optional[PseudoElement] = None
    while True:
        peek = stream.peek()
        at_end = (
            peek.type in ("S", "EOF")
            or peek.is_delim(",", "+", ">", "~")
            or (inside_negation and peek == ("DELIM", ")"))
        )
        if at_end:
            break
        if pseudo_element:
            raise SelectorSyntaxError(
                "Got pseudo-element ::%s not at the end of a selector" % pseudo_element
            )

        if peek.type == "HASH":
            result = Hash(result, typing.cast(str, stream.next().value))
        elif peek == ("DELIM", "."):
            stream.next()
            result = Class(result, stream.next_ident())
        elif peek == ("DELIM", "|"):
            stream.next()
            result = Element(None, stream.next_ident())
        elif peek == ("DELIM", "["):
            stream.next()
            result = parse_attrib(result, stream)
        elif peek == ("DELIM", ":"):
            stream.next()
            if stream.peek() == ("DELIM", ":"):
                # "::name" or "::name(arguments)"
                stream.next()
                pseudo_element = stream.next_ident()
                if stream.peek() == ("DELIM", "("):
                    stream.next()
                    pseudo_element = FunctionalPseudoElement(
                        pseudo_element, parse_arguments(stream)
                    )
                continue

            ident = stream.next_ident()
            lowered = ident.lower()
            if lowered in ("first-line", "first-letter", "before", "after"):
                # Special case: CSS 2.1 pseudo-elements can have a single ':'
                # Any new pseudo-element must have two.
                pseudo_element = str(ident)
                continue

            if stream.peek() != ("DELIM", "("):
                # A plain pseudo-class
                result = Pseudo(result, ident)
                if repr(result) == "Pseudo[Element[*]:scope]":
                    # A bare ":scope" is only accepted as the first thing in
                    # a selector; the consumed-token history tells where we are.
                    used = stream.used
                    at_selector_start = (
                        len(used) == 2
                        or (len(used) == 3 and used[0].type == "S")
                        or (len(used) >= 3 and used[-3].is_delim(","))
                        or (
                            len(used) >= 4
                            and used[-3].type == "S"
                            and used[-4].is_delim(",")
                        )
                    )
                    if not at_selector_start:
                        raise SelectorSyntaxError(
                            'Got immediate child pseudo-element ":scope" '
                            "not at the start of a selector"
                        )
                continue

            # A functional pseudo-class: consume "(" and dispatch on the name.
            stream.next()
            stream.skip_whitespace()
            if lowered == "not":
                if inside_negation:
                    raise SelectorSyntaxError("Got nested :not()")
                argument, argument_pseudo_element = parse_simple_selector(
                    stream, inside_negation=True
                )
                closing = stream.next()
                if argument_pseudo_element:
                    raise SelectorSyntaxError(
                        "Got pseudo-element ::%s inside :not() at %s"
                        % (argument_pseudo_element, closing.pos)
                    )
                if closing != ("DELIM", ")"):
                    raise SelectorSyntaxError("Expected ')', got %s" % (closing,))
                result = Negation(result, argument)
            elif lowered == "has":
                combinator, arguments = parse_relative_selector(stream)
                result = Relation(result, combinator, arguments)
            elif lowered in ("matches", "is"):
                selectors = parse_simple_selector_arguments(stream)
                result = Matching(result, selectors)
            elif lowered == "where":
                selectors = parse_simple_selector_arguments(stream)
                result = SpecificityAdjustment(result, selectors)
            else:
                result = Function(result, ident, parse_arguments(stream))
        else:
            raise SelectorSyntaxError("Expected selector, got %s" % (peek,))

    # Nothing was consumed: there was no selector here at all.
    if len(stream.used) == selector_start:
        raise SelectorSyntaxError("Expected selector, got %s" % (stream.peek(),))
    return result, pseudo_element
def parse_arguments(stream: "TokenStream") -> List["Token"]:
    """Collect the argument tokens of a function up to the closing ``)``.

    Whitespace is skipped before each token.  Only IDENT, STRING and NUMBER
    tokens and the ``+`` / ``-`` delimiters are accepted as arguments.

    :raises: :class:`SelectorSyntaxError` on any other token.
    """
    collected: List["Token"] = []
    while True:
        stream.skip_whitespace()
        token = stream.next()
        if token == ("DELIM", ")"):
            return collected
        is_argument = token.type in ("IDENT", "STRING", "NUMBER") or token in [
            ("DELIM", "+"),
            ("DELIM", "-"),
        ]
        if not is_argument:
            raise SelectorSyntaxError("Expected an argument, got %s" % (token,))
        collected.append(token)
def parse_relative_selector(stream: "TokenStream") -> Tuple["Token", Selector]:
    """Parse the argument of ``:has()`` up to its closing ``)``.

    An optional leading ``+``, ``-``, ``>`` or ``~`` delimiter is taken as
    the combinator; otherwise a synthetic space token is used.  The values
    of the following tokens are concatenated and re-parsed with
    :func:`parse`; only the first resulting selector is returned.

    :returns: ``(combinator_token, selector)``.
    :raises: :class:`SelectorSyntaxError` on an unexpected token.
    """
    stream.skip_whitespace()
    token = stream.next()

    if token in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]:
        combinator = token
        stream.skip_whitespace()
        token = stream.next()
    else:
        combinator = Token("DELIM", " ", pos=0)

    pieces = []
    while True:
        if token == ("DELIM", ")"):
            parsed = parse("".join(pieces))
            return combinator, parsed[0]
        acceptable = token.type in ("IDENT", "STRING", "NUMBER") or token in [
            ("DELIM", "."),
            ("DELIM", "*"),
        ]
        if not acceptable:
            raise SelectorSyntaxError("Expected an argument, got %s" % (token,))
        pieces.append(typing.cast(str, token.value))
        token = stream.next()
def parse_simple_selector_arguments(stream: "TokenStream") -> List[Tree]:
    """Parse the comma-separated simple selectors of ``:is()`` / ``:where()``.

    Consumes tokens up to and including the closing ``)``.

    :returns: The parsed argument trees, in source order.
    :raises: :class:`SelectorSyntaxError` if an argument carries a
        pseudo-element, or if anything other than ``,`` or ``)`` follows an
        argument (including a premature end of input).
    """
    arguments = []
    while True:
        result, pseudo_element = parse_simple_selector(stream, True)
        if pseudo_element:
            raise SelectorSyntaxError(
                "Got pseudo-element ::%s inside function" % (pseudo_element,)
            )
        stream.skip_whitespace()
        token = stream.next()
        if token == ("DELIM", ","):
            arguments.append(result)
            # Only whitespace may be skipped after the comma.  Consuming a
            # token unconditionally here would swallow the start of the next
            # argument whenever no space follows the comma, e.g. ":is(a,b)".
            stream.skip_whitespace()
        elif token == ("DELIM", ")"):
            arguments.append(result)
            return arguments
        else:
            # Includes EOF: an unterminated argument list is a syntax error.
            raise SelectorSyntaxError("Expected an argument, got %s" % (token,))
def parse_attrib(selector: Tree, stream: "TokenStream") -> Attrib:
    """Parse an attribute selector; the opening ``[`` is already consumed.

    Handles ``[attrib]``, ``[ns|attrib]`` and the operator forms
    ``=``, ``^=``, ``$=``, ``*=``, ``~=``, ``|=`` and ``!=``, whose value
    must be an IDENT or STRING token.

    :raises: :class:`SelectorSyntaxError` on any unexpected token.
    """
    stream.skip_whitespace()
    attrib = stream.next_ident_or_star()
    # "*" is only meaningful as a namespace, so "|" has to follow it.
    if attrib is None and stream.peek() != ("DELIM", "|"):
        raise SelectorSyntaxError("Expected '|', got %s" % (stream.peek(),))

    namespace: Optional[str] = None
    op: Optional[str] = None
    if stream.peek() == ("DELIM", "|"):
        stream.next()
        if stream.peek() == ("DELIM", "="):
            # "attrib|=" is the dash-match operator, not a namespace.
            stream.next()
            op = "|="
        else:
            namespace = attrib
            attrib = stream.next_ident()

    if op is None:
        stream.skip_whitespace()
        token = stream.next()
        if token == ("DELIM", "]"):
            return Attrib(selector, namespace, typing.cast(str, attrib), "exists", None)
        if token == ("DELIM", "="):
            op = "="
        elif token.is_delim("^", "$", "*", "~", "|", "!") and (stream.peek() == ("DELIM", "=")):
            op = typing.cast(str, token.value) + "="
            stream.next()
        else:
            raise SelectorSyntaxError("Operator expected, got %s" % (token,))

    stream.skip_whitespace()
    value = stream.next()
    if value.type not in ("IDENT", "STRING"):
        raise SelectorSyntaxError("Expected string or ident, got %s" % (value,))
    stream.skip_whitespace()
    closing = stream.next()
    if closing != ("DELIM", "]"):
        raise SelectorSyntaxError("Expected ']', got %s" % (closing,))
    return Attrib(selector, namespace, typing.cast(str, attrib), op, value)
def parse_series(tokens: Iterable["Token"]) -> Tuple[int, int]:
    """
    Parses the arguments for :nth-child() and friends.

    :param tokens: The argument tokens; any iterable, including a one-shot
        iterator.
    :raises: :class:`ValueError` if a STRING token is present or if the
        text is not a valid series.
    :returns: ``(a, b)``, the coefficients of ``an+b``.

    """
    # Materialize first: the tokens are scanned twice below, and a one-shot
    # iterator would otherwise be empty on the second pass.
    tokens = list(tokens)
    if any(token.type == "STRING" for token in tokens):
        raise ValueError("String tokens not allowed in series.")
    s = "".join(typing.cast(str, token.value) for token in tokens).strip()
    if s == "odd":
        return 2, 1
    elif s == "even":
        return 2, 0
    elif s == "n":
        return 1, 0
    if "n" not in s:
        # Just b
        return 0, int(s)
    a, b = s.split("n", 1)
    a_as_int: int
    if not a:
        a_as_int = 1
    elif a == "-" or a == "+":
        # A bare sign means a coefficient of -1 or +1.
        a_as_int = int(a + "1")
    else:
        a_as_int = int(a)
    b_as_int: int = int(b) if b else 0
    return a_as_int, b_as_int
843#### Token objects
class Token(Tuple[str, Optional[str]]):
    """A ``(type, value)`` tuple that also carries its position as ``pos``.

    Because it is a tuple, a token compares equal to a plain
    ``(type, value)`` pair; ``pos`` does not take part in comparisons.
    """

    pos: int

    @typing.overload
    def __new__(
        cls,
        type_: 'typing.Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"]',
        value: str,
        pos: int,
    ) -> "Token":
        ...

    @typing.overload
    def __new__(cls, type_: 'typing.Literal["EOF"]', value: None, pos: int) -> "Token":
        ...

    def __new__(cls, type_: str, value: Optional[str], pos: int) -> "Token":
        token = tuple.__new__(cls, (type_, value))
        token.pos = pos
        return token

    def __repr__(self) -> str:
        return "<%s '%s' at %i>" % (self.type, self.value, self.pos)

    @property
    def type(self) -> str:
        return self[0]

    @property
    def value(self) -> Optional[str]:
        return self[1]

    def is_delim(self, *values: str) -> bool:
        """Return whether this is a DELIM token whose value is one of *values*."""
        if self.type != "DELIM":
            return False
        return self.value in values

    def css(self) -> str:
        """Return the token as CSS text; STRING values are rendered with ``repr()``."""
        return repr(self.value) if self.type == "STRING" else typing.cast(str, self.value)
class EOFToken(Token):
    """The token that marks the end of the input: type ``"EOF"``, value ``None``."""

    def __new__(cls, pos: int) -> "EOFToken":
        created = Token.__new__(cls, "EOF", None, pos)
        return typing.cast("EOFToken", created)

    def __repr__(self) -> str:
        # There is no value to show, unlike Token.__repr__.
        return "<%s at %i>" % (self.type, self.pos)
896#### Tokenizer
class TokenMacros:
    """Regular-expression fragments shared by the tokenizer patterns.

    ``_compile()`` substitutes them into patterns via ``%(name)s``
    placeholders.  Later fragments are built from earlier ones, so the
    order of the assignments matters.
    """

    # A backslash, 1-6 hex digits and one optional trailing whitespace.
    unicode_escape = r"\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?"
    # Either a unicode escape or a backslash before any other character
    # that is neither a newline nor a hex digit.
    escape = unicode_escape + r"|\\[^\n\r\f0-9a-f]"
    # Inside strings a backslash-newline is allowed as well.
    string_escape = r"\\(?:\n|\r\n|\r|\f)|" + escape
    nonascii = r"[^\0-\177]"
    nmchar = "[_a-z0-9-]|%s|%s" % (escape, nonascii)
    nmstart = "[_a-z]|%s|%s" % (escape, nonascii)
if typing.TYPE_CHECKING:

    # Typing-only description of a compiled pattern's bound ``match`` method.
    class MatchFunc(typing.Protocol):
        def __call__(
            self, string: str, pos: int = ..., endpos: int = ...
        ) -> Optional["re.Match[str]"]:
            ...


def _compile(pattern: str) -> "MatchFunc":
    """Expand the ``%(name)s`` macros in *pattern* from :class:`TokenMacros`
    and return the ``match`` method of the case-insensitive compiled regex.
    """
    macros = vars(TokenMacros)
    compiled = re.compile(pattern % macros, re.IGNORECASE)
    return compiled.match
# Token matchers: each is the bound ``match`` method of a compiled pattern
# and is called by tokenize() as ``matcher(s, pos=pos)``.
_match_whitespace = _compile(r"[ \t\r\n\f]+")
_match_number = _compile(r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)")
_match_hash = _compile("#(?:%(nmchar)s)+")
_match_ident = _compile("-?(?:%(nmstart)s)(?:%(nmchar)s)*")
# String bodies, keyed by the quote character that opened the string.
_match_string_by_quote = {
    "'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"),
    '"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'),
}

# Bound ``sub`` methods used to resolve escape sequences.
_sub_simple_escape = re.compile(r"\\(.)").sub
_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub
_sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub

# Same as r'\1', but faster on CPython
_replace_simple = operator.methodcaller("group", 1)
938def _replace_unicode(match: "re.Match[str]") -> str:
939 codepoint = int(match.group(1), 16)
940 if codepoint > sys.maxunicode:
941 codepoint = 0xFFFD
942 return chr(codepoint)
def unescape_ident(value: str) -> str:
    """Resolve CSS escapes in *value*.

    Unicode (hexadecimal) escapes are replaced first; every remaining
    backslash escape is then replaced by the escaped character itself.
    """
    without_unicode = _sub_unicode_escape(_replace_unicode, value)
    return _sub_simple_escape(_replace_simple, without_unicode)
def tokenize(s: str) -> Iterator[Token]:
    """Yield the tokens of *s*, ending with an :class:`EOFToken`.

    At each position the candidates are tried in a fixed order: whitespace,
    identifier, hash, quoted string, number, ``/* */`` comment, and finally
    a one-character DELIM token.  Comments produce no token.

    :raises: :class:`SelectorSyntaxError` on an unclosed or invalid string.
    """
    pos = 0
    len_s = len(s)
    while pos < len_s:
        # A run of whitespace collapses into a single "S" token.
        whitespace = _match_whitespace(s, pos=pos)
        if whitespace:
            yield Token("S", " ", pos)
            pos = whitespace.end()
            continue

        ident = _match_ident(s, pos=pos)
        if ident:
            yield Token("IDENT", unescape_ident(ident.group()), pos)
            pos = ident.end()
            continue

        hash_ = _match_hash(s, pos=pos)
        if hash_:
            # The value excludes the leading "#".
            yield Token("HASH", unescape_ident(hash_.group()[1:]), pos)
            pos = hash_.end()
            continue

        quote = s[pos]
        if quote in _match_string_by_quote:
            body = _match_string_by_quote[quote](s, pos=pos + 1)
            assert body, "Should have found at least an empty match"
            end_pos = body.end()
            if end_pos == len_s:
                raise SelectorSyntaxError("Unclosed string at %s" % pos)
            if s[end_pos] != quote:
                raise SelectorSyntaxError("Invalid string at %s" % pos)
            # Escaped newlines are dropped before the other escapes are resolved.
            text = unescape_ident(_sub_newline_escape("", body.group()))
            yield Token("STRING", text, pos)
            pos = end_pos + 1
            continue

        number = _match_number(s, pos=pos)
        if number:
            yield Token("NUMBER", number.group(), pos)
            pos = number.end()
            continue

        if s[pos : pos + 2] == "/*":
            # Skip the comment; an unterminated one runs to the end of input.
            closing = s.find("*/", pos + 2)
            pos = len_s if closing == -1 else closing + 2
            continue

        yield Token("DELIM", s[pos], pos)
        pos += 1

    assert pos == len_s
    yield EOFToken(pos)
class TokenStream:
    """A token iterator with one token of look-ahead.

    Every token handed out by :meth:`next` is also appended to
    :attr:`used`, in order.
    """

    def __init__(self, tokens: "Iterable[Token]", source: Optional[str] = None) -> None:
        self.used: "List[Token]" = []
        self.tokens = iter(tokens)
        self.source = source
        # Look-ahead slot: only meaningful while ``_peeking`` is true.
        self.peeked: "Optional[Token]" = None
        self._peeking = False
        self.next_token = self.tokens.__next__

    def next(self) -> "Token":
        """Consume and return the next token, recording it in :attr:`used`."""
        if self._peeking:
            # Hand out the token already fetched by peek().
            self._peeking = False
            token = typing.cast("Token", self.peeked)
        else:
            token = self.next_token()
        self.used.append(token)
        return token

    def peek(self) -> "Token":
        """Return the next token without consuming it."""
        if not self._peeking:
            self.peeked = self.next_token()
            self._peeking = True
        return typing.cast("Token", self.peeked)

    def next_ident(self) -> str:
        """Consume the next token and return its value; it must be an IDENT."""
        token = self.next()
        if token.type != "IDENT":
            raise SelectorSyntaxError("Expected ident, got %s" % (token,))
        return typing.cast(str, token.value)

    def next_ident_or_star(self) -> Optional[str]:
        """Consume the next token: return an IDENT's value, or ``None`` for ``*``."""
        token = self.next()
        if token.type == "IDENT":
            return token.value
        if token == ("DELIM", "*"):
            return None
        raise SelectorSyntaxError("Expected ident or '*', got %s" % (token,))

    def skip_whitespace(self) -> None:
        """Consume the next token if it is a whitespace ("S") token."""
        if self.peek().type == "S":
            self.next()