Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/cssselect/xpath.py: 27%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
cssselect.xpath
===============

Translation of parsed CSS selectors to XPath expressions.

:copyright: (c) 2007-2012 Ian Bicking and contributors.
            See AUTHORS for more details.
:license: BSD, see LICENSE for more details.

12"""
14from __future__ import annotations
16import re
17from typing import TYPE_CHECKING, cast
19from cssselect.parser import (
20 Attrib,
21 Class,
22 CombinedSelector,
23 Element,
24 Function,
25 Hash,
26 Matching,
27 Negation,
28 Pseudo,
29 PseudoElement,
30 Relation,
31 Selector,
32 SelectorError,
33 SpecificityAdjustment,
34 Tree,
35 parse,
36 parse_series,
37)
39if TYPE_CHECKING:
40 from collections.abc import Callable
42 # typing.Self requires Python 3.11
43 from typing_extensions import Self
class ExpressionError(SelectorError, RuntimeError):
    """Raised for a selector (e.g. a pseudo-class) that is unknown or unsupported."""
50#### XPath Helpers
class XPathExpr:
    """Mutable builder for one XPath location path.

    The rendered expression is ``path + element``, followed by
    ``[condition]`` when a condition is set (see :meth:`__str__`).
    Mutating methods that return ``Self`` return the same instance so
    calls can be chained.
    """

    def __init__(
        self,
        path: str = "",
        element: str = "*",
        condition: str = "",
        star_prefix: bool = False,
    ) -> None:
        """Store the three parts of the expression.

        :param path: Text rendered before the element test.
        :param element: Node test of the final step (``*`` by default).
        :param condition: Predicate body, without the surrounding brackets.
        :param star_prefix: Accepted so existing callers keep working;
            this class never reads it.
        """
        self.path = path
        self.element = element
        self.condition = condition

    def __str__(self) -> str:
        rendered = str(self.path) + str(self.element)
        if self.condition:
            rendered += f"[{self.condition}]"
        return rendered

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}[{self}]"

    def add_condition(self, condition: str, conjuction: str = "and") -> Self:
        """Combine *condition* with the current one and return ``self``.

        With no current condition, *condition* simply becomes the condition.
        Otherwise both operands are parenthesised and joined with
        *conjuction* (``"and"`` or ``"or"``).
        """
        if self.condition:
            self.condition = f"({self.condition}) {conjuction} ({condition})"
        else:
            self.condition = condition
        return self

    def add_name_test(self) -> None:
        """Move a concrete element name into the condition.

        ``foo`` becomes ``*[name() = 'foo']``.  Nothing happens when the
        element is already ``*``.
        """
        if self.element == "*":
            # We weren't doing a test anyway
            return
        self.add_condition(f"name() = {GenericTranslator.xpath_literal(self.element)}")
        self.element = "*"

    def add_star_prefix(self) -> None:
        """
        Append '*/' to the path to keep the context constrained
        to a single parent.
        """
        self.path += "*/"

    def join(
        self,
        combiner: str,
        other: XPathExpr,
        closing_combiner: str | None = None,
        has_inner_condition: bool = False,
    ) -> Self:
        """Append *other* to this expression in place and return ``self``.

        :param combiner: Text inserted between the two expressions
            (an axis such as ``"/"`` or an opening predicate such as
            ``"[descendant::"``).
        :param other: Expression whose path, element and condition are
            appended.
        :param closing_combiner: Optional text appended after *other*'s
            element (typically ``"]"`` to close a predicate opened by
            *combiner*).
        :param has_inner_condition: When true, *other*'s condition is
            rendered directly after its element, before
            *closing_combiner*, instead of becoming the condition of the
            joined expression.
        """
        # str(self) renders this expression's own condition into the path.
        path = str(self) + combiner
        # Any "star prefix" is redundant when joining.
        if other.path != "*/":
            path += other.path
        self.path = path

        if has_inner_condition:
            element = other.element
            if other.condition:
                element += "[" + other.condition + "]"
            if closing_combiner:
                element += closing_combiner
            self.element = element
            # Our previous condition is already part of ``path`` (see above);
            # keeping it here as well would emit the same predicate twice.
            self.condition = ""
        else:
            self.element = (
                other.element + closing_combiner if closing_combiner else other.element
            )
            self.condition = other.condition
        return self
# Split a string on runs of single quotes.  The capturing group makes
# ``re.split`` keep the quote runs themselves in the result.
split_at_single_quotes = re.compile("('+)").split

# The spec is actually more permissive than that, but don't bother.
# This is just for the fast path.
# http://www.w3.org/TR/REC-xml/#NT-NameStartChar
# ``fullmatch`` is used instead of ``^...$`` with ``match`` because ``$``
# also matches just before a trailing newline, which would let "foo\n" pass.
is_safe_name = re.compile("[a-zA-Z_][a-zA-Z0-9_.-]*").fullmatch

# Test that the string is not empty and does not contain whitespace
# (``fullmatch`` for the same trailing-newline reason as above).
is_non_whitespace = re.compile(r"[^ \t\r\n\f]+").fullmatch
132#### Translation
class GenericTranslator:
    """
    Translator for "generic" XML documents.

    Everything is case-sensitive, no assumption is made on the meaning
    of element names and attribute names.

    """

    ####
    ####  HERE BE DRAGONS
    ####
    ####  You are welcome to hook into this to change some behavior,
    ####  but do so at your own risks.
    ####  Until it has received a lot more work and review,
    ####  I reserve the right to change this API in backward-incompatible ways
    ####  with any minor version of cssselect.
    ####  See https://github.com/scrapy/cssselect/pull/22
    ####  -- Simon Sapin.
    ####

    # CSS combinator -> suffix used to look up ``xpath_<name>_combinator``
    # (and ``xpath_relation_<name>_combinator``) methods.
    combinator_mapping = {
        " ": "descendant",
        ">": "child",
        "+": "direct_adjacent",
        "~": "indirect_adjacent",
    }

    # Attribute operator -> suffix used to look up ``xpath_attrib_<name>``.
    attribute_operator_mapping = {
        "exists": "exists",
        "=": "equals",
        "~=": "includes",
        "|=": "dashmatch",
        "^=": "prefixmatch",
        "$=": "suffixmatch",
        "*=": "substringmatch",
        "!=": "different",  # XXX Not in Level 3 but meh
    }

    #: The attribute used for ID selectors depends on the document language:
    #: http://www.w3.org/TR/selectors/#id-selectors
    id_attribute = "id"

    #: The attribute used for ``:lang()`` depends on the document language:
    #: http://www.w3.org/TR/selectors/#lang-pseudo
    lang_attribute = "xml:lang"

    #: The case sensitivity of document language element names,
    #: attribute names, and attribute values in selectors depends
    #: on the document language.
    #: http://www.w3.org/TR/selectors/#casesens
    #:
    #: When a document language defines one of these as case-insensitive,
    #: cssselect assumes that the document parser makes the parsed values
    #: lower-case. Making the selector lower-case too makes the comparison
    #: case-insensitive.
    #:
    #: In HTML, element names and attributes names (but not attribute values)
    #: are case-insensitive. All of lxml.html, html5lib, BeautifulSoup4
    #: and HTMLParser make them lower-case in their parse result, so
    #: the assumption holds.
    lower_case_element_names = False
    lower_case_attribute_names = False
    lower_case_attribute_values = False

    # class used to represent an xpath expression
    xpathexpr_cls = XPathExpr

    def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
        """Translate a *group of selectors* to XPath.

        Pseudo-elements are not supported here since XPath only knows
        about "real" elements.

        :param css:
            A *group of selectors* as a string.
        :param prefix:
            This string is prepended to the XPath expression for each selector.
            The default makes selectors scoped to the context node's subtree.
        :raises:
            :class:`~cssselect.SelectorSyntaxError` on invalid selectors,
            :class:`ExpressionError` on unknown/unsupported selectors,
            including pseudo-elements.
        :returns:
            The equivalent XPath 1.0 expression as a string.

        """
        return " | ".join(
            self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
            for selector in parse(css)
        )

    def selector_to_xpath(
        self,
        selector: Selector,
        prefix: str = "descendant-or-self::",
        translate_pseudo_elements: bool = False,
    ) -> str:
        """Translate a parsed selector to XPath.


        :param selector:
            A parsed :class:`Selector` object.
        :param prefix:
            This string is prepended to the resulting XPath expression.
            The default makes selectors scoped to the context node's subtree.
        :param translate_pseudo_elements:
            Unless this is set to ``True`` (as :meth:`css_to_xpath` does),
            the :attr:`~Selector.pseudo_element` attribute of the selector
            is ignored.
            It is the caller's responsibility to reject selectors
            with pseudo-elements, or to account for them somehow.
        :raises:
            :class:`ExpressionError` on unknown/unsupported selectors.
            :class:`TypeError` when *selector* has no truthy ``parsed_tree``.
        :returns:
            The equivalent XPath 1.0 expression as a string.

        """
        tree = getattr(selector, "parsed_tree", None)
        if not tree:
            raise TypeError(f"Expected a parsed selector, got {selector!r}")
        xpath = self.xpath(tree)
        assert isinstance(xpath, self.xpathexpr_cls)  # help debug a missing 'return'
        if translate_pseudo_elements and selector.pseudo_element:
            xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
        return (prefix or "") + str(xpath)

    def xpath_pseudo_element(
        self, xpath: XPathExpr, pseudo_element: PseudoElement
    ) -> XPathExpr:
        """Translate a pseudo-element.

        Defaults to not supporting pseudo-elements at all,
        but can be overridden by sub-classes.

        """
        raise ExpressionError("Pseudo-elements are not supported.")

    @staticmethod
    def xpath_literal(s: str) -> str:
        """Return *s* as an XPath 1.0 string literal.

        XPath 1.0 literals have no escape mechanism, so the quoting style is
        chosen from the quotes present in *s*; when both kinds occur, the
        string is split at runs of single quotes and rebuilt with
        ``concat()``.
        """
        s = str(s)
        if "'" not in s:
            s = f"'{s}'"
        elif '"' not in s:
            s = f'"{s}"'
        else:
            parts_quoted = [
                f'"{part}"' if "'" in part else f"'{part}'"
                for part in split_at_single_quotes(s)
                if part
            ]
            s = "concat({})".format(",".join(parts_quoted))
        return s

    def xpath(self, parsed_selector: Tree) -> XPathExpr:
        """Translate any parsed selector object."""
        type_name = type(parsed_selector).__name__
        # Dispatch on the lower-cased class name: Element -> xpath_element, ...
        method = cast(
            "Callable[[Tree], XPathExpr] | None",
            getattr(self, f"xpath_{type_name.lower()}", None),
        )
        if method is None:
            raise ExpressionError(f"{type_name} is not supported.")
        return method(parsed_selector)

    # Dispatched by parsed object type

    def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr:
        """Translate a combined selector."""
        combinator = self.combinator_mapping[combined.combinator]
        method = cast(
            "Callable[[XPathExpr, XPathExpr], XPathExpr]",
            getattr(self, f"xpath_{combinator}_combinator"),
        )
        return method(self.xpath(combined.selector), self.xpath(combined.subselector))

    def xpath_negation(self, negation: Negation) -> XPathExpr:
        """Translate a negation: add ``not(<sub-selector condition>)``."""
        xpath = self.xpath(negation.selector)
        sub_xpath = self.xpath(negation.subselector)
        # Fold the element name into the condition so it gets negated too.
        sub_xpath.add_name_test()
        if sub_xpath.condition:
            return xpath.add_condition(f"not({sub_xpath.condition})")
        # The sub-selector matches every element, so its negation matches none.
        return xpath.add_condition("0")

    def xpath_relation(self, relation: Relation) -> XPathExpr:
        """Translate a relational selector by dispatching on its combinator
        to the matching ``xpath_relation_<name>_combinator`` method."""
        xpath = self.xpath(relation.selector)
        combinator = relation.combinator
        subselector = relation.subselector
        right = self.xpath(subselector.parsed_tree)
        method = cast(
            "Callable[[XPathExpr, XPathExpr], XPathExpr]",
            getattr(
                self,
                f"xpath_relation_{self.combinator_mapping[cast('str', combinator.value)]}_combinator",
            ),
        )
        return method(xpath, right)

    def _xpath_any_of(self, xpath: XPathExpr, selector_list: list[Tree]) -> XPathExpr:
        """Require *xpath* to also match at least one selector of *selector_list*.

        The alternatives are OR-ed together first and then added to *xpath*
        as one condition, so a condition *xpath* already carries is kept as
        a requirement instead of becoming just another alternative.
        """
        alternatives = ""
        for selector in selector_list:
            expr = self.xpath(selector)
            expr.add_name_test()
            if not expr.condition:
                # This alternative matches any element, hence so does the
                # whole list: there is nothing to restrict.
                return xpath
            if alternatives:
                alternatives = f"({alternatives}) or ({expr.condition})"
            else:
                alternatives = expr.condition
        if alternatives:
            xpath.add_condition(alternatives)
        return xpath

    def xpath_matching(self, matching: Matching) -> XPathExpr:
        """Translate a :class:`Matching` node: the element must match at
        least one selector of ``matching.selector_list``."""
        xpath = self.xpath(matching.selector)
        return self._xpath_any_of(xpath, matching.selector_list)

    def xpath_specificityadjustment(self, matching: SpecificityAdjustment) -> XPathExpr:
        """Translate a :class:`SpecificityAdjustment` node; the XPath
        produced is built exactly as in :meth:`xpath_matching`."""
        xpath = self.xpath(matching.selector)
        return self._xpath_any_of(xpath, matching.selector_list)

    def xpath_function(self, function: Function) -> XPathExpr:
        """Translate a functional pseudo-class."""
        method_name = "xpath_{}_function".format(function.name.replace("-", "_"))
        method = cast(
            "Callable[[XPathExpr, Function], XPathExpr] | None",
            getattr(self, method_name, None),
        )
        if not method:
            raise ExpressionError(f"The pseudo-class :{function.name}() is unknown")
        return method(self.xpath(function.selector), function)

    def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr:
        """Translate a pseudo-class."""
        method_name = "xpath_{}_pseudo".format(pseudo.ident.replace("-", "_"))
        method = cast(
            "Callable[[XPathExpr], XPathExpr] | None",
            getattr(self, method_name, None),
        )
        if not method:
            # TODO: better error message for pseudo-elements?
            raise ExpressionError(f"The pseudo-class :{pseudo.ident} is unknown")
        return method(self.xpath(pseudo.selector))

    def xpath_attrib(self, selector: Attrib) -> XPathExpr:
        """Translate an attribute selector."""
        operator = self.attribute_operator_mapping[selector.operator]
        method = cast(
            "Callable[[XPathExpr, str, str | None], XPathExpr]",
            getattr(self, f"xpath_attrib_{operator}"),
        )
        if self.lower_case_attribute_names:
            name = selector.attrib.lower()
        else:
            name = selector.attrib
        safe = is_safe_name(name)
        if selector.namespace:
            name = f"{selector.namespace}:{name}"
            safe = safe and is_safe_name(selector.namespace)
        if safe:
            attrib = "@" + name
        else:
            # Names outside the "safe" subset are compared as string literals.
            attrib = f"attribute::*[name() = {self.xpath_literal(name)}]"
        if selector.value is None:
            value = None
        elif self.lower_case_attribute_values:
            value = cast("str", selector.value.value).lower()
        else:
            value = selector.value.value
        return method(self.xpath(selector.selector), attrib, value)

    def xpath_class(self, class_selector: Class) -> XPathExpr:
        """Translate a class selector."""
        # .foo is defined as [class~=foo] in the spec.
        xpath = self.xpath(class_selector.selector)
        return self.xpath_attrib_includes(xpath, "@class", class_selector.class_name)

    def xpath_hash(self, id_selector: Hash) -> XPathExpr:
        """Translate an ID selector.

        The attribute compared is :attr:`id_attribute`, so sub-classes for
        other document languages only need to override that attribute.
        """
        xpath = self.xpath(id_selector.selector)
        return self.xpath_attrib_equals(
            xpath, "@" + self.id_attribute, id_selector.id
        )

    def xpath_element(self, selector: Element) -> XPathExpr:
        """Translate a type or universal selector."""
        element = selector.element
        if not element:
            element = "*"
            safe = True
        else:
            safe = bool(is_safe_name(element))
            if self.lower_case_element_names:
                element = element.lower()
        if selector.namespace:
            # Namespace prefixes are case-sensitive.
            # http://www.w3.org/TR/css3-namespace/#prefixes
            element = f"{selector.namespace}:{element}"
            safe = safe and bool(is_safe_name(selector.namespace))
        xpath = self.xpathexpr_cls(element=element)
        if not safe:
            # Unsafe names are tested with name() = <literal> instead.
            xpath.add_name_test()
        return xpath

    # CombinedSelector: dispatch by combinator

    def xpath_descendant_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a child, grand-child or further descendant of left"""
        return left.join("/descendant-or-self::*/", right)

    def xpath_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
        """right is an immediate child of left"""
        return left.join("/", right)

    def xpath_direct_adjacent_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a sibling immediately after left"""
        xpath = left.join("/following-sibling::", right)
        # The name test must live in the predicate so that position() counts
        # all following siblings, not only those with the right name.
        xpath.add_name_test()
        return xpath.add_condition("position() = 1")

    def xpath_indirect_adjacent_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a sibling after left, immediately or not"""
        return left.join("/following-sibling::", right)

    def xpath_relation_descendant_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a child, grand-child or further descendant of left; select left"""
        return left.join(
            "[descendant::", right, closing_combiner="]", has_inner_condition=True
        )

    def xpath_relation_child_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is an immediate child of left; select left"""
        # has_inner_condition keeps right's condition inside the predicate so
        # that it is tested on the child rather than on left.
        return left.join(
            "[./", right, closing_combiner="]", has_inner_condition=True
        )

    def xpath_relation_direct_adjacent_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a sibling immediately after left; select left"""
        # Fold right's element name into its condition; a universal right
        # side then contributes no name test at all.
        right.add_name_test()
        sibling_test = "position() = 1"
        if right.condition:
            sibling_test = f"({right.condition}) and ({sibling_test})"
        return left.add_condition(f"following-sibling::*[{sibling_test}]")

    def xpath_relation_indirect_adjacent_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a sibling after left, immediately or not; select left"""
        # has_inner_condition keeps right's condition inside the predicate so
        # that it is tested on the sibling rather than on left.
        return left.join(
            "[following-sibling::",
            right,
            closing_combiner="]",
            has_inner_condition=True,
        )

    # Function: dispatch by function/pseudo-class name

    def xpath_nth_child_function(
        self,
        xpath: XPathExpr,
        function: Function,
        last: bool = False,
        add_name_test: bool = True,
    ) -> XPathExpr:
        """Translate ``:nth-child(an+b)`` and its variants.

        :param xpath: Expression for the element the pseudo-class applies to.
        :param function: Parsed function; its arguments are the ``an+b`` series.
        :param last: Count following siblings instead of preceding ones
            (the ``nth-last-*`` variants).
        :param add_name_test: When true, siblings are counted with ``*``;
            when false, with ``xpath.element`` (the ``*-of-type`` variants).
        :raises ExpressionError: When the series cannot be parsed.
        """
        try:
            a, b = parse_series(function.arguments)
        except ValueError as ex:
            raise ExpressionError(f"Invalid series: '{function.arguments!r}'") from ex

        # From https://www.w3.org/TR/css3-selectors/#structural-pseudos:
        #
        # :nth-child(an+b)
        #       an+b-1 siblings before
        #
        # :nth-last-child(an+b)
        #       an+b-1 siblings after
        #
        # :nth-of-type(an+b)
        #       an+b-1 siblings with the same expanded element name before
        #
        # :nth-last-of-type(an+b)
        #       an+b-1 siblings with the same expanded element name after
        #
        # So,
        # for :nth-child and :nth-of-type
        #
        #    count(preceding-sibling::<nodetest>) = an+b-1
        #
        # for :nth-last-child and :nth-last-of-type
        #
        #    count(following-sibling::<nodetest>) = an+b-1
        #
        # therefore,
        #    count(...) - (b-1) ≡ 0 (mod a)
        #
        # if a == 0:
        # ~~~~~~~~~~
        #    count(...) = b-1
        #
        # if a < 0:
        # ~~~~~~~~~
        #    count(...) - b +1 <= 0
        # -> count(...) <= b-1
        #
        # if a > 0:
        # ~~~~~~~~~
        #    count(...) - b +1 >= 0
        # -> count(...) >= b-1

        # work with b-1 instead
        b_min_1 = b - 1

        # early-exit condition 1:
        # ~~~~~~~~~~~~~~~~~~~~~~~
        # for a == 1, nth-*(an+b) means n+b-1 siblings before/after,
        # and since n ∈ {0, 1, 2, ...}, if b-1<=0,
        # there is always an "n" matching any number of siblings (maybe none)
        if a == 1 and b_min_1 <= 0:
            return xpath

        # early-exit condition 2:
        # ~~~~~~~~~~~~~~~~~~~~~~~
        # an+b-1 siblings with a<0 and (b-1)<0 is not possible
        if a < 0 and b_min_1 < 0:
            return xpath.add_condition("0")

        # `add_name_test` boolean is inverted and somewhat counter-intuitive:
        #
        # nth_of_type() calls nth_child(add_name_test=False)
        nodetest = "*" if add_name_test else f"{xpath.element}"

        # count siblings before or after the element
        if not last:
            siblings_count = f"count(preceding-sibling::{nodetest})"
        else:
            siblings_count = f"count(following-sibling::{nodetest})"

        # special case of fixed position: nth-*(0n+b)
        # if a == 0:
        # ~~~~~~~~~~
        #    count(***-sibling::***) = b-1
        if a == 0:
            return xpath.add_condition(f"{siblings_count} = {b_min_1}")

        expressions = []

        if a > 0:
            # siblings count, an+b-1, is always >= 0,
            # so if a>0, and (b-1)<=0, an "n" exists to satisfy this,
            # therefore, the predicate is only interesting if (b-1)>0
            if b_min_1 > 0:
                expressions.append(f"{siblings_count} >= {b_min_1}")
        else:
            # if a<0, and (b-1)<0, no "n" satisfies this,
            # this is tested above as an early exit condition
            # otherwise,
            expressions.append(f"{siblings_count} <= {b_min_1}")

        # operations modulo 1 or -1 are simpler, one only needs to verify:
        #
        # - either:
        # count(***-sibling::***) - (b-1) = n = 0, 1, 2, 3, etc.,
        #   i.e. count(***-sibling::***) >= (b-1)
        #
        # - or:
        # count(***-sibling::***) - (b-1) = -n = 0, -1, -2, -3, etc.,
        #   i.e. count(***-sibling::***) <= (b-1)
        # which we just did above.
        #
        if abs(a) != 1:
            # count(***-sibling::***) - (b-1) ≡ 0 (mod a)
            left = siblings_count

            # apply "modulo a" on 2nd term, -(b-1),
            # to simplify things like "(... +6) % -3",
            # and also make it positive with |a|
            b_neg = (-b_min_1) % abs(a)

            if b_neg != 0:
                left = f"({left} +{b_neg})"

            expressions.append(f"{left} mod {a} = 0")

        # Parenthesise the parts only when there are several to join.
        template = "(%s)" if len(expressions) > 1 else "%s"
        xpath.add_condition(
            " and ".join(template % expression for expression in expressions)
        )
        return xpath

    def xpath_nth_last_child_function(
        self, xpath: XPathExpr, function: Function
    ) -> XPathExpr:
        """Translate ``:nth-last-child()`` (counts following siblings)."""
        return self.xpath_nth_child_function(xpath, function, last=True)

    def xpath_nth_of_type_function(
        self, xpath: XPathExpr, function: Function
    ) -> XPathExpr:
        """Translate ``:nth-of-type()``; requires a named element."""
        if xpath.element == "*":
            raise ExpressionError("*:nth-of-type() is not implemented")
        return self.xpath_nth_child_function(xpath, function, add_name_test=False)

    def xpath_nth_last_of_type_function(
        self, xpath: XPathExpr, function: Function
    ) -> XPathExpr:
        """Translate ``:nth-last-of-type()``; requires a named element."""
        if xpath.element == "*":
            raise ExpressionError("*:nth-last-of-type() is not implemented")
        return self.xpath_nth_child_function(
            xpath, function, last=True, add_name_test=False
        )

    def xpath_contains_function(
        self, xpath: XPathExpr, function: Function
    ) -> XPathExpr:
        """Translate the non-standard ``:contains()`` pseudo-class."""
        # Defined there, removed in later drafts:
        # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
        if function.argument_types() not in (["STRING"], ["IDENT"]):
            raise ExpressionError(
                f"Expected a single string or ident for :contains(), got {function.arguments!r}"
            )
        value = cast("str", function.arguments[0].value)
        return xpath.add_condition(f"contains(., {self.xpath_literal(value)})")

    def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
        """Translate ``:lang()`` using the XPath ``lang()`` function."""
        if function.argument_types() not in (["STRING"], ["IDENT"]):
            raise ExpressionError(
                f"Expected a single string or ident for :lang(), got {function.arguments!r}"
            )
        value = cast("str", function.arguments[0].value)
        return xpath.add_condition(f"lang({self.xpath_literal(value)})")

    # Pseudo: dispatch by pseudo-class name

    def xpath_root_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:root``: the element has no parent element."""
        return xpath.add_condition("not(parent::*)")

    # CSS immediate children (CSS ":scope > div" to XPath "child::div" or "./div")
    # Works only at the start of a selector
    # Needed to get immediate children of a processed selector in Scrapy
    # for product in response.css('.product'):
    #     description = product.css(':scope > div::text').get()
    def xpath_scope_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:scope``: adds the always-true condition ``1``."""
        return xpath.add_condition("1")

    def xpath_first_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:first-child``: no preceding sibling element."""
        return xpath.add_condition("count(preceding-sibling::*) = 0")

    def xpath_last_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:last-child``: no following sibling element."""
        return xpath.add_condition("count(following-sibling::*) = 0")

    def xpath_first_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:first-of-type``; requires a named element."""
        if xpath.element == "*":
            raise ExpressionError("*:first-of-type is not implemented")
        return xpath.add_condition(f"count(preceding-sibling::{xpath.element}) = 0")

    def xpath_last_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:last-of-type``; requires a named element."""
        if xpath.element == "*":
            raise ExpressionError("*:last-of-type is not implemented")
        return xpath.add_condition(f"count(following-sibling::{xpath.element}) = 0")

    def xpath_only_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:only-child``: the parent has exactly one child element."""
        return xpath.add_condition("count(parent::*/child::*) = 1")

    def xpath_only_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:only-of-type``; requires a named element."""
        if xpath.element == "*":
            raise ExpressionError("*:only-of-type is not implemented")
        return xpath.add_condition(f"count(parent::*/child::{xpath.element}) = 1")

    def xpath_empty_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:empty``: no child element and an empty string value."""
        return xpath.add_condition("not(*) and not(string-length())")

    def pseudo_never_matches(self, xpath: XPathExpr) -> XPathExpr:
        """Common implementation for pseudo-classes that never match."""
        return xpath.add_condition("0")

    xpath_link_pseudo = pseudo_never_matches
    xpath_visited_pseudo = pseudo_never_matches
    xpath_hover_pseudo = pseudo_never_matches
    xpath_active_pseudo = pseudo_never_matches
    xpath_focus_pseudo = pseudo_never_matches
    xpath_target_pseudo = pseudo_never_matches
    xpath_enabled_pseudo = pseudo_never_matches
    xpath_disabled_pseudo = pseudo_never_matches
    xpath_checked_pseudo = pseudo_never_matches

    # Attrib: dispatch by attribute operator

    def xpath_attrib_exists(
        self, xpath: XPathExpr, name: str, value: str | None
    ) -> XPathExpr:
        """``[name]``: the attribute is present."""
        assert not value
        xpath.add_condition(name)
        return xpath

    def xpath_attrib_equals(
        self, xpath: XPathExpr, name: str, value: str | None
    ) -> XPathExpr:
        """``[name=value]``: the attribute equals *value* exactly."""
        assert value is not None
        xpath.add_condition(f"{name} = {self.xpath_literal(value)}")
        return xpath

    def xpath_attrib_different(
        self, xpath: XPathExpr, name: str, value: str | None
    ) -> XPathExpr:
        """``[name!=value]``: for a non-empty *value*, a missing attribute
        also matches; for an empty *value* only the comparison is emitted."""
        assert value is not None
        # FIXME: this seems like a weird hack...
        if value:
            xpath.add_condition(f"not({name}) or {name} != {self.xpath_literal(value)}")
        else:
            xpath.add_condition(f"{name} != {self.xpath_literal(value)}")
        return xpath

    def xpath_attrib_includes(
        self, xpath: XPathExpr, name: str, value: str | None
    ) -> XPathExpr:
        """``[name~=value]``: *value* is one of the whitespace-separated
        words of the attribute.  An empty *value*, or one containing
        whitespace, never matches."""
        if value and is_non_whitespace(value):
            # Pad both sides with spaces so only whole words can match.
            arg = self.xpath_literal(" " + value + " ")
            xpath.add_condition(
                f"{name} and contains(concat(' ', normalize-space({name}), ' '), {arg})"
            )
        else:
            xpath.add_condition("0")
        return xpath

    def xpath_attrib_dashmatch(
        self, xpath: XPathExpr, name: str, value: str | None
    ) -> XPathExpr:
        """``[name|=value]``: the attribute equals *value* or starts with
        *value* followed by ``-``."""
        assert value is not None
        arg = self.xpath_literal(value)
        arg_dash = self.xpath_literal(value + "-")
        # Weird, but true...
        xpath.add_condition(
            f"{name} and ({name} = {arg} or starts-with({name}, {arg_dash}))"
        )
        return xpath

    def xpath_attrib_prefixmatch(
        self, xpath: XPathExpr, name: str, value: str | None
    ) -> XPathExpr:
        """``[name^=value]``: the attribute starts with *value*; an empty
        *value* never matches."""
        if value:
            xpath.add_condition(
                f"{name} and starts-with({name}, {self.xpath_literal(value)})"
            )
        else:
            xpath.add_condition("0")
        return xpath

    def xpath_attrib_suffixmatch(
        self, xpath: XPathExpr, name: str, value: str | None
    ) -> XPathExpr:
        """``[name$=value]``: the attribute ends with *value*; an empty
        *value* never matches."""
        if value:
            # Oddly there is a starts-with in XPath 1.0, but not ends-with
            xpath.add_condition(
                f"{name} and substring({name}, string-length({name})-{len(value) - 1}) = {self.xpath_literal(value)}"
            )
        else:
            xpath.add_condition("0")
        return xpath

    def xpath_attrib_substringmatch(
        self, xpath: XPathExpr, name: str, value: str | None
    ) -> XPathExpr:
        """``[name*=value]``: the attribute contains *value*; an empty
        *value* never matches."""
        if value:
            # Attribute selectors are case sensitive
            xpath.add_condition(
                f"{name} and contains({name}, {self.xpath_literal(value)})"
            )
        else:
            xpath.add_condition("0")
        return xpath
class HTMLTranslator(GenericTranslator):
    """
    Translator for (X)HTML documents.

    Has a more useful implementation of some pseudo-classes based on
    HTML-specific element names and attribute names, as described in
    the `HTML5 specification`_. It assumes no-quirks mode.
    The API is the same as :class:`GenericTranslator`.

    .. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors

    :param xhtml:
        If false (the default), element names and attribute names
        are case-insensitive.

    """

    lang_attribute = "lang"

    def __init__(self, xhtml: bool = False) -> None:
        self.xhtml = xhtml  # Might be useful for sub-classes?
        if not xhtml:
            # See their definition in GenericTranslator.
            self.lower_case_element_names = True
            self.lower_case_attribute_names = True

    def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore[override]
        """``:checked``: selected ``option`` elements, and checked
        ``input``/``command`` elements of type checkbox or radio."""
        # FIXME: is this really all the elements?
        return xpath.add_condition(
            "(@selected and name(.) = 'option') or "
            "(@checked "
            "and (name(.) = 'input' or name(.) = 'command')"
            "and (@type = 'checkbox' or @type = 'radio'))"
        )

    def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
        """Translate ``:lang()`` by comparing against the nearest
        ancestor-or-self element carrying :attr:`lang_attribute`.

        The comparison is a case-insensitive prefix match on
        ``<value>-``; a ``-`` is appended to the attribute value first so
        that an exact match also succeeds.
        """
        if function.argument_types() not in (["STRING"], ["IDENT"]):
            raise ExpressionError(
                f"Expected a single string or ident for :lang(), got {function.arguments!r}"
            )
        value = function.arguments[0].value
        assert value
        arg = self.xpath_literal(value.lower() + "-")
        # Look up and read the same attribute, so that overriding
        # ``lang_attribute`` in a sub-class affects both consistently.
        return xpath.add_condition(
            f"ancestor-or-self::*[@{self.lang_attribute}][1][starts-with(concat("
            # XPath 1.0 has no lower-case function...
            f"translate(@{self.lang_attribute}, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
            "'abcdefghijklmnopqrstuvwxyz'), "
            f"'-'), {arg})]"
        )

    def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore[override]
        """``:link``: ``a``, ``link`` or ``area`` elements with an ``href``."""
        return xpath.add_condition(
            "@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')"
        )

    # Links are never visited, the implementation for :visited is the same
    # as in GenericTranslator

    def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore[override]
        """``:disabled``: form-related elements that carry ``@disabled``,
        or form controls inside a disabled ``fieldset``."""
        # http://www.w3.org/TR/html5/section-index.html#attributes-1
        return xpath.add_condition(
            """
        (
            @disabled and
            (
                (name(.) = 'input' and @type != 'hidden') or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea' or
                name(.) = 'command' or
                name(.) = 'fieldset' or
                name(.) = 'optgroup' or
                name(.) = 'option'
            )
        ) or (
            (
                (name(.) = 'input' and @type != 'hidden') or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea'
            )
            and ancestor::fieldset[@disabled]
        )
        """
        )
        # FIXME: in the second half, add "and is not a descendant of that
        # fieldset element's first legend element child, if any."

    def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore[override]
        """``:enabled``: links with an ``href``, and form-related elements
        that are not disabled (directly or through an ancestor
        ``fieldset``/``optgroup``, depending on the element)."""
        # http://www.w3.org/TR/html5/section-index.html#attributes-1
        return xpath.add_condition(
            """
        (
            @href and (
                name(.) = 'a' or
                name(.) = 'link' or
                name(.) = 'area'
            )
        ) or (
            (
                name(.) = 'command' or
                name(.) = 'fieldset' or
                name(.) = 'optgroup'
            )
            and not(@disabled)
        ) or (
            (
                (name(.) = 'input' and @type != 'hidden') or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea' or
                name(.) = 'keygen'
            )
            and not (@disabled or ancestor::fieldset[@disabled])
        ) or (
            name(.) = 'option' and not(
                @disabled or ancestor::optgroup[@disabled]
            )
        )
        """
        )
        # FIXME: ... or "li elements that are children of menu elements,
        # and that have a child element that defines a command, if the first
        # such element's Disabled State facet is false (not disabled)".
        # FIXME: after ancestor::fieldset[@disabled], add "and is not a
        # descendant of that fieldset element's first legend element child,
        # if any."