Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/cssselect/xpath.py: 75%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3 cssselect.xpath
4 ===============
6 Translation of parsed CSS selectors to XPath expressions.
9 :copyright: (c) 2007-2012 Ian Bicking and contributors.
10 See AUTHORS for more details.
11 :license: BSD, see LICENSE for more details.
13"""
15import re
16import typing
17import warnings
18from typing import Optional
20from cssselect.parser import (
21 Attrib,
22 Class,
23 CombinedSelector,
24 Element,
25 Function,
26 Hash,
27 Matching,
28 Negation,
29 Pseudo,
30 PseudoElement,
31 Relation,
32 Selector,
33 SelectorError,
34 SpecificityAdjustment,
35 Tree,
36 parse,
37 parse_series,
38)
@typing.no_type_check
def _unicode_safe_getattr(obj, name, default=None):
    """Deprecated alias of :func:`getattr`.

    Emits a :class:`DeprecationWarning` attributed to the caller
    (``stacklevel=2``) and then returns ``getattr(obj, name, default)``.
    """
    message = (
        "_unicode_safe_getattr is deprecated and will be removed in the"
        " next release, use getattr() instead"
    )
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    result = getattr(obj, name, default)
    return result
class ExpressionError(SelectorError, RuntimeError):
    """Raised when a selector is unknown or cannot be translated.

    Typical causes are an unsupported pseudo-class or pseudo-element.
    """
56#### XPath Helpers
class XPathExpr:
    """Mutable builder for one XPath location path.

    The expression is rendered as ``path + element + "[" + condition + "]"``
    (the predicate is omitted when ``condition`` is empty).
    """

    def __init__(
        self,
        path: str = "",
        element: str = "*",
        condition: str = "",
        star_prefix: bool = False,
    ) -> None:
        """Store the three parts of the expression.

        :param path: Everything that precedes the final step.
        :param element: Node test of the final step (``"*"`` = any element).
        :param condition: Predicate body of the final step, without brackets.
        :param star_prefix: Accepted for backward compatibility only; it is
            ignored. Use :meth:`add_star_prefix` to add the prefix.
        """
        self.path = path
        self.element = element
        self.condition = condition

    def __str__(self) -> str:
        """Render the expression as an XPath string."""
        path = str(self.path) + str(self.element)
        if self.condition:
            path += "[%s]" % self.condition
        return path

    def __repr__(self) -> str:
        return "%s[%s]" % (self.__class__.__name__, self)

    def add_condition(self, condition: str, conjuction: str = "and") -> "XPathExpr":
        """Combine *condition* with the current predicate and return ``self``.

        When a predicate already exists both sides are parenthesised and
        joined with *conjuction* (``"and"`` or ``"or"``); otherwise
        *condition* simply becomes the predicate.
        """
        if self.condition:
            self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition)
        else:
            self.condition = condition
        return self

    def add_name_test(self) -> None:
        """Move the element name into the predicate as a ``name()`` test.

        Afterwards ``element`` is ``"*"``. Does nothing when the element is
        already the wildcard.
        """
        if self.element == "*":
            # We weren't doing a test anyway
            return
        self.add_condition(
            "name() = %s" % GenericTranslator.xpath_literal(self.element)
        )
        self.element = "*"

    def add_star_prefix(self) -> None:
        """
        Append '*/' to the path to keep the context constrained
        to a single parent.
        """
        self.path += "*/"

    def join(
        self,
        combiner: str,
        other: "XPathExpr",
        closing_combiner: Optional[str] = None,
        has_inner_condition: bool = False,
    ) -> "XPathExpr":
        """Append *other* to this expression in place and return ``self``.

        :param combiner: Text inserted between the two expressions
            (e.g. ``"/"`` or ``"[descendant::"``).
        :param other: Right-hand expression.
        :param closing_combiner: Optional text appended after *other*'s
            element (e.g. ``"]"``).
        :param has_inner_condition: When true, *other*'s condition is
            rendered as a predicate *inside* the closing combiner, so it
            applies to *other*'s element instead of becoming the condition
            of the joined expression.
        """
        # str(self) already contains the current predicate.
        path = str(self) + combiner
        # Any "star prefix" is redundant when joining.
        if other.path != "*/":
            path += other.path
        self.path = path
        if not has_inner_condition:
            self.element = (
                other.element + closing_combiner if closing_combiner else other.element
            )
            self.condition = other.condition
        else:
            self.element = other.element
            if other.condition:
                self.element += "[" + other.condition + "]"
            if closing_combiner:
                self.element += closing_combiner
            # The old predicate was serialized into ``path`` above; clear it
            # so that it is not rendered a second time after the element.
            self.condition = ""
        return self
# Split a string on runs of single quotes, keeping the runs themselves
# (capturing group) so the pieces can be re-assembled with concat().
split_at_single_quotes = re.compile(r"('+)").split

# The spec is actually more permissive than that, but don't bother.
# This is just for the fast path.
# http://www.w3.org/TR/REC-xml/#NT-NameStartChar
# ``\Z`` (not ``$``) so that a name ending in a newline is rejected:
# ``$`` would also match just before a trailing "\n".
is_safe_name = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.-]*\Z").match

# Test that the string is not empty and does not contain whitespace
# (anchored with ``\Z`` for the same trailing-newline reason as above).
is_non_whitespace = re.compile(r"^[^ \t\r\n\f]+\Z").match
140#### Translation
class GenericTranslator:
    """
    Translator for "generic" XML documents.

    Everything is case-sensitive, no assumption is made on the meaning
    of element names and attribute names.

    """

    ####
    ####  HERE BE DRAGONS
    ####
    ####  You are welcome to hook into this to change some behavior,
    ####  but do so at your own risk.
    ####  Until it has received a lot more work and review,
    ####  I reserve the right to change this API in backward-incompatible ways
    ####  with any minor version of cssselect.
    ####  See https://github.com/scrapy/cssselect/pull/22
    ####  -- Simon Sapin.
    ####

    # CSS combinator -> suffix used to look up ``xpath_<name>_combinator``
    # (and ``xpath_relation_<name>_combinator``) methods.
    combinator_mapping = {
        " ": "descendant",
        ">": "child",
        "+": "direct_adjacent",
        "~": "indirect_adjacent",
    }

    # CSS attribute operator -> suffix used to look up ``xpath_attrib_<name>``.
    attribute_operator_mapping = {
        "exists": "exists",
        "=": "equals",
        "~=": "includes",
        "|=": "dashmatch",
        "^=": "prefixmatch",
        "$=": "suffixmatch",
        "*=": "substringmatch",
        "!=": "different",  # XXX Not in Level 3 but meh
    }

    #: The attribute used for ID selectors depends on the document language:
    #: http://www.w3.org/TR/selectors/#id-selectors
    id_attribute = "id"

    #: The attribute used for ``:lang()`` depends on the document language:
    #: http://www.w3.org/TR/selectors/#lang-pseudo
    lang_attribute = "xml:lang"

    #: The case sensitivity of document language element names,
    #: attribute names, and attribute values in selectors depends
    #: on the document language.
    #: http://www.w3.org/TR/selectors/#casesens
    #:
    #: When a document language defines one of these as case-insensitive,
    #: cssselect assumes that the document parser makes the parsed values
    #: lower-case. Making the selector lower-case too makes the comparison
    #: case-insensitive.
    #:
    #: In HTML, element names and attributes names (but not attribute values)
    #: are case-insensitive. All of lxml.html, html5lib, BeautifulSoup4
    #: and HTMLParser make them lower-case in their parse result, so
    #: the assumption holds.
    lower_case_element_names = False
    lower_case_attribute_names = False
    lower_case_attribute_values = False

    # class used to represent an XPath expression
    xpathexpr_cls = XPathExpr

    def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
        """Translate a *group of selectors* to XPath.

        Pseudo-elements are not supported here since XPath only knows
        about "real" elements.

        :param css:
            A *group of selectors* as a string.
        :param prefix:
            This string is prepended to the XPath expression for each selector.
            The default makes selectors scoped to the context node's subtree.
        :raises:
            :class:`~cssselect.SelectorSyntaxError` on invalid selectors,
            :class:`ExpressionError` on unknown/unsupported selectors,
            including pseudo-elements.
        :returns:
            The equivalent XPath 1.0 expression as a string.

        """
        return " | ".join(
            self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
            for selector in parse(css)
        )

    def selector_to_xpath(
        self,
        selector: Selector,
        prefix: str = "descendant-or-self::",
        translate_pseudo_elements: bool = False,
    ) -> str:
        """Translate a parsed selector to XPath.

        :param selector:
            A parsed :class:`Selector` object.
        :param prefix:
            This string is prepended to the resulting XPath expression.
            The default makes selectors scoped to the context node's subtree.
        :param translate_pseudo_elements:
            Unless this is set to ``True`` (as :meth:`css_to_xpath` does),
            the :attr:`~Selector.pseudo_element` attribute of the selector
            is ignored.
            It is the caller's responsibility to reject selectors
            with pseudo-elements, or to account for them somehow.
        :raises:
            :class:`TypeError` if *selector* has no ``parsed_tree``,
            :class:`ExpressionError` on unknown/unsupported selectors.
        :returns:
            The equivalent XPath 1.0 expression as a string.

        """
        tree = getattr(selector, "parsed_tree", None)
        if not tree:
            raise TypeError("Expected a parsed selector, got %r" % (selector,))
        xpath = self.xpath(tree)
        assert isinstance(xpath, self.xpathexpr_cls)  # help debug a missing 'return'
        if translate_pseudo_elements and selector.pseudo_element:
            xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
        return (prefix or "") + str(xpath)

    def xpath_pseudo_element(
        self, xpath: XPathExpr, pseudo_element: PseudoElement
    ) -> XPathExpr:
        """Translate a pseudo-element.

        Defaults to not supporting pseudo-elements at all,
        but can be overridden by sub-classes.

        """
        raise ExpressionError("Pseudo-elements are not supported.")

    @staticmethod
    def xpath_literal(s: str) -> str:
        """Return *s* as an XPath 1.0 string literal.

        XPath 1.0 has no escape mechanism inside literals, so a string that
        contains both quote characters is rendered as a ``concat(...)`` of
        single-quoted and double-quoted pieces.
        """
        s = str(s)
        if "'" not in s:
            return "'%s'" % s
        if '"' not in s:
            return '"%s"' % s
        return "concat(%s)" % ",".join(
            [
                ('"%s"' if "'" in part else "'%s'") % part
                for part in split_at_single_quotes(s)
                if part
            ]
        )

    def xpath(self, parsed_selector: Tree) -> XPathExpr:
        """Translate any parsed selector object.

        Dispatches to ``xpath_<lower-cased class name>``.

        :raises: :class:`ExpressionError` if no such method exists.
        """
        type_name = type(parsed_selector).__name__
        method = getattr(self, "xpath_%s" % type_name.lower(), None)
        if method is None:
            raise ExpressionError("%s is not supported." % type_name)
        return typing.cast(XPathExpr, method(parsed_selector))

    # Dispatched by parsed object type

    def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr:
        """Translate a combined selector."""
        combinator = self.combinator_mapping[combined.combinator]
        method = getattr(self, "xpath_%s_combinator" % combinator)
        return typing.cast(
            XPathExpr,
            method(self.xpath(combined.selector), self.xpath(combined.subselector)),
        )

    def xpath_negation(self, negation: Negation) -> XPathExpr:
        """Translate ``:not(...)``.

        A sub-selector that carries no condition (the universal selector)
        matches everything, so its negation never matches (``"0"``).
        """
        xpath = self.xpath(negation.selector)
        sub_xpath = self.xpath(negation.subselector)
        sub_xpath.add_name_test()
        if sub_xpath.condition:
            return xpath.add_condition("not(%s)" % sub_xpath.condition)
        else:
            return xpath.add_condition("0")

    def xpath_relation(self, relation: Relation) -> XPathExpr:
        """Translate a relational pseudo-class argument (``:has(...)``).

        Dispatches to ``xpath_relation_<combinator>_combinator``.
        """
        xpath = self.xpath(relation.selector)
        combinator = relation.combinator
        subselector = relation.subselector
        right = self.xpath(subselector.parsed_tree)
        method = getattr(
            self,
            "xpath_relation_%s_combinator"
            % self.combinator_mapping[typing.cast(str, combinator.value)],
        )
        return typing.cast(XPathExpr, method(xpath, right))

    def _xpath_any_of(self, xpath: XPathExpr, selector_list) -> XPathExpr:
        """AND onto *xpath* the disjunction of the selectors in *selector_list*.

        Shared implementation of ``:is()`` and ``:where()``.
        """
        # Translate everything first so unsupported selectors always raise.
        exprs = [self.xpath(selector) for selector in selector_list]
        combined = ""
        for e in exprs:
            e.add_name_test()
            if not e.condition:
                # A universal alternative makes the whole disjunction true:
                # nothing to restrict.
                return xpath
            if combined:
                combined = "(%s) or (%s)" % (combined, e.condition)
            else:
                combined = e.condition
        if combined:
            # Added as a single unit so an existing condition is AND-ed with
            # the whole disjunction rather than OR-ed with its first member.
            xpath.add_condition(combined)
        return xpath

    def xpath_matching(self, matching: Matching) -> XPathExpr:
        """Translate ``:is(...)``: match if any listed selector matches."""
        xpath = self.xpath(matching.selector)
        return self._xpath_any_of(xpath, matching.selector_list)

    def xpath_specificityadjustment(self, matching: SpecificityAdjustment) -> XPathExpr:
        """Translate ``:where(...)``; same matching behavior as ``:is(...)``."""
        xpath = self.xpath(matching.selector)
        return self._xpath_any_of(xpath, matching.selector_list)

    def xpath_function(self, function: Function) -> XPathExpr:
        """Translate a functional pseudo-class."""
        method_name = "xpath_%s_function" % function.name.replace("-", "_")
        method = getattr(self, method_name, None)
        if not method:
            raise ExpressionError("The pseudo-class :%s() is unknown" % function.name)
        return typing.cast(XPathExpr, method(self.xpath(function.selector), function))

    def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr:
        """Translate a pseudo-class."""
        method_name = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_")
        method = getattr(self, method_name, None)
        if not method:
            # TODO: better error message for pseudo-elements?
            raise ExpressionError("The pseudo-class :%s is unknown" % pseudo.ident)
        return typing.cast(XPathExpr, method(self.xpath(pseudo.selector)))

    def xpath_attrib(self, selector: Attrib) -> XPathExpr:
        """Translate an attribute selector."""
        operator = self.attribute_operator_mapping[selector.operator]
        method = getattr(self, "xpath_attrib_%s" % operator)
        if self.lower_case_attribute_names:
            name = selector.attrib.lower()
        else:
            name = selector.attrib
        safe = is_safe_name(name)
        if selector.namespace:
            name = "%s:%s" % (selector.namespace, name)
            safe = safe and is_safe_name(selector.namespace)
        if safe:
            attrib = "@" + name
        else:
            # Names that cannot be written verbatim are compared as strings.
            attrib = "attribute::*[name() = %s]" % self.xpath_literal(name)
        if selector.value is None:
            value = None
        elif self.lower_case_attribute_values:
            value = typing.cast(str, selector.value.value).lower()
        else:
            value = selector.value.value
        return typing.cast(
            XPathExpr, method(self.xpath(selector.selector), attrib, value)
        )

    def xpath_class(self, class_selector: Class) -> XPathExpr:
        """Translate a class selector."""
        # .foo is defined as [class~=foo] in the spec.
        xpath = self.xpath(class_selector.selector)
        return self.xpath_attrib_includes(xpath, "@class", class_selector.class_name)

    def xpath_hash(self, id_selector: Hash) -> XPathExpr:
        """Translate an ID selector, comparing against :attr:`id_attribute`."""
        xpath = self.xpath(id_selector.selector)
        return self.xpath_attrib_equals(
            xpath, "@" + self.id_attribute, id_selector.id
        )

    def xpath_element(self, selector: Element) -> XPathExpr:
        """Translate a type or universal selector."""
        element = selector.element
        if not element:
            element = "*"
            safe = True
        else:
            safe = bool(is_safe_name(element))
            if self.lower_case_element_names:
                element = element.lower()
        if selector.namespace:
            # Namespace prefixes are case-sensitive.
            # http://www.w3.org/TR/css3-namespace/#prefixes
            element = "%s:%s" % (selector.namespace, element)
            safe = safe and bool(is_safe_name(selector.namespace))
        xpath = self.xpathexpr_cls(element=element)
        if not safe:
            xpath.add_name_test()
        return xpath

    # CombinedSelector: dispatch by combinator

    def xpath_descendant_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a child, grand-child or further descendant of left"""
        return left.join("/descendant-or-self::*/", right)

    def xpath_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
        """right is an immediate child of left"""
        return left.join("/", right)

    def xpath_direct_adjacent_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a sibling immediately after left"""
        xpath = left.join("/following-sibling::", right)
        # The name test must live in the predicate so that position()
        # counts all following siblings, not only same-named ones.
        xpath.add_name_test()
        return xpath.add_condition("position() = 1")

    def xpath_indirect_adjacent_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a sibling after left, immediately or not"""
        return left.join("/following-sibling::", right)

    def xpath_relation_descendant_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a child, grand-child or further descendant of left; select left"""
        return left.join(
            "[descendant::", right, closing_combiner="]", has_inner_condition=True
        )

    def xpath_relation_child_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is an immediate child of left; select left"""
        # has_inner_condition keeps right's condition inside the brackets so
        # it constrains the child, not the selected (left) element.
        return left.join(
            "[./", right, closing_combiner="]", has_inner_condition=True
        )

    def xpath_relation_direct_adjacent_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a sibling immediately after left; select left"""
        # Fold the element name (escaped by add_name_test, skipped for "*")
        # and any condition of ``right`` into one predicate evaluated against
        # all following siblings, so position() = 1 means "the very next one".
        right.add_name_test()
        right.add_condition("position() = 1")
        return left.add_condition("following-sibling::*[%s]" % right.condition)

    def xpath_relation_indirect_adjacent_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a sibling after left, immediately or not; select left"""
        # has_inner_condition: see xpath_relation_child_combinator.
        return left.join(
            "[following-sibling::",
            right,
            closing_combiner="]",
            has_inner_condition=True,
        )

    # Function: dispatch by function/pseudo-class name

    def xpath_nth_child_function(
        self,
        xpath: XPathExpr,
        function: Function,
        last: bool = False,
        add_name_test: bool = True,
    ) -> XPathExpr:
        """Translate ``:nth-child(an+b)`` and its three variants.

        :param last: Count following siblings instead of preceding ones
            (the ``:nth-last-*`` forms).
        :param add_name_test: When true count all sibling elements
            (``:nth-child``); when false count only siblings with the same
            element name (``:nth-of-type``).
        :raises: :class:`ExpressionError` if the arguments are not a valid
            ``an+b`` series.
        """
        try:
            a, b = parse_series(function.arguments)
        except ValueError as exc:
            raise ExpressionError(
                "Invalid series: '%r'" % (function.arguments,)
            ) from exc

        # From https://www.w3.org/TR/css3-selectors/#structural-pseudos:
        #
        # :nth-child(an+b)
        #       an+b-1 siblings before
        #
        # :nth-last-child(an+b)
        #       an+b-1 siblings after
        #
        # :nth-of-type(an+b)
        #       an+b-1 siblings with the same expanded element name before
        #
        # :nth-last-of-type(an+b)
        #       an+b-1 siblings with the same expanded element name after
        #
        # So,
        # for :nth-child and :nth-of-type
        #
        #    count(preceding-sibling::<nodetest>) = an+b-1
        #
        # for :nth-last-child and :nth-last-of-type
        #
        #    count(following-sibling::<nodetest>) = an+b-1
        #
        # therefore,
        #    count(...) - (b-1) ≡ 0 (mod a)
        #
        # if a == 0:
        # ~~~~~~~~~~
        #    count(...) = b-1
        #
        # if a < 0:
        # ~~~~~~~~~
        #    count(...) - b +1 <= 0
        # -> count(...) <= b-1
        #
        # if a > 0:
        # ~~~~~~~~~
        #    count(...) - b +1 >= 0
        # -> count(...) >= b-1

        # work with b-1 instead
        b_min_1 = b - 1

        # early-exit condition 1:
        # ~~~~~~~~~~~~~~~~~~~~~~~
        # for a == 1, nth-*(an+b) means n+b-1 siblings before/after,
        # and since n ∈ {0, 1, 2, ...}, if b-1<=0,
        # there is always an "n" matching any number of siblings (maybe none)
        if a == 1 and b_min_1 <= 0:
            return xpath

        # early-exit condition 2:
        # ~~~~~~~~~~~~~~~~~~~~~~~
        # an+b-1 siblings with a<0 and (b-1)<0 is not possible
        if a < 0 and b_min_1 < 0:
            return xpath.add_condition("0")

        # `add_name_test` boolean is inverted and somewhat counter-intuitive:
        #
        # nth_of_type() calls nth_child(add_name_test=False)
        if add_name_test:
            nodetest = "*"
        else:
            nodetest = "%s" % xpath.element

        # count siblings before or after the element
        if not last:
            siblings_count = "count(preceding-sibling::%s)" % nodetest
        else:
            siblings_count = "count(following-sibling::%s)" % nodetest

        # special case of fixed position: nth-*(0n+b)
        # if a == 0:
        # ~~~~~~~~~~
        #    count(***-sibling::***) = b-1
        if a == 0:
            return xpath.add_condition("%s = %s" % (siblings_count, b_min_1))

        expressions = []

        if a > 0:
            # siblings count, an+b-1, is always >= 0,
            # so if a>0, and (b-1)<=0, an "n" exists to satisfy this,
            # therefore, the predicate is only interesting if (b-1)>0
            if b_min_1 > 0:
                expressions.append("%s >= %s" % (siblings_count, b_min_1))
        else:
            # if a<0, and (b-1)<0, no "n" satisfies this,
            # this is tested above as an early exit condition
            # otherwise,
            expressions.append("%s <= %s" % (siblings_count, b_min_1))

        # operations modulo 1 or -1 are simpler, one only needs to verify:
        #
        # - either:
        # count(***-sibling::***) - (b-1) = n = 0, 1, 2, 3, etc.,
        #   i.e. count(***-sibling::***) >= (b-1)
        #
        # - or:
        # count(***-sibling::***) - (b-1) = -n = 0, -1, -2, -3, etc.,
        #   i.e. count(***-sibling::***) <= (b-1)
        # which we just did above.
        #
        if abs(a) != 1:
            # count(***-sibling::***) - (b-1) ≡ 0 (mod a)
            left = siblings_count

            # apply "modulo a" on 2nd term, -(b-1),
            # to simplify things like "(... +6) % -3",
            # and also make it positive with |a|
            b_neg = (-b_min_1) % abs(a)

            if b_neg != 0:
                b_neg_as_str = "+%s" % b_neg
                left = "(%s %s)" % (left, b_neg_as_str)

            expressions.append("%s mod %s = 0" % (left, a))

        # Parenthesise the parts only when there is more than one of them.
        if len(expressions) > 1:
            template = "(%s)"
        else:
            template = "%s"
        xpath.add_condition(
            " and ".join(template % expression for expression in expressions)
        )
        return xpath

    def xpath_nth_last_child_function(
        self, xpath: XPathExpr, function: Function
    ) -> XPathExpr:
        """Translate ``:nth-last-child(an+b)``."""
        return self.xpath_nth_child_function(xpath, function, last=True)

    def xpath_nth_of_type_function(
        self, xpath: XPathExpr, function: Function
    ) -> XPathExpr:
        """Translate ``:nth-of-type(an+b)``; needs an explicit element name."""
        if xpath.element == "*":
            raise ExpressionError("*:nth-of-type() is not implemented")
        return self.xpath_nth_child_function(xpath, function, add_name_test=False)

    def xpath_nth_last_of_type_function(
        self, xpath: XPathExpr, function: Function
    ) -> XPathExpr:
        """Translate ``:nth-last-of-type(an+b)``; needs an explicit element name."""
        if xpath.element == "*":
            raise ExpressionError("*:nth-last-of-type() is not implemented")
        return self.xpath_nth_child_function(
            xpath, function, last=True, add_name_test=False
        )

    def xpath_contains_function(
        self, xpath: XPathExpr, function: Function
    ) -> XPathExpr:
        """Translate the non-standard ``:contains(text)`` pseudo-class."""
        # Defined there, removed in later drafts:
        # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
        if function.argument_types() not in (["STRING"], ["IDENT"]):
            raise ExpressionError(
                "Expected a single string or ident for :contains(), got %r"
                % function.arguments
            )
        value = typing.cast(str, function.arguments[0].value)
        return xpath.add_condition("contains(., %s)" % self.xpath_literal(value))

    def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
        """Translate ``:lang(x)`` using the XPath ``lang()`` function."""
        if function.argument_types() not in (["STRING"], ["IDENT"]):
            raise ExpressionError(
                "Expected a single string or ident for :lang(), got %r"
                % function.arguments
            )
        value = typing.cast(str, function.arguments[0].value)
        return xpath.add_condition("lang(%s)" % (self.xpath_literal(value)))

    # Pseudo: dispatch by pseudo-class name

    def xpath_root_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:root`` -- the element has no parent element."""
        return xpath.add_condition("not(parent::*)")

    # CSS immediate children (CSS ":scope > div" to XPath "child::div" or "./div")
    # Works only at the start of a selector
    # Needed to get immediate children of a processed selector in Scrapy
    # for product in response.css('.product'):
    #     description = product.css(':scope > div::text').get()
    def xpath_scope_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:scope`` -- adds the always-true condition ``1``."""
        return xpath.add_condition("1")

    def xpath_first_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:first-child`` -- no preceding sibling element."""
        return xpath.add_condition("count(preceding-sibling::*) = 0")

    def xpath_last_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:last-child`` -- no following sibling element."""
        return xpath.add_condition("count(following-sibling::*) = 0")

    def xpath_first_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:first-of-type``; needs an explicit element name."""
        if xpath.element == "*":
            raise ExpressionError("*:first-of-type is not implemented")
        return xpath.add_condition("count(preceding-sibling::%s) = 0" % xpath.element)

    def xpath_last_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:last-of-type``; needs an explicit element name."""
        if xpath.element == "*":
            raise ExpressionError("*:last-of-type is not implemented")
        return xpath.add_condition("count(following-sibling::%s) = 0" % xpath.element)

    def xpath_only_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:only-child`` -- the parent has exactly one child element."""
        return xpath.add_condition("count(parent::*/child::*) = 1")

    def xpath_only_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:only-of-type``; needs an explicit element name."""
        if xpath.element == "*":
            raise ExpressionError("*:only-of-type is not implemented")
        return xpath.add_condition("count(parent::*/child::%s) = 1" % xpath.element)

    def xpath_empty_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:empty`` -- no child elements and an empty string value."""
        return xpath.add_condition("not(*) and not(string-length())")

    def pseudo_never_matches(self, xpath: XPathExpr) -> XPathExpr:
        """Common implementation for pseudo-classes that never match."""
        return xpath.add_condition("0")

    xpath_link_pseudo = pseudo_never_matches
    xpath_visited_pseudo = pseudo_never_matches
    xpath_hover_pseudo = pseudo_never_matches
    xpath_active_pseudo = pseudo_never_matches
    xpath_focus_pseudo = pseudo_never_matches
    xpath_target_pseudo = pseudo_never_matches
    xpath_enabled_pseudo = pseudo_never_matches
    xpath_disabled_pseudo = pseudo_never_matches
    xpath_checked_pseudo = pseudo_never_matches

    # Attrib: dispatch by attribute operator

    def xpath_attrib_exists(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr]`` -- the attribute is present."""
        assert not value
        xpath.add_condition(name)
        return xpath

    def xpath_attrib_equals(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr=value]`` -- exact match."""
        assert value is not None
        xpath.add_condition("%s = %s" % (name, self.xpath_literal(value)))
        return xpath

    def xpath_attrib_different(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr!=value]`` (non-standard).

        For a non-empty value a missing attribute also matches; for the
        empty value the attribute must exist and be non-empty.
        """
        assert value is not None
        # FIXME: this seems like a weird hack...
        if value:
            xpath.add_condition(
                "not(%s) or %s != %s" % (name, name, self.xpath_literal(value))
            )
        else:
            xpath.add_condition("%s != %s" % (name, self.xpath_literal(value)))
        return xpath

    def xpath_attrib_includes(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr~=value]`` -- whitespace-separated word match.

        An empty value or one containing whitespace never matches.
        """
        if value and is_non_whitespace(value):
            xpath.add_condition(
                "%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
                % (name, name, self.xpath_literal(" " + value + " "))
            )
        else:
            xpath.add_condition("0")
        return xpath

    def xpath_attrib_dashmatch(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr|=value]`` -- equals *value* or starts with ``value-``."""
        assert value is not None
        # Weird, but true...
        xpath.add_condition(
            "%s and (%s = %s or starts-with(%s, %s))"
            % (
                name,
                name,
                self.xpath_literal(value),
                name,
                self.xpath_literal(value + "-"),
            )
        )
        return xpath

    def xpath_attrib_prefixmatch(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr^=value]``; an empty value never matches."""
        if value:
            xpath.add_condition(
                "%s and starts-with(%s, %s)" % (name, name, self.xpath_literal(value))
            )
        else:
            xpath.add_condition("0")
        return xpath

    def xpath_attrib_suffixmatch(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr$=value]``; an empty value never matches."""
        if value:
            # Oddly there is a starts-with in XPath 1.0, but not ends-with
            xpath.add_condition(
                "%s and substring(%s, string-length(%s)-%s) = %s"
                % (name, name, name, len(value) - 1, self.xpath_literal(value))
            )
        else:
            xpath.add_condition("0")
        return xpath

    def xpath_attrib_substringmatch(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr*=value]``; an empty value never matches."""
        if value:
            # Attribute selectors are case sensitive
            xpath.add_condition(
                "%s and contains(%s, %s)" % (name, name, self.xpath_literal(value))
            )
        else:
            xpath.add_condition("0")
        return xpath
class HTMLTranslator(GenericTranslator):
    """
    Translator for (X)HTML documents.

    Has a more useful implementation of some pseudo-classes based on
    HTML-specific element names and attribute names, as described in
    the `HTML5 specification`_. It assumes no-quirks mode.
    The API is the same as :class:`GenericTranslator`.

    .. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors

    :param xhtml:
        If false (the default), element names and attribute names
        are case-insensitive.

    """

    lang_attribute = "lang"

    def __init__(self, xhtml: bool = False) -> None:
        self.xhtml = xhtml  # Might be useful for sub-classes?
        if not xhtml:
            # See their definition in GenericTranslator.
            self.lower_case_element_names = True
            self.lower_case_attribute_names = True

    def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
        """``:checked`` -- selected options and checked checkboxes/radios."""
        # FIXME: is this really all the elements?
        return xpath.add_condition(
            "(@selected and name(.) = 'option') or "
            "(@checked "
            "and (name(.) = 'input' or name(.) = 'command')"
            "and (@type = 'checkbox' or @type = 'radio'))"
        )

    def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
        """Translate ``:lang(x)`` for HTML.

        Looks at the nearest ancestor-or-self element carrying
        :attr:`lang_attribute` and compares its lower-cased value, with a
        trailing ``-`` appended, against the lower-cased argument plus ``-``
        as a prefix.

        :raises: :class:`ExpressionError` unless exactly one string or
            identifier argument is given.
        """
        if function.argument_types() not in (["STRING"], ["IDENT"]):
            raise ExpressionError(
                "Expected a single string or ident for :lang(), got %r"
                % function.arguments
            )
        value = function.arguments[0].value
        assert value
        # The ancestor predicate and the translate() call must look at the
        # same attribute, so both are driven by ``lang_attribute``.
        return xpath.add_condition(
            "ancestor-or-self::*[@%s][1][starts-with(concat("
            # XPath 1.0 has no lower-case function...
            "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
            "'abcdefghijklmnopqrstuvwxyz'), "
            "'-'), %s)]"
            % (
                self.lang_attribute,
                self.lang_attribute,
                self.xpath_literal(value.lower() + "-"),
            )
        )

    def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
        """``:link`` -- ``a``, ``link`` or ``area`` elements with ``href``."""
        return xpath.add_condition(
            "@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')"
        )

    # Links are never visited, the implementation for :visited is the same
    # as in GenericTranslator

    def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
        """``:disabled`` -- form controls disabled directly or via a fieldset."""
        # http://www.w3.org/TR/html5/section-index.html#attributes-1
        return xpath.add_condition(
            """
        (
            @disabled and
            (
                (name(.) = 'input' and @type != 'hidden') or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea' or
                name(.) = 'command' or
                name(.) = 'fieldset' or
                name(.) = 'optgroup' or
                name(.) = 'option'
            )
        ) or (
            (
                (name(.) = 'input' and @type != 'hidden') or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea'
            )
            and ancestor::fieldset[@disabled]
        )
        """
        )
        # FIXME: in the second half, add "and is not a descendant of that
        # fieldset element's first legend element child, if any."

    def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
        """``:enabled`` -- links with ``href`` and non-disabled form controls."""
        # http://www.w3.org/TR/html5/section-index.html#attributes-1
        return xpath.add_condition(
            """
        (
            @href and (
                name(.) = 'a' or
                name(.) = 'link' or
                name(.) = 'area'
            )
        ) or (
            (
                name(.) = 'command' or
                name(.) = 'fieldset' or
                name(.) = 'optgroup'
            )
            and not(@disabled)
        ) or (
            (
                (name(.) = 'input' and @type != 'hidden') or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea' or
                name(.) = 'keygen'
            )
            and not (@disabled or ancestor::fieldset[@disabled])
        ) or (
            name(.) = 'option' and not(
                @disabled or ancestor::optgroup[@disabled]
            )
        )
        """
        )
        # FIXME: ... or "li elements that are children of menu elements,
        # and that have a child element that defines a command, if the first
        # such element's Disabled State facet is false (not disabled)".
        # FIXME: after ancestor::fieldset[@disabled], add "and is not a
        # descendant of that fieldset element's first legend element child,
        # if any."