Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/cssselect/xpath.py: 75%
343 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-09 06:19 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-09 06:19 +0000
1# -*- coding: utf-8 -*-
2"""
3 cssselect.xpath
4 ===============
6 Translation of parsed CSS selectors to XPath expressions.
9 :copyright: (c) 2007-2012 Ian Bicking and contributors.
10 See AUTHORS for more details.
11 :license: BSD, see LICENSE for more details.
13"""
15import re
16import typing
17import warnings
18from typing import Optional
20from cssselect.parser import (
21 parse,
22 parse_series,
23 PseudoElement,
24 Selector,
25 SelectorError,
26 Tree,
27 Element,
28 Hash,
29 Class,
30 Function,
31 Pseudo,
32 Attrib,
33 Negation,
34 Relation,
35 Matching,
36 SpecificityAdjustment,
37 CombinedSelector,
38)
@typing.no_type_check
def _unicode_safe_getattr(obj, name, default=None):
    """Deprecated alias of the built-in :func:`getattr`.

    Emits a :class:`DeprecationWarning` attributed to the caller
    (``stacklevel=2``) and then returns ``getattr(obj, name, default)``.
    """
    message = (
        "_unicode_safe_getattr is deprecated and will be removed in the"
        " next release, use getattr() instead"
    )
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return getattr(obj, name, default)
class ExpressionError(SelectorError, RuntimeError):
    """Unknown or unsupported selector (eg. pseudo-class).

    Raised by the translator methods in this module when a parsed selector
    uses a construct they cannot turn into an XPath expression.
    """
56#### XPath Helpers
class XPathExpr:
    """Mutable builder for a single XPath location path.

    The expression is kept as three strings that are concatenated by
    :meth:`__str__`:

    * ``path`` -- everything before the last step,
    * ``element`` -- the node test of the last step,
    * ``condition`` -- the predicate of the last step (``""`` for none).
    """

    def __init__(
        self, path: str = "", element: str = "*", condition: str = "", star_prefix: bool = False
    ) -> None:
        # NOTE: ``star_prefix`` is accepted but has no effect here; use
        # :meth:`add_star_prefix` to actually prepend ``*/``.
        self.path = path
        self.element = element
        self.condition = condition

    def __str__(self) -> str:
        """Render ``path + element`` followed by ``[condition]`` if there is one."""
        rendered = str(self.path) + str(self.element)
        if self.condition:
            rendered += "[%s]" % self.condition
        return rendered

    def __repr__(self) -> str:
        return "%s[%s]" % (self.__class__.__name__, self)

    def add_condition(self, condition: str, conjuction: str = "and") -> "XPathExpr":
        """Combine *condition* with the current predicate and return ``self``.

        When a predicate already exists both sides are parenthesised and
        joined with *conjuction* (``"and"`` or ``"or"``); otherwise
        *condition* simply becomes the predicate.
        """
        if self.condition:
            self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition)
        else:
            self.condition = condition
        return self

    def add_name_test(self) -> None:
        """Move the element name into the predicate as a ``name() = ...`` test.

        Afterwards ``element`` is ``"*"``.  Nothing happens if it already was.
        """
        if self.element == "*":
            # We weren't doing a test anyway
            return
        self.add_condition("name() = %s" % GenericTranslator.xpath_literal(self.element))
        self.element = "*"

    def add_star_prefix(self) -> None:
        """
        Append '*/' to the path to keep the context constrained
        to a single parent.
        """
        self.path += "*/"

    def join(
        self,
        combiner: str,
        other: "XPathExpr",
        closing_combiner: Optional[str] = None,
        has_inner_condition: bool = False,
    ) -> "XPathExpr":
        """Append *other* to this expression through *combiner*; return ``self``.

        :param combiner:
            Text inserted between the two expressions (an axis such as
            ``"/"`` or an opening predicate such as ``"[descendant::"``).
        :param other:
            The right-hand expression.  It is not modified.
        :param closing_combiner:
            Optional text appended after *other*'s element (e.g. ``"]"``).
        :param has_inner_condition:
            When true, *other*'s condition is rendered inside
            *closing_combiner* (``f[cond]]``) so that it applies to *other*
            rather than to the joined expression.
        """
        # str(self) already contains this expression's own predicate.
        joined_path = str(self) + combiner
        # Any "star prefix" is redundant when joining.
        if other.path != "*/":
            joined_path += other.path
        self.path = joined_path

        if not has_inner_condition:
            self.element = other.element
            if closing_combiner:
                self.element += closing_combiner
            self.condition = other.condition
            return self

        inner = other.element
        if other.condition:
            inner += "[" + other.condition + "]"
        if closing_combiner:
            inner += closing_combiner
        self.element = inner
        # The previous predicate now lives in ``self.path``; keeping it in
        # ``self.condition`` as well would emit it a second time.
        self.condition = ""
        return self
# Split a string at every run of single quotes.  The capturing group makes
# the split keep the quote runs themselves in the result.
split_at_single_quotes = re.compile("('+)").split

# The spec is actually more permissive than that, but don't bother.
# This is just for the fast path.
# http://www.w3.org/TR/REC-xml/#NT-NameStartChar
# ``fullmatch`` is used instead of ``^...$`` + ``match`` because ``$`` also
# matches right before a trailing newline, which would accept "name\n".
is_safe_name = re.compile("[a-zA-Z_][a-zA-Z0-9_.-]*").fullmatch

# Test that the string is not empty and does not contain whitespace
is_non_whitespace = re.compile(r"[^ \t\r\n\f]+").fullmatch
132#### Translation
class GenericTranslator:
    """
    Translator for "generic" XML documents.

    Everything is case-sensitive, no assumption is made on the meaning
    of element names and attribute names.

    """

    ####
    ####  HERE BE DRAGONS
    ####
    ####  You are welcome to hook into this to change some behavior,
    ####  but do so at your own risks.
    ####  Until it has received a lot more work and review,
    ####  I reserve the right to change this API in backward-incompatible ways
    ####  with any minor version of cssselect.
    ####  See https://github.com/scrapy/cssselect/pull/22
    ####  -- Simon Sapin.
    ####

    # CSS combinator -> suffix of the ``xpath_*_combinator`` method to call.
    combinator_mapping = {
        " ": "descendant",
        ">": "child",
        "+": "direct_adjacent",
        "~": "indirect_adjacent",
    }

    # CSS attribute operator -> suffix of the ``xpath_attrib_*`` method to call.
    attribute_operator_mapping = {
        "exists": "exists",
        "=": "equals",
        "~=": "includes",
        "|=": "dashmatch",
        "^=": "prefixmatch",
        "$=": "suffixmatch",
        "*=": "substringmatch",
        "!=": "different",  # XXX Not in Level 3 but meh
    }

    #: The attribute used for ID selectors depends on the document language:
    #: http://www.w3.org/TR/selectors/#id-selectors
    id_attribute = "id"

    #: The attribute used for ``:lang()`` depends on the document language:
    #: http://www.w3.org/TR/selectors/#lang-pseudo
    lang_attribute = "xml:lang"

    #: The case sensitivity of document language element names,
    #: attribute names, and attribute values in selectors depends
    #: on the document language.
    #: http://www.w3.org/TR/selectors/#casesens
    #:
    #: When a document language defines one of these as case-insensitive,
    #: cssselect assumes that the document parser makes the parsed values
    #: lower-case. Making the selector lower-case too makes the comparison
    #: case-insensitive.
    #:
    #: In HTML, element names and attributes names (but not attribute values)
    #: are case-insensitive. All of lxml.html, html5lib, BeautifulSoup4
    #: and HTMLParser make them lower-case in their parse result, so
    #: the assumption holds.
    lower_case_element_names = False
    lower_case_attribute_names = False
    lower_case_attribute_values = False

    # class used to represent an xpath expression
    xpathexpr_cls = XPathExpr

    def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
        """Translate a *group of selectors* to XPath.

        Pseudo-elements are not supported here since XPath only knows
        about "real" elements.

        :param css:
            A *group of selectors* as a string.
        :param prefix:
            This string is prepended to the XPath expression for each selector.
            The default makes selectors scoped to the context node’s subtree.
        :raises:
            :class:`~cssselect.SelectorSyntaxError` on invalid selectors,
            :class:`ExpressionError` on unknown/unsupported selectors,
            including pseudo-elements.
        :returns:
            The equivalent XPath 1.0 expression as a string.

        """
        return " | ".join(
            self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
            for selector in parse(css)
        )

    def selector_to_xpath(
        self,
        selector: Selector,
        prefix: str = "descendant-or-self::",
        translate_pseudo_elements: bool = False,
    ) -> str:
        """Translate a parsed selector to XPath.

        :param selector:
            A parsed :class:`Selector` object.
        :param prefix:
            This string is prepended to the resulting XPath expression.
            The default makes selectors scoped to the context node’s subtree.
        :param translate_pseudo_elements:
            Unless this is set to ``True`` (as :meth:`css_to_xpath` does),
            the :attr:`~Selector.pseudo_element` attribute of the selector
            is ignored.
            It is the caller's responsibility to reject selectors
            with pseudo-elements, or to account for them somehow.
        :raises:
            :class:`ExpressionError` on unknown/unsupported selectors,
            :class:`TypeError` if *selector* has no ``parsed_tree``.
        :returns:
            The equivalent XPath 1.0 expression as a string.

        """
        tree = getattr(selector, "parsed_tree", None)
        if not tree:
            raise TypeError("Expected a parsed selector, got %r" % (selector,))
        xpath = self.xpath(tree)
        assert isinstance(xpath, self.xpathexpr_cls)  # help debug a missing 'return'
        if translate_pseudo_elements and selector.pseudo_element:
            xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
        return (prefix or "") + str(xpath)

    def xpath_pseudo_element(self, xpath: XPathExpr, pseudo_element: PseudoElement) -> XPathExpr:
        """Translate a pseudo-element.

        Defaults to not supporting pseudo-elements at all,
        but can be overridden by sub-classes.

        """
        raise ExpressionError("Pseudo-elements are not supported.")

    @staticmethod
    def xpath_literal(s: str) -> str:
        """Return *s* quoted as an XPath 1.0 string literal.

        XPath 1.0 has no escape syntax, so a string containing both quote
        characters is emitted as a ``concat(...)`` of pieces that each
        contain only one kind of quote.
        """
        s = str(s)
        if "'" not in s:
            return "'%s'" % s
        if '"' not in s:
            return '"%s"' % s
        pieces = [
            ('"%s"' if "'" in part else "'%s'") % part
            for part in split_at_single_quotes(s)
            if part
        ]
        return "concat(%s)" % ",".join(pieces)

    def xpath(self, parsed_selector: Tree) -> XPathExpr:
        """Translate any parsed selector object.

        Dispatches to ``xpath_<lower-cased class name>``.

        :raises: :class:`ExpressionError` if no such method exists.
        """
        type_name = type(parsed_selector).__name__
        method = getattr(self, "xpath_%s" % type_name.lower(), None)
        if method is None:
            raise ExpressionError("%s is not supported." % type_name)
        return typing.cast(XPathExpr, method(parsed_selector))

    # Dispatched by parsed object type

    def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr:
        """Translate a combined selector."""
        combinator = self.combinator_mapping[combined.combinator]
        method = getattr(self, "xpath_%s_combinator" % combinator)
        return typing.cast(
            XPathExpr, method(self.xpath(combined.selector), self.xpath(combined.subselector))
        )

    def xpath_negation(self, negation: Negation) -> XPathExpr:
        """Translate ``:not(...)``."""
        xpath = self.xpath(negation.selector)
        sub_xpath = self.xpath(negation.subselector)
        sub_xpath.add_name_test()
        if sub_xpath.condition:
            return xpath.add_condition("not(%s)" % sub_xpath.condition)
        # The argument matches every element, so its negation matches none.
        return xpath.add_condition("0")

    def xpath_relation(self, relation: Relation) -> XPathExpr:
        """Translate a relational selector.

        Dispatches to ``xpath_relation_<combinator>_combinator``.
        """
        xpath = self.xpath(relation.selector)
        combinator = relation.combinator
        subselector = relation.subselector
        right = self.xpath(subselector.parsed_tree)
        method = getattr(
            self,
            "xpath_relation_%s_combinator"
            % self.combinator_mapping[typing.cast(str, combinator.value)],
        )
        return typing.cast(XPathExpr, method(xpath, right))

    def _xpath_any_of(self, xpath: XPathExpr, selector_list: typing.Iterable[Tree]) -> XPathExpr:
        """Require *xpath* to also satisfy at least one selector of *selector_list*.

        The alternatives are OR-ed together first and only then AND-ed with
        whatever condition *xpath* already carries, so ``.x:is(.a, .b)``
        becomes ``(x) and ((a) or (b))``.
        """
        # Translate everything up front so unsupported selectors always raise.
        alternatives = [self.xpath(selector) for selector in selector_list]
        combined = ""
        for alternative in alternatives:
            alternative.add_name_test()
            if not alternative.condition:
                # This alternative matches any element: nothing to restrict.
                return xpath
            if combined:
                combined = "(%s) or (%s)" % (combined, alternative.condition)
            else:
                combined = alternative.condition
        if combined:
            xpath.add_condition(combined)
        return xpath

    def xpath_matching(self, matching: Matching) -> XPathExpr:
        """Translate a :class:`Matching` node (element matches any listed selector)."""
        return self._xpath_any_of(self.xpath(matching.selector), matching.selector_list)

    def xpath_specificityadjustment(self, matching: SpecificityAdjustment) -> XPathExpr:
        """Translate a :class:`SpecificityAdjustment` node; same XPath as a matching node."""
        return self._xpath_any_of(self.xpath(matching.selector), matching.selector_list)

    def xpath_function(self, function: Function) -> XPathExpr:
        """Translate a functional pseudo-class."""
        method_name = "xpath_%s_function" % function.name.replace("-", "_")
        method = getattr(self, method_name, None)
        if not method:
            raise ExpressionError("The pseudo-class :%s() is unknown" % function.name)
        return typing.cast(XPathExpr, method(self.xpath(function.selector), function))

    def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr:
        """Translate a pseudo-class."""
        method_name = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_")
        method = getattr(self, method_name, None)
        if not method:
            # TODO: better error message for pseudo-elements?
            raise ExpressionError("The pseudo-class :%s is unknown" % pseudo.ident)
        return typing.cast(XPathExpr, method(self.xpath(pseudo.selector)))

    def xpath_attrib(self, selector: Attrib) -> XPathExpr:
        """Translate an attribute selector."""
        operator = self.attribute_operator_mapping[selector.operator]
        method = getattr(self, "xpath_attrib_%s" % operator)
        if self.lower_case_attribute_names:
            name = selector.attrib.lower()
        else:
            name = selector.attrib
        safe = is_safe_name(name)
        if selector.namespace:
            name = "%s:%s" % (selector.namespace, name)
            safe = safe and is_safe_name(selector.namespace)
        if safe:
            attrib = "@" + name
        else:
            # Names outside the fast-path pattern are compared as strings.
            attrib = "attribute::*[name() = %s]" % self.xpath_literal(name)
        if selector.value is None:
            value = None
        elif self.lower_case_attribute_values:
            value = typing.cast(str, selector.value.value).lower()
        else:
            value = selector.value.value
        return typing.cast(XPathExpr, method(self.xpath(selector.selector), attrib, value))

    def xpath_class(self, class_selector: Class) -> XPathExpr:
        """Translate a class selector."""
        # .foo is defined as [class~=foo] in the spec.
        xpath = self.xpath(class_selector.selector)
        return self.xpath_attrib_includes(xpath, "@class", class_selector.class_name)

    def xpath_hash(self, id_selector: Hash) -> XPathExpr:
        """Translate an ID selector, comparing against :attr:`id_attribute`."""
        xpath = self.xpath(id_selector.selector)
        return self.xpath_attrib_equals(xpath, "@" + self.id_attribute, id_selector.id)

    def xpath_element(self, selector: Element) -> XPathExpr:
        """Translate a type or universal selector."""
        element = selector.element
        if not element:
            element = "*"
            safe = True
        else:
            safe = bool(is_safe_name(element))
            if self.lower_case_element_names:
                element = element.lower()
        if selector.namespace:
            # Namespace prefixes are case-sensitive.
            # http://www.w3.org/TR/css3-namespace/#prefixes
            element = "%s:%s" % (selector.namespace, element)
            safe = safe and bool(is_safe_name(selector.namespace))
        xpath = self.xpathexpr_cls(element=element)
        if not safe:
            xpath.add_name_test()
        return xpath

    # CombinedSelector: dispatch by combinator

    def xpath_descendant_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
        """right is a child, grand-child or further descendant of left"""
        return left.join("/descendant-or-self::*/", right)

    def xpath_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
        """right is an immediate child of left"""
        return left.join("/", right)

    def xpath_direct_adjacent_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
        """right is a sibling immediately after left"""
        xpath = left.join("/following-sibling::", right)
        xpath.add_name_test()
        return xpath.add_condition("position() = 1")

    def xpath_indirect_adjacent_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
        """right is a sibling after left, immediately or not"""
        return left.join("/following-sibling::", right)

    def xpath_relation_descendant_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
        """right is a child, grand-child or further descendant of left; select left"""
        return left.join("[descendant::", right, closing_combiner="]", has_inner_condition=True)

    def xpath_relation_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
        """right is an immediate child of left; select left"""
        # has_inner_condition keeps right's predicate on right (``e[./f[c]]``)
        # instead of moving it onto left (``e[./f][c]``).
        return left.join("[./", right, closing_combiner="]", has_inner_condition=True)

    def xpath_relation_direct_adjacent_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a sibling immediately after left; select left"""
        # Fold right's element name into its condition so that a universal
        # right-hand side adds no name test and any other condition on right
        # is kept.
        # NOTE(review): ``right.path`` is not taken into account here, so a
        # right-hand side that is itself a combined selector is not fully
        # honoured.
        right.add_name_test()
        if right.condition:
            sibling_test = "(%s) and (position() = 1)" % right.condition
        else:
            sibling_test = "position() = 1"
        return left.add_condition("following-sibling::*[%s]" % sibling_test)

    def xpath_relation_indirect_adjacent_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a sibling after left, immediately or not; select left"""
        return left.join(
            "[following-sibling::", right, closing_combiner="]", has_inner_condition=True
        )

    # Function: dispatch by function/pseudo-class name

    def xpath_nth_child_function(
        self, xpath: XPathExpr, function: Function, last: bool = False, add_name_test: bool = True
    ) -> XPathExpr:
        """Translate ``:nth-child()`` and, through the flags, its three variants.

        :param last:
            Count following siblings instead of preceding ones.
        :param add_name_test:
            When true, siblings of any name are counted; when false, only
            siblings with the same element name as *xpath*.
        :raises:
            :class:`ExpressionError` if the arguments are not a valid series.
        """
        try:
            a, b = parse_series(function.arguments)
        except ValueError as err:
            raise ExpressionError("Invalid series: '%r'" % function.arguments) from err

        # From https://www.w3.org/TR/css3-selectors/#structural-pseudos:
        #
        # :nth-child(an+b)
        #       an+b-1 siblings before
        #
        # :nth-last-child(an+b)
        #       an+b-1 siblings after
        #
        # :nth-of-type(an+b)
        #       an+b-1 siblings with the same expanded element name before
        #
        # :nth-last-of-type(an+b)
        #       an+b-1 siblings with the same expanded element name after
        #
        # So,
        # for :nth-child and :nth-of-type
        #
        #    count(preceding-sibling::<nodetest>) = an+b-1
        #
        # for :nth-last-child and :nth-last-of-type
        #
        #    count(following-sibling::<nodetest>) = an+b-1
        #
        # therefore,
        #    count(...) - (b-1) ≡ 0 (mod a)
        #
        # if a == 0:
        # ~~~~~~~~~~
        #    count(...) = b-1
        #
        # if a < 0:
        # ~~~~~~~~~
        #    count(...) - b +1 <= 0
        # -> count(...) <= b-1
        #
        # if a > 0:
        # ~~~~~~~~~
        #    count(...) - b +1 >= 0
        # -> count(...) >= b-1

        # work with b-1 instead
        b_min_1 = b - 1

        # early-exit condition 1:
        # ~~~~~~~~~~~~~~~~~~~~~~~
        # for a == 1, nth-*(an+b) means n+b-1 siblings before/after,
        # and since n ∈ {0, 1, 2, ...}, if b-1<=0,
        # there is always an "n" matching any number of siblings (maybe none)
        if a == 1 and b_min_1 <= 0:
            return xpath

        # early-exit condition 2:
        # ~~~~~~~~~~~~~~~~~~~~~~~
        # an+b-1 siblings with a<0 and (b-1)<0 is not possible
        if a < 0 and b_min_1 < 0:
            return xpath.add_condition("0")

        # `add_name_test` boolean is inverted and somewhat counter-intuitive:
        #
        # nth_of_type() calls nth_child(add_name_test=False)
        if add_name_test:
            nodetest = "*"
        else:
            nodetest = "%s" % xpath.element

        # count siblings before or after the element
        if not last:
            siblings_count = "count(preceding-sibling::%s)" % nodetest
        else:
            siblings_count = "count(following-sibling::%s)" % nodetest

        # special case of fixed position: nth-*(0n+b)
        # if a == 0:
        # ~~~~~~~~~~
        #    count(***-sibling::***) = b-1
        if a == 0:
            return xpath.add_condition("%s = %s" % (siblings_count, b_min_1))

        expressions = []

        if a > 0:
            # siblings count, an+b-1, is always >= 0,
            # so if a>0, and (b-1)<=0, an "n" exists to satisfy this,
            # therefore, the predicate is only interesting if (b-1)>0
            if b_min_1 > 0:
                expressions.append("%s >= %s" % (siblings_count, b_min_1))
        else:
            # if a<0, and (b-1)<0, no "n" satisfies this,
            # this is tested above as an early exit condition
            # otherwise,
            expressions.append("%s <= %s" % (siblings_count, b_min_1))

        # operations modulo 1 or -1 are simpler, one only needs to verify:
        #
        # - either:
        # count(***-sibling::***) - (b-1) = n = 0, 1, 2, 3, etc.,
        #   i.e. count(***-sibling::***) >= (b-1)
        #
        # - or:
        # count(***-sibling::***) - (b-1) = -n = 0, -1, -2, -3, etc.,
        #   i.e. count(***-sibling::***) <= (b-1)
        # which is what we just did above.
        #
        if abs(a) != 1:
            # count(***-sibling::***) - (b-1) ≡ 0 (mod a)
            left = siblings_count

            # apply "modulo a" on 2nd term, -(b-1),
            # to simplify things like "(... +6) % -3",
            # and also make it positive with |a|
            b_neg = (-b_min_1) % abs(a)

            if b_neg != 0:
                b_neg_as_str = "+%s" % b_neg
                left = "(%s %s)" % (left, b_neg_as_str)

            expressions.append("%s mod %s = 0" % (left, a))

        # Parenthesise the parts only when there is more than one to combine.
        if len(expressions) > 1:
            template = "(%s)"
        else:
            template = "%s"
        xpath.add_condition(" and ".join(template % expression for expression in expressions))
        return xpath

    def xpath_nth_last_child_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
        """Translate ``:nth-last-child()``."""
        return self.xpath_nth_child_function(xpath, function, last=True)

    def xpath_nth_of_type_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
        """Translate ``:nth-of-type()``; requires a named element."""
        if xpath.element == "*":
            raise ExpressionError("*:nth-of-type() is not implemented")
        return self.xpath_nth_child_function(xpath, function, add_name_test=False)

    def xpath_nth_last_of_type_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
        """Translate ``:nth-last-of-type()``; requires a named element."""
        if xpath.element == "*":
            raise ExpressionError("*:nth-last-of-type() is not implemented")
        return self.xpath_nth_child_function(xpath, function, last=True, add_name_test=False)

    def xpath_contains_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
        """Translate ``:contains()`` as a substring test on the element's string value."""
        # Defined there, removed in later drafts:
        # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
        if function.argument_types() not in (["STRING"], ["IDENT"]):
            raise ExpressionError(
                "Expected a single string or ident for :contains(), got %r" % function.arguments
            )
        value = typing.cast(str, function.arguments[0].value)
        return xpath.add_condition("contains(., %s)" % self.xpath_literal(value))

    def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
        """Translate ``:lang()`` using XPath's built-in ``lang()`` function."""
        if function.argument_types() not in (["STRING"], ["IDENT"]):
            raise ExpressionError(
                "Expected a single string or ident for :lang(), got %r" % function.arguments
            )
        value = typing.cast(str, function.arguments[0].value)
        return xpath.add_condition("lang(%s)" % (self.xpath_literal(value)))

    # Pseudo: dispatch by pseudo-class name

    def xpath_root_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:root`` -- the element has no parent element."""
        return xpath.add_condition("not(parent::*)")

    # CSS immediate children (CSS ":scope > div" to XPath "child::div" or "./div")
    # Works only at the start of a selector
    # Needed to get immediate children of a processed selector in Scrapy
    # for product in response.css('.product'):
    #     description = product.css(':scope > div::text').get()
    def xpath_scope_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:scope`` -- translated to the always-true predicate ``1``."""
        return xpath.add_condition("1")

    def xpath_first_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:first-child`` -- no preceding sibling element."""
        return xpath.add_condition("count(preceding-sibling::*) = 0")

    def xpath_last_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:last-child`` -- no following sibling element."""
        return xpath.add_condition("count(following-sibling::*) = 0")

    def xpath_first_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:first-of-type`` -- no preceding sibling with the same name."""
        if xpath.element == "*":
            raise ExpressionError("*:first-of-type is not implemented")
        return xpath.add_condition("count(preceding-sibling::%s) = 0" % xpath.element)

    def xpath_last_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:last-of-type`` -- no following sibling with the same name."""
        if xpath.element == "*":
            raise ExpressionError("*:last-of-type is not implemented")
        return xpath.add_condition("count(following-sibling::%s) = 0" % xpath.element)

    def xpath_only_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:only-child`` -- the parent has exactly one child element."""
        return xpath.add_condition("count(parent::*/child::*) = 1")

    def xpath_only_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:only-of-type`` -- the parent has exactly one child with this name."""
        if xpath.element == "*":
            raise ExpressionError("*:only-of-type is not implemented")
        return xpath.add_condition("count(parent::*/child::%s) = 1" % xpath.element)

    def xpath_empty_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:empty`` -- no child element and an empty string value."""
        return xpath.add_condition("not(*) and not(string-length())")

    def pseudo_never_matches(self, xpath: XPathExpr) -> XPathExpr:
        """Common implementation for pseudo-classes that never match."""
        return xpath.add_condition("0")

    xpath_link_pseudo = pseudo_never_matches
    xpath_visited_pseudo = pseudo_never_matches
    xpath_hover_pseudo = pseudo_never_matches
    xpath_active_pseudo = pseudo_never_matches
    xpath_focus_pseudo = pseudo_never_matches
    xpath_target_pseudo = pseudo_never_matches
    xpath_enabled_pseudo = pseudo_never_matches
    xpath_disabled_pseudo = pseudo_never_matches
    xpath_checked_pseudo = pseudo_never_matches

    # Attrib: dispatch by attribute operator

    def xpath_attrib_exists(self, xpath: XPathExpr, name: str, value: Optional[str]) -> XPathExpr:
        """``[name]`` -- the attribute is present."""
        assert not value
        xpath.add_condition(name)
        return xpath

    def xpath_attrib_equals(self, xpath: XPathExpr, name: str, value: Optional[str]) -> XPathExpr:
        """``[name=value]`` -- exact match."""
        assert value is not None
        xpath.add_condition("%s = %s" % (name, self.xpath_literal(value)))
        return xpath

    def xpath_attrib_different(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[name!=value]`` -- for a non-empty value, a missing attribute also matches."""
        assert value is not None
        # FIXME: this seems like a weird hack...
        if value:
            xpath.add_condition("not(%s) or %s != %s" % (name, name, self.xpath_literal(value)))
        else:
            xpath.add_condition("%s != %s" % (name, self.xpath_literal(value)))
        return xpath

    def xpath_attrib_includes(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[name~=value]`` -- *value* is one of the whitespace-separated words."""
        if value and is_non_whitespace(value):
            # Pad both sides with spaces so only whole words can match.
            xpath.add_condition(
                "%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
                % (name, name, self.xpath_literal(" " + value + " "))
            )
        else:
            xpath.add_condition("0")
        return xpath

    def xpath_attrib_dashmatch(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[name|=value]`` -- equals *value* or starts with ``value-``."""
        assert value is not None
        # Weird, but true...
        xpath.add_condition(
            "%s and (%s = %s or starts-with(%s, %s))"
            % (name, name, self.xpath_literal(value), name, self.xpath_literal(value + "-"))
        )
        return xpath

    def xpath_attrib_prefixmatch(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[name^=value]`` -- starts with *value*; an empty value never matches."""
        if value:
            xpath.add_condition(
                "%s and starts-with(%s, %s)" % (name, name, self.xpath_literal(value))
            )
        else:
            xpath.add_condition("0")
        return xpath

    def xpath_attrib_suffixmatch(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[name$=value]`` -- ends with *value*; an empty value never matches."""
        if value:
            # Oddly there is a starts-with in XPath 1.0, but not ends-with
            xpath.add_condition(
                "%s and substring(%s, string-length(%s)-%s) = %s"
                % (name, name, name, len(value) - 1, self.xpath_literal(value))
            )
        else:
            xpath.add_condition("0")
        return xpath

    def xpath_attrib_substringmatch(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[name*=value]`` -- contains *value*; an empty value never matches."""
        if value:
            # Attribute selectors are case sensitive
            xpath.add_condition(
                "%s and contains(%s, %s)" % (name, name, self.xpath_literal(value))
            )
        else:
            xpath.add_condition("0")
        return xpath
class HTMLTranslator(GenericTranslator):
    """
    Translator for (X)HTML documents.

    Has a more useful implementation of some pseudo-classes based on
    HTML-specific element names and attribute names, as described in
    the `HTML5 specification`_. It assumes no-quirks mode.
    The API is the same as :class:`GenericTranslator`.

    .. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors

    :param xhtml:
        If false (the default), element names and attribute names
        are case-insensitive.

    """

    lang_attribute = "lang"

    def __init__(self, xhtml: bool = False) -> None:
        self.xhtml = xhtml  # Might be useful for sub-classes?
        if not xhtml:
            # See their definition in GenericTranslator.
            self.lower_case_element_names = True
            self.lower_case_attribute_names = True

    def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
        """``:checked`` -- selected options and checked checkbox/radio inputs."""
        # FIXME: is this really all the elements?
        return xpath.add_condition(
            "(@selected and name(.) = 'option') or "
            "(@checked "
            "and (name(.) = 'input' or name(.) = 'command')"
            "and (@type = 'checkbox' or @type = 'radio'))"
        )

    def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
        """Translate ``:lang()`` by inspecting :attr:`lang_attribute`.

        The nearest ancestor-or-self element carrying the attribute decides;
        its lower-cased value followed by ``-`` must start with the
        lower-cased argument followed by ``-``.
        """
        if function.argument_types() not in (["STRING"], ["IDENT"]):
            raise ExpressionError(
                "Expected a single string or ident for :lang(), got %r" % function.arguments
            )
        value = function.arguments[0].value
        assert value
        # Use ``lang_attribute`` for both the ancestor lookup and the value
        # comparison so that a sub-class overriding it stays consistent.
        return xpath.add_condition(
            "ancestor-or-self::*[@%s][1][starts-with(concat("
            # XPath 1.0 has no lower-case function...
            "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
            "'abcdefghijklmnopqrstuvwxyz'), "
            "'-'), %s)]"
            % (
                self.lang_attribute,
                self.lang_attribute,
                self.xpath_literal(value.lower() + "-"),
            )
        )

    def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
        """``:link`` -- ``a``, ``link`` and ``area`` elements with an ``href``."""
        return xpath.add_condition(
            "@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')"
        )

    # Links are never visited, the implementation for :visited is the same
    # as in GenericTranslator

    def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
        """``:disabled`` -- disabled form controls, directly or via a fieldset."""
        # http://www.w3.org/TR/html5/section-index.html#attributes-1
        return xpath.add_condition(
            """
        (
            @disabled and
            (
                (name(.) = 'input' and @type != 'hidden') or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea' or
                name(.) = 'command' or
                name(.) = 'fieldset' or
                name(.) = 'optgroup' or
                name(.) = 'option'
            )
        ) or (
            (
                (name(.) = 'input' and @type != 'hidden') or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea'
            )
            and ancestor::fieldset[@disabled]
        )
        """
        )
        # FIXME: in the second half, add "and is not a descendant of that
        # fieldset element's first legend element child, if any."

    def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
        """``:enabled`` -- links with ``href`` and form controls that are not disabled."""
        # http://www.w3.org/TR/html5/section-index.html#attributes-1
        return xpath.add_condition(
            """
        (
            @href and (
                name(.) = 'a' or
                name(.) = 'link' or
                name(.) = 'area'
            )
        ) or (
            (
                name(.) = 'command' or
                name(.) = 'fieldset' or
                name(.) = 'optgroup'
            )
            and not(@disabled)
        ) or (
            (
                (name(.) = 'input' and @type != 'hidden') or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea' or
                name(.) = 'keygen'
            )
            and not (@disabled or ancestor::fieldset[@disabled])
        ) or (
            name(.) = 'option' and not(
                @disabled or ancestor::optgroup[@disabled]
            )
        )
        """
        )
        # FIXME: ... or "li elements that are children of menu elements,
        # and that have a child element that defines a command, if the first
        # such element's Disabled State facet is false (not disabled)".
        # FIXME: after ancestor::fieldset[@disabled], add "and is not a
        # descendant of that fieldset element's first legend element child,
        # if any."