Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/cssselect/xpath.py: 75%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

343 statements  

1# -*- coding: utf-8 -*- 

2""" 

3 cssselect.xpath 

4 =============== 

5 

6 Translation of parsed CSS selectors to XPath expressions. 

7 

8 

9 :copyright: (c) 2007-2012 Ian Bicking and contributors. 

10 See AUTHORS for more details. 

11 :license: BSD, see LICENSE for more details. 

12 

13""" 

14 

15import re 

16import typing 

17import warnings 

18from typing import Optional 

19 

20from cssselect.parser import ( 

21 Attrib, 

22 Class, 

23 CombinedSelector, 

24 Element, 

25 Function, 

26 Hash, 

27 Matching, 

28 Negation, 

29 Pseudo, 

30 PseudoElement, 

31 Relation, 

32 Selector, 

33 SelectorError, 

34 SpecificityAdjustment, 

35 Tree, 

36 parse, 

37 parse_series, 

38) 

39 

40 

@typing.no_type_check
def _unicode_safe_getattr(obj, name, default=None):
    """Deprecated thin wrapper around :func:`getattr`.

    Emits a :class:`DeprecationWarning` attributed to the caller
    (``stacklevel=2``) and then returns ``getattr(obj, name, default)``.
    """
    message = (
        "_unicode_safe_getattr is deprecated and will be removed in the"
        " next release, use getattr() instead"
    )
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return getattr(obj, name, default)

50 

51 

class ExpressionError(SelectorError, RuntimeError):
    """Raised for a selector that is unknown or unsupported (e.g. a pseudo-class)."""

54 

55 

56#### XPath Helpers 

57 

58 

class XPathExpr:
    """Mutable builder for a single XPath step.

    The rendered expression is ``path`` followed by ``element`` and, when a
    condition is set, ``[condition]``.
    """

    def __init__(
        self,
        path: str = "",
        element: str = "*",
        condition: str = "",
        star_prefix: bool = False,
    ) -> None:
        # ``star_prefix`` is accepted for signature compatibility but is
        # neither stored nor used by this constructor.
        self.path, self.element, self.condition = path, element, condition

    def __str__(self) -> str:
        rendered = "".join((str(self.path), str(self.element)))
        if not self.condition:
            return rendered
        return rendered + "[%s]" % self.condition

    def __repr__(self) -> str:
        return "{}[{}]".format(self.__class__.__name__, self)

    def add_condition(self, condition: str, conjuction: str = "and") -> "XPathExpr":
        """Combine *condition* with the current one and return ``self``.

        When a condition already exists both sides are parenthesised and
        joined with *conjuction*; otherwise *condition* is stored as-is.
        """
        if not self.condition:
            self.condition = condition
            return self
        self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition)
        return self

    def add_name_test(self) -> None:
        """Move a concrete element name into a ``name() = ...`` condition.

        Afterwards ``element`` is ``"*"``. Nothing happens when the element
        is already the wildcard, since no name was being tested.
        """
        if self.element != "*":
            literal = GenericTranslator.xpath_literal(self.element)
            self.add_condition("name() = %s" % literal)
            self.element = "*"

    def add_star_prefix(self) -> None:
        """
        Append '*/' to the path to keep the context constrained
        to a single parent.
        """
        self.path = self.path + "*/"

    def join(
        self,
        combiner: str,
        other: "XPathExpr",
        closing_combiner: Optional[str] = None,
        has_inner_condition: bool = False,
    ) -> "XPathExpr":
        """Append *other* to this expression using *combiner*; return ``self``.

        The current rendering plus *combiner* (plus ``other.path``, unless
        that is only a star prefix) becomes the new path.

        * Without *has_inner_condition*, the element becomes
          ``other.element`` followed by *closing_combiner* (if any) and the
          condition is replaced by ``other.condition``.
        * With *has_inner_condition*, ``other.condition`` is embedded in the
          element as ``[...]`` before *closing_combiner*; ``self.condition``
          is left untouched.
        """
        joined = str(self) + combiner
        # Any "star prefix" is redundant when joining.
        if other.path != "*/":
            joined += other.path
        self.path = joined

        if has_inner_condition:
            inner = other.element
            if other.condition:
                inner += "[" + other.condition + "]"
            if closing_combiner:
                inner += closing_combiner
            self.element = inner
        else:
            self.element = other.element + (closing_combiner or "")
            self.condition = other.condition
        return self

127 

128 

# Split a string into runs of single quotes and runs of everything else;
# the capturing group keeps the quote runs in the result.
split_at_single_quotes = re.compile("('+)").split

# The spec is actually more permissive than that, but don’t bother.
# This is just for the fast path.
# http://www.w3.org/TR/REC-xml/#NT-NameStartChar
# ``fullmatch`` is used instead of ``^...$`` with ``match`` because ``$``
# also matches just before a trailing newline, which would let "name\n"
# through as a "safe" name.
is_safe_name = re.compile(r"[a-zA-Z_][a-zA-Z0-9_.-]*").fullmatch

# Test that the string is not empty and does not contain whitespace
# (``fullmatch`` so that a trailing newline is rejected as well).
is_non_whitespace = re.compile(r"[^ \t\r\n\f]+").fullmatch

138 

139 

140#### Translation 

141 

142 

class GenericTranslator:
    """
    Translator for "generic" XML documents.

    Everything is case-sensitive, no assumption is made on the meaning
    of element names and attribute names.

    Parsed selector objects are dispatched by type name to ``xpath_<type>``
    methods; combinators, functional pseudo-classes, pseudo-classes and
    attribute operators are dispatched by name in the same way.

    """

    ####
    ####  HERE BE DRAGONS
    ####
    ####  You are welcome to hook into this to change some behavior,
    ####  but do so at your own risks.
    ####  Until it has received a lot more work and review,
    ####  I reserve the right to change this API in backward-incompatible ways
    ####  with any minor version of cssselect.
    ####  See https://github.com/scrapy/cssselect/pull/22
    ####  -- Simon Sapin.
    ####

    # CSS combinator -> suffix used in the ``xpath_*_combinator`` method names.
    combinator_mapping = {
        " ": "descendant",
        ">": "child",
        "+": "direct_adjacent",
        "~": "indirect_adjacent",
    }

    # CSS attribute operator -> suffix used in ``xpath_attrib_*`` method names.
    attribute_operator_mapping = {
        "exists": "exists",
        "=": "equals",
        "~=": "includes",
        "|=": "dashmatch",
        "^=": "prefixmatch",
        "$=": "suffixmatch",
        "*=": "substringmatch",
        "!=": "different",  # XXX Not in Level 3 but meh
    }

    #: The attribute used for ID selectors depends on the document language:
    #: http://www.w3.org/TR/selectors/#id-selectors
    id_attribute = "id"

    #: The attribute used for ``:lang()`` depends on the document language:
    #: http://www.w3.org/TR/selectors/#lang-pseudo
    lang_attribute = "xml:lang"

    #: The case sensitivity of document language element names,
    #: attribute names, and attribute values in selectors depends
    #: on the document language.
    #: http://www.w3.org/TR/selectors/#casesens
    #:
    #: When a document language defines one of these as case-insensitive,
    #: cssselect assumes that the document parser makes the parsed values
    #: lower-case. Making the selector lower-case too makes the comparison
    #: case-insensitive.
    #:
    #: In HTML, element names and attributes names (but not attribute values)
    #: are case-insensitive. All of lxml.html, html5lib, BeautifulSoup4
    #: and HTMLParser make them lower-case in their parse result, so
    #: the assumption holds.
    lower_case_element_names = False
    lower_case_attribute_names = False
    lower_case_attribute_values = False

    # class used to represent an xpath expression
    xpathexpr_cls = XPathExpr

    def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
        """Translate a *group of selectors* to XPath.

        Pseudo-elements are not supported here since XPath only knows
        about "real" elements.

        :param css:
            A *group of selectors* as a string.
        :param prefix:
            This string is prepended to the XPath expression for each selector.
            The default makes selectors scoped to the context node’s subtree.
        :raises:
            :class:`~cssselect.SelectorSyntaxError` on invalid selectors,
            :class:`ExpressionError` on unknown/unsupported selectors,
            including pseudo-elements.
        :returns:
            The equivalent XPath 1.0 expression as a string.

        """
        return " | ".join(
            self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
            for selector in parse(css)
        )

    def selector_to_xpath(
        self,
        selector: Selector,
        prefix: str = "descendant-or-self::",
        translate_pseudo_elements: bool = False,
    ) -> str:
        """Translate a parsed selector to XPath.


        :param selector:
            A parsed :class:`Selector` object.
        :param prefix:
            This string is prepended to the resulting XPath expression.
            The default makes selectors scoped to the context node’s subtree.
        :param translate_pseudo_elements:
            Unless this is set to ``True`` (as :meth:`css_to_xpath` does),
            the :attr:`~Selector.pseudo_element` attribute of the selector
            is ignored.
            It is the caller's responsibility to reject selectors
            with pseudo-elements, or to account for them somehow.
        :raises:
            :class:`ExpressionError` on unknown/unsupported selectors,
            :class:`TypeError` when *selector* has no ``parsed_tree``.
        :returns:
            The equivalent XPath 1.0 expression as a string.

        """
        tree = getattr(selector, "parsed_tree", None)
        if not tree:
            raise TypeError("Expected a parsed selector, got %r" % (selector,))
        xpath = self.xpath(tree)
        assert isinstance(xpath, self.xpathexpr_cls)  # help debug a missing 'return'
        if translate_pseudo_elements and selector.pseudo_element:
            xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
        return (prefix or "") + str(xpath)

    def xpath_pseudo_element(
        self, xpath: XPathExpr, pseudo_element: PseudoElement
    ) -> XPathExpr:
        """Translate a pseudo-element.

        Defaults to not supporting pseudo-elements at all,
        but can be overridden by sub-classes.

        """
        raise ExpressionError("Pseudo-elements are not supported.")

    @staticmethod
    def xpath_literal(s: str) -> str:
        """Return *s* as an XPath 1.0 string literal.

        XPath 1.0 has no escape syntax, so a string containing both kinds
        of quote is rendered as a ``concat(...)`` of pieces that each use
        the quote character they do not contain.
        """
        s = str(s)
        if "'" not in s:
            return "'%s'" % s
        if '"' not in s:
            return '"%s"' % s
        return "concat(%s)" % ",".join(
            ('"%s"' if "'" in part else "'%s'") % part
            for part in split_at_single_quotes(s)
            if part
        )

    def xpath(self, parsed_selector: Tree) -> XPathExpr:
        """Translate any parsed selector object."""
        type_name = type(parsed_selector).__name__
        method = getattr(self, "xpath_%s" % type_name.lower(), None)
        if method is None:
            raise ExpressionError("%s is not supported." % type_name)
        return typing.cast(XPathExpr, method(parsed_selector))

    # Dispatched by parsed object type

    def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr:
        """Translate a combined selector."""
        combinator = self.combinator_mapping[combined.combinator]
        method = getattr(self, "xpath_%s_combinator" % combinator)
        return typing.cast(
            XPathExpr,
            method(self.xpath(combined.selector), self.xpath(combined.subselector)),
        )

    def xpath_negation(self, negation: Negation) -> XPathExpr:
        """Translate ``:not()``: negate the sub-selector's condition.

        A sub-selector with no condition at all (it matches everything)
        makes the negation never match.
        """
        xpath = self.xpath(negation.selector)
        sub_xpath = self.xpath(negation.subselector)
        sub_xpath.add_name_test()
        if sub_xpath.condition:
            return xpath.add_condition("not(%s)" % sub_xpath.condition)
        else:
            return xpath.add_condition("0")

    def xpath_relation(self, relation: Relation) -> XPathExpr:
        """Translate a relational pseudo-class; dispatch on its combinator."""
        xpath = self.xpath(relation.selector)
        combinator = relation.combinator
        subselector = relation.subselector
        right = self.xpath(subselector.parsed_tree)
        method = getattr(
            self,
            "xpath_relation_%s_combinator"
            % self.combinator_mapping[typing.cast(str, combinator.value)],
        )
        return typing.cast(XPathExpr, method(xpath, right))

    def _add_alternatives(
        self, xpath: XPathExpr, selector_list: typing.Iterable[Tree]
    ) -> XPathExpr:
        """Require *xpath* to also match at least one selector of the list."""
        combined = ""
        for selector in selector_list:
            expr = self.xpath(selector)
            expr.add_name_test()
            if not expr.condition:
                continue
            # Same rendering as chained ``add_condition(..., "or")`` calls.
            if combined:
                combined = "(%s) or (%s)" % (combined, expr.condition)
            else:
                combined = expr.condition
        if combined:
            # The alternatives are OR-ed among themselves first and only then
            # AND-ed with whatever condition the left-hand side already has;
            # OR-ing them straight onto an existing condition would turn
            # "c0 and (c1 or c2)" into "(c0 or c1) or c2".
            xpath.add_condition(combined)
        return xpath

    def xpath_matching(self, matching: Matching) -> XPathExpr:
        """Translate a :class:`Matching` node (any-of selector list)."""
        return self._add_alternatives(
            self.xpath(matching.selector), matching.selector_list
        )

    def xpath_specificityadjustment(self, matching: SpecificityAdjustment) -> XPathExpr:
        """Translate a :class:`SpecificityAdjustment` node (any-of selector list)."""
        return self._add_alternatives(
            self.xpath(matching.selector), matching.selector_list
        )

    def xpath_function(self, function: Function) -> XPathExpr:
        """Translate a functional pseudo-class."""
        method_name = "xpath_%s_function" % function.name.replace("-", "_")
        method = getattr(self, method_name, None)
        if not method:
            raise ExpressionError("The pseudo-class :%s() is unknown" % function.name)
        return typing.cast(XPathExpr, method(self.xpath(function.selector), function))

    def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr:
        """Translate a pseudo-class."""
        method_name = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_")
        method = getattr(self, method_name, None)
        if not method:
            # TODO: better error message for pseudo-elements?
            raise ExpressionError("The pseudo-class :%s is unknown" % pseudo.ident)
        return typing.cast(XPathExpr, method(self.xpath(pseudo.selector)))

    def xpath_attrib(self, selector: Attrib) -> XPathExpr:
        """Translate an attribute selector."""
        operator = self.attribute_operator_mapping[selector.operator]
        method = getattr(self, "xpath_attrib_%s" % operator)
        if self.lower_case_attribute_names:
            name = selector.attrib.lower()
        else:
            name = selector.attrib
        safe = is_safe_name(name)
        if selector.namespace:
            name = "%s:%s" % (selector.namespace, name)
            safe = safe and is_safe_name(selector.namespace)
        if safe:
            attrib = "@" + name
        else:
            # Names that are not plain identifiers are compared as strings.
            attrib = "attribute::*[name() = %s]" % self.xpath_literal(name)
        if selector.value is None:
            value = None
        elif self.lower_case_attribute_values:
            value = typing.cast(str, selector.value.value).lower()
        else:
            value = selector.value.value
        return typing.cast(
            XPathExpr, method(self.xpath(selector.selector), attrib, value)
        )

    def xpath_class(self, class_selector: Class) -> XPathExpr:
        """Translate a class selector."""
        # .foo is defined as [class~=foo] in the spec.
        xpath = self.xpath(class_selector.selector)
        return self.xpath_attrib_includes(xpath, "@class", class_selector.class_name)

    def xpath_hash(self, id_selector: Hash) -> XPathExpr:
        """Translate an ID selector (compares :attr:`id_attribute`)."""
        xpath = self.xpath(id_selector.selector)
        return self.xpath_attrib_equals(
            xpath, "@%s" % self.id_attribute, id_selector.id
        )

    def xpath_element(self, selector: Element) -> XPathExpr:
        """Translate a type or universal selector."""
        element = selector.element
        if not element:
            element = "*"
            safe = True
        else:
            safe = bool(is_safe_name(element))
            if self.lower_case_element_names:
                element = element.lower()
        if selector.namespace:
            # Namespace prefixes are case-sensitive.
            # http://www.w3.org/TR/css3-namespace/#prefixes
            element = "%s:%s" % (selector.namespace, element)
            safe = safe and bool(is_safe_name(selector.namespace))
        xpath = self.xpathexpr_cls(element=element)
        if not safe:
            xpath.add_name_test()
        return xpath

    # CombinedSelector: dispatch by combinator

    def xpath_descendant_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a child, grand-child or further descendant of left"""
        return left.join("/descendant-or-self::*/", right)

    def xpath_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
        """right is an immediate child of left"""
        return left.join("/", right)

    def xpath_direct_adjacent_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a sibling immediately after left"""
        xpath = left.join("/following-sibling::", right)
        xpath.add_name_test()
        return xpath.add_condition("position() = 1")

    def xpath_indirect_adjacent_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a sibling after left, immediately or not"""
        return left.join("/following-sibling::", right)

    def xpath_relation_descendant_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a child, grand-child or further descendant of left; select left"""
        return left.join(
            "[descendant::", right, closing_combiner="]", has_inner_condition=True
        )

    def xpath_relation_child_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is an immediate child of left; select left"""
        # ``has_inner_condition`` keeps right's condition inside the
        # predicate (``left[./right[cond]]``); without it the condition is
        # rendered after the closing bracket and ends up testing left.
        return left.join(
            "[./", right, closing_combiner="]", has_inner_condition=True
        )

    def xpath_relation_direct_adjacent_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a sibling immediately after left; select left"""
        # Fold the element name into right's condition (properly quoted) so
        # that a wildcard right-hand side adds no name test and any other
        # condition on right is preserved.
        right.add_name_test()
        if right.condition:
            predicate = "(%s) and (position() = 1)" % right.condition
        else:
            predicate = "position() = 1"
        return left.add_condition("following-sibling::*[%s]" % predicate)

    def xpath_relation_indirect_adjacent_combinator(
        self, left: XPathExpr, right: XPathExpr
    ) -> XPathExpr:
        """right is a sibling after left, immediately or not; select left"""
        # See xpath_relation_child_combinator for why the inner condition
        # must stay inside the predicate.
        return left.join(
            "[following-sibling::",
            right,
            closing_combiner="]",
            has_inner_condition=True,
        )

    # Function: dispatch by function/pseudo-class name

    def xpath_nth_child_function(
        self,
        xpath: XPathExpr,
        function: Function,
        last: bool = False,
        add_name_test: bool = True,
    ) -> XPathExpr:
        """Translate ``:nth-child(an+b)`` and its three variants.

        :param xpath: expression for the element the pseudo-class applies to.
        :param function: parsed function whose arguments hold the series.
        :param last: count following siblings instead of preceding ones.
        :param add_name_test: when true, siblings of any name are counted;
            when false, only siblings named like ``xpath.element``.
        :raises: :class:`ExpressionError` when the series cannot be parsed.
        """
        try:
            a, b = parse_series(function.arguments)
        except ValueError as exc:
            raise ExpressionError(
                "Invalid series: '%r'" % function.arguments
            ) from exc

        # From https://www.w3.org/TR/css3-selectors/#structural-pseudos:
        #
        # :nth-child(an+b)
        #       an+b-1 siblings before
        #
        # :nth-last-child(an+b)
        #       an+b-1 siblings after
        #
        # :nth-of-type(an+b)
        #       an+b-1 siblings with the same expanded element name before
        #
        # :nth-last-of-type(an+b)
        #       an+b-1 siblings with the same expanded element name after
        #
        # So,
        # for :nth-child and :nth-of-type
        #
        #    count(preceding-sibling::<nodetest>) = an+b-1
        #
        # for :nth-last-child and :nth-last-of-type
        #
        #    count(following-sibling::<nodetest>) = an+b-1
        #
        # therefore,
        #    count(...) - (b-1) ≡ 0 (mod a)
        #
        # if a == 0:
        # ~~~~~~~~~~
        #    count(...) = b-1
        #
        # if a < 0:
        # ~~~~~~~~~
        #    count(...) - b +1 <= 0
        # -> count(...) <= b-1
        #
        # if a > 0:
        # ~~~~~~~~~
        #    count(...) - b +1 >= 0
        # -> count(...) >= b-1

        # work with b-1 instead
        b_min_1 = b - 1

        # early-exit condition 1:
        # ~~~~~~~~~~~~~~~~~~~~~~~
        # for a == 1, nth-*(an+b) means n+b-1 siblings before/after,
        # and since n ∈ {0, 1, 2, ...}, if b-1<=0,
        # there is always an "n" matching any number of siblings (maybe none)
        if a == 1 and b_min_1 <= 0:
            return xpath

        # early-exit condition 2:
        # ~~~~~~~~~~~~~~~~~~~~~~~
        # an+b-1 siblings with a<0 and (b-1)<0 is not possible
        if a < 0 and b_min_1 < 0:
            return xpath.add_condition("0")

        # `add_name_test` boolean is inverted and somewhat counter-intuitive:
        #
        # nth_of_type() calls nth_child(add_name_test=False)
        if add_name_test:
            nodetest = "*"
        else:
            nodetest = "%s" % xpath.element

        # count siblings before or after the element
        if not last:
            siblings_count = "count(preceding-sibling::%s)" % nodetest
        else:
            siblings_count = "count(following-sibling::%s)" % nodetest

        # special case of fixed position: nth-*(0n+b)
        # if a == 0:
        # ~~~~~~~~~~
        #    count(***-sibling::***) = b-1
        if a == 0:
            return xpath.add_condition("%s = %s" % (siblings_count, b_min_1))

        expressions = []

        if a > 0:
            # siblings count, an+b-1, is always >= 0,
            # so if a>0, and (b-1)<=0, an "n" exists to satisfy this,
            # therefore, the predicate is only interesting if (b-1)>0
            if b_min_1 > 0:
                expressions.append("%s >= %s" % (siblings_count, b_min_1))
        else:
            # if a<0, and (b-1)<0, no "n" satisfies this,
            # this is tested above as an early exit condition
            # otherwise,
            expressions.append("%s <= %s" % (siblings_count, b_min_1))

        # operations modulo 1 or -1 are simpler, one only needs to verify:
        #
        # - either:
        # count(***-sibling::***) - (b-1) = n = 0, 1, 2, 3, etc.,
        #   i.e. count(***-sibling::***) >= (b-1)
        #
        # - or:
        # count(***-sibling::***) - (b-1) = -n = 0, -1, -2, -3, etc.,
        #   i.e. count(***-sibling::***) <= (b-1)
        # which is what we just did above.
        #
        if abs(a) != 1:
            # count(***-sibling::***) - (b-1) ≡ 0 (mod a)
            left = siblings_count

            # apply "modulo a" on 2nd term, -(b-1),
            # to simplify things like "(... +6) % -3",
            # and also make it positive with |a|
            b_neg = (-b_min_1) % abs(a)

            if b_neg != 0:
                b_neg_as_str = "+%s" % b_neg
                left = "(%s %s)" % (left, b_neg_as_str)

            expressions.append("%s mod %s = 0" % (left, a))

        # Parenthesise the parts only when there is more than one of them.
        if len(expressions) > 1:
            template = "(%s)"
        else:
            template = "%s"
        xpath.add_condition(
            " and ".join(template % expression for expression in expressions)
        )
        return xpath

    def xpath_nth_last_child_function(
        self, xpath: XPathExpr, function: Function
    ) -> XPathExpr:
        """Translate ``:nth-last-child()`` (counts following siblings)."""
        return self.xpath_nth_child_function(xpath, function, last=True)

    def xpath_nth_of_type_function(
        self, xpath: XPathExpr, function: Function
    ) -> XPathExpr:
        """Translate ``:nth-of-type()``; a concrete element name is required."""
        if xpath.element == "*":
            raise ExpressionError("*:nth-of-type() is not implemented")
        return self.xpath_nth_child_function(xpath, function, add_name_test=False)

    def xpath_nth_last_of_type_function(
        self, xpath: XPathExpr, function: Function
    ) -> XPathExpr:
        """Translate ``:nth-last-of-type()``; a concrete element name is required."""
        if xpath.element == "*":
            raise ExpressionError("*:nth-last-of-type() is not implemented")
        return self.xpath_nth_child_function(
            xpath, function, last=True, add_name_test=False
        )

    def xpath_contains_function(
        self, xpath: XPathExpr, function: Function
    ) -> XPathExpr:
        """Translate ``:contains()`` into an XPath ``contains(., ...)`` test."""
        # Defined there, removed in later drafts:
        # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
        if function.argument_types() not in (["STRING"], ["IDENT"]):
            raise ExpressionError(
                "Expected a single string or ident for :contains(), got %r"
                % function.arguments
            )
        value = typing.cast(str, function.arguments[0].value)
        return xpath.add_condition("contains(., %s)" % self.xpath_literal(value))

    def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
        """Translate ``:lang()`` using the XPath ``lang()`` function."""
        if function.argument_types() not in (["STRING"], ["IDENT"]):
            raise ExpressionError(
                "Expected a single string or ident for :lang(), got %r"
                % function.arguments
            )
        value = typing.cast(str, function.arguments[0].value)
        return xpath.add_condition("lang(%s)" % (self.xpath_literal(value)))

    # Pseudo: dispatch by pseudo-class name

    def xpath_root_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:root`` -- the element has no parent element."""
        return xpath.add_condition("not(parent::*)")

    # CSS immediate children (CSS ":scope > div" to XPath "child::div" or "./div")
    # Works only at the start of a selector
    # Needed to get immediate children of a processed selector in Scrapy
    # for product in response.css('.product'):
    #     description = product.css(':scope > div::text').get()
    def xpath_scope_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:scope`` -- adds an always-true condition."""
        return xpath.add_condition("1")

    def xpath_first_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:first-child`` -- no preceding element sibling."""
        return xpath.add_condition("count(preceding-sibling::*) = 0")

    def xpath_last_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:last-child`` -- no following element sibling."""
        return xpath.add_condition("count(following-sibling::*) = 0")

    def xpath_first_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:first-of-type`` -- no preceding sibling with the same name."""
        if xpath.element == "*":
            raise ExpressionError("*:first-of-type is not implemented")
        return xpath.add_condition("count(preceding-sibling::%s) = 0" % xpath.element)

    def xpath_last_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:last-of-type`` -- no following sibling with the same name."""
        if xpath.element == "*":
            raise ExpressionError("*:last-of-type is not implemented")
        return xpath.add_condition("count(following-sibling::%s) = 0" % xpath.element)

    def xpath_only_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:only-child`` -- the parent has exactly one element child."""
        return xpath.add_condition("count(parent::*/child::*) = 1")

    def xpath_only_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:only-of-type`` -- the parent has exactly one child of this name."""
        if xpath.element == "*":
            raise ExpressionError("*:only-of-type is not implemented")
        return xpath.add_condition("count(parent::*/child::%s) = 1" % xpath.element)

    def xpath_empty_pseudo(self, xpath: XPathExpr) -> XPathExpr:
        """``:empty`` -- no element children and an empty string value."""
        return xpath.add_condition("not(*) and not(string-length())")

    def pseudo_never_matches(self, xpath: XPathExpr) -> XPathExpr:
        """Common implementation for pseudo-classes that never match."""
        return xpath.add_condition("0")

    xpath_link_pseudo = pseudo_never_matches
    xpath_visited_pseudo = pseudo_never_matches
    xpath_hover_pseudo = pseudo_never_matches
    xpath_active_pseudo = pseudo_never_matches
    xpath_focus_pseudo = pseudo_never_matches
    xpath_target_pseudo = pseudo_never_matches
    xpath_enabled_pseudo = pseudo_never_matches
    xpath_disabled_pseudo = pseudo_never_matches
    xpath_checked_pseudo = pseudo_never_matches

    # Attrib: dispatch by attribute operator

    def xpath_attrib_exists(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr]`` -- the attribute is present."""
        assert not value
        xpath.add_condition(name)
        return xpath

    def xpath_attrib_equals(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr=value]`` -- the attribute equals *value*."""
        assert value is not None
        xpath.add_condition("%s = %s" % (name, self.xpath_literal(value)))
        return xpath

    def xpath_attrib_different(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr!=value]`` -- the attribute is absent or differs from *value*.

        For an empty *value* only the ``!=`` comparison is emitted.
        """
        assert value is not None
        # FIXME: this seems like a weird hack...
        if value:
            xpath.add_condition(
                "not(%s) or %s != %s" % (name, name, self.xpath_literal(value))
            )
        else:
            xpath.add_condition("%s != %s" % (name, self.xpath_literal(value)))
        return xpath

    def xpath_attrib_includes(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr~=value]`` -- *value* is one of the space-separated words.

        An empty *value*, or one containing whitespace, never matches.
        """
        if value and is_non_whitespace(value):
            xpath.add_condition(
                "%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
                % (name, name, self.xpath_literal(" " + value + " "))
            )
        else:
            xpath.add_condition("0")
        return xpath

    def xpath_attrib_dashmatch(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr|=value]`` -- equals *value* or starts with ``value-``."""
        assert value is not None
        # Weird, but true...
        xpath.add_condition(
            "%s and (%s = %s or starts-with(%s, %s))"
            % (
                name,
                name,
                self.xpath_literal(value),
                name,
                self.xpath_literal(value + "-"),
            )
        )
        return xpath

    def xpath_attrib_prefixmatch(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr^=value]`` -- starts with *value*; empty never matches."""
        if value:
            xpath.add_condition(
                "%s and starts-with(%s, %s)" % (name, name, self.xpath_literal(value))
            )
        else:
            xpath.add_condition("0")
        return xpath

    def xpath_attrib_suffixmatch(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr$=value]`` -- ends with *value*; empty never matches."""
        if value:
            # Oddly there is a starts-with in XPath 1.0, but not ends-with
            xpath.add_condition(
                "%s and substring(%s, string-length(%s)-%s) = %s"
                % (name, name, name, len(value) - 1, self.xpath_literal(value))
            )
        else:
            xpath.add_condition("0")
        return xpath

    def xpath_attrib_substringmatch(
        self, xpath: XPathExpr, name: str, value: Optional[str]
    ) -> XPathExpr:
        """``[attr*=value]`` -- contains *value*; empty never matches."""
        if value:
            # Attribute selectors are case sensitive
            xpath.add_condition(
                "%s and contains(%s, %s)" % (name, name, self.xpath_literal(value))
            )
        else:
            xpath.add_condition("0")
        return xpath

818 

819 

class HTMLTranslator(GenericTranslator):
    """
    Translator for (X)HTML documents.

    Has a more useful implementation of some pseudo-classes based on
    HTML-specific element names and attribute names, as described in
    the `HTML5 specification`_. It assumes no-quirks mode.
    The API is the same as :class:`GenericTranslator`.

    .. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors

    :param xhtml:
        If false (the default), element names and attribute names
        are case-insensitive.

    """

    lang_attribute = "lang"

    def __init__(self, xhtml: bool = False) -> None:
        self.xhtml = xhtml  # Might be useful for sub-classes?
        if not xhtml:
            # See their definition in GenericTranslator.
            self.lower_case_element_names = True
            self.lower_case_attribute_names = True

    def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
        """``:checked`` -- selected options and checked checkbox/radio inputs."""
        # FIXME: is this really all the elements?
        return xpath.add_condition(
            "(@selected and name(.) = 'option') or "
            "(@checked "
            "and (name(.) = 'input' or name(.) = 'command')"
            "and (@type = 'checkbox' or @type = 'radio'))"
        )

    def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
        """Translate ``:lang()`` by inspecting the language attribute.

        The nearest ancestor-or-self element carrying
        :attr:`lang_attribute` is taken, and its lower-cased value followed
        by ``-`` must start with the lower-cased argument followed by ``-``.

        :raises: :class:`ExpressionError` unless the argument is a single
            string or ident.
        """
        if function.argument_types() not in (["STRING"], ["IDENT"]):
            raise ExpressionError(
                "Expected a single string or ident for :lang(), got %r"
                % function.arguments
            )
        value = function.arguments[0].value
        assert value
        # Both the "has the attribute" test and the comparison use
        # ``lang_attribute`` so that overriding it in a sub-class affects
        # the whole expression consistently.
        return xpath.add_condition(
            "ancestor-or-self::*[@%s][1][starts-with(concat("
            # XPath 1.0 has no lower-case function...
            "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
            "'abcdefghijklmnopqrstuvwxyz'), "
            "'-'), %s)]"
            % (
                self.lang_attribute,
                self.lang_attribute,
                self.xpath_literal(value.lower() + "-"),
            )
        )

    def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
        """``:link`` -- ``a``, ``link`` or ``area`` elements with ``href``."""
        return xpath.add_condition(
            "@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')"
        )

    # Links are never visited, the implementation for :visited is the same
    # as in GenericTranslator

    def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
        """``:disabled`` -- form-related elements that are disabled.

        Either the element itself carries ``disabled``, or it is a form
        control inside a ``fieldset`` that does.
        """
        # http://www.w3.org/TR/html5/section-index.html#attributes-1
        return xpath.add_condition(
            """
        (
            @disabled and
            (
                (name(.) = 'input' and @type != 'hidden') or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea' or
                name(.) = 'command' or
                name(.) = 'fieldset' or
                name(.) = 'optgroup' or
                name(.) = 'option'
            )
        ) or (
            (
                (name(.) = 'input' and @type != 'hidden') or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea'
            )
            and ancestor::fieldset[@disabled]
        )
        """
        )
        # FIXME: in the second half, add "and is not a descendant of that
        # fieldset element's first legend element child, if any."

    def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
        """``:enabled`` -- links with ``href`` and form elements not disabled."""
        # http://www.w3.org/TR/html5/section-index.html#attributes-1
        return xpath.add_condition(
            """
        (
            @href and (
                name(.) = 'a' or
                name(.) = 'link' or
                name(.) = 'area'
            )
        ) or (
            (
                name(.) = 'command' or
                name(.) = 'fieldset' or
                name(.) = 'optgroup'
            )
            and not(@disabled)
        ) or (
            (
                (name(.) = 'input' and @type != 'hidden') or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea' or
                name(.) = 'keygen'
            )
            and not (@disabled or ancestor::fieldset[@disabled])
        ) or (
            name(.) = 'option' and not(
                @disabled or ancestor::optgroup[@disabled]
            )
        )
        """
        )
        # FIXME: ... or "li elements that are children of menu elements,
        # and that have a child element that defines a command, if the first
        # such element's Disabled State facet is false (not disabled)".
        # FIXME: after ancestor::fieldset[@disabled], add "and is not a
        # descendant of that fieldset element's first legend element child,
        # if any."