Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/soupsieve/css_parser.py: 61%

583 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1"""CSS selector parser.""" 

2from __future__ import annotations 

3import re 

4from functools import lru_cache 

5from . import util 

6from . import css_match as cm 

7from . import css_types as ct 

8from .util import SelectorSyntaxError 

9import warnings 

10from typing import Match, Any, Iterator, cast 

11 

# Code point used when a CSS escape resolves to NUL
UNICODE_REPLACEMENT_CHAR = 0xFFFD

# Pseudo-classes written without parentheses
PSEUDO_SIMPLE = {
    ':any-link',
    ':checked',
    ':default',
    ':defined',
    ':disabled',
    ':empty',
    ':enabled',
    ':first-child',
    ':first-of-type',
    ':in-range',
    ':indeterminate',
    ':last-child',
    ':last-of-type',
    ':link',
    ':only-child',
    ':only-of-type',
    ':optional',
    ':out-of-range',
    ':placeholder-shown',
    ':read-only',
    ':read-write',
    ':required',
    ':root',
    ':scope',
}

# Parenthesis-free pseudo-classes that are accepted but can never match in Soup Sieve
PSEUDO_SIMPLE_NO_MATCH = {
    ':active',
    ':current',
    ':focus',
    ':focus-visible',
    ':focus-within',
    ':future',
    ':host',
    ':hover',
    ':local-link',
    ':past',
    ':paused',
    ':playing',
    ':target',
    ':target-within',
    ':user-invalid',
    ':visited',
}

# Functional pseudo-classes whose argument is a selector list
PSEUDO_COMPLEX = {
    ':-soup-contains',
    ':-soup-contains-own',
    ':contains',
    ':has',
    ':is',
    ':matches',
    ':not',
    ':where',
}

# Functional pseudo-classes that are parsed but can never match
PSEUDO_COMPLEX_NO_MATCH = {
    ':current',
    ':host',
    ':host-context',
}

# Functional pseudo-classes with their own argument grammar, each handled by a dedicated pattern
PSEUDO_SPECIAL = {
    ':dir',
    ':lang',
    ':nth-child',
    ':nth-last-child',
    ':nth-last-of-type',
    ':nth-of-type',
}

# Every pseudo-class name the parser knows about, in any form
PSEUDO_SUPPORTED = set().union(
    PSEUDO_SIMPLE,
    PSEUDO_SIMPLE_NO_MATCH,
    PSEUDO_COMPLEX,
    PSEUDO_COMPLEX_NO_MATCH,
    PSEUDO_SPECIAL,
)

91 

# Sub-patterns parts
# These are plain strings that get spliced into larger patterns with `str.format`,
# which is why literal regex braces are doubled (`{{1,6}}`).
# Whitespace
# A newline is CRLF taken as one unit, otherwise a single LF, FF or CR.
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
# One whitespace character: space, tab or a newline.
WS = r'(?:[ \t]|{})'.format(NEWLINE)
# Comments
# A `/* ... */` block.
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included
WSC = r'(?:{ws}|{comments})'.format(ws=WS, comments=COMMENTS)
# CSS escapes
# Backslash followed by 1-6 hex digits (plus one optional whitespace), by any
# character that is not a newline character, or by the end of the input.
# The hex class is lower-case only, so patterns built from it rely on being
# compiled case-insensitively to accept upper-case hex digits.
CSS_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$))'.format(ws=WS)
# Same as above, but additionally accepts a backslash followed by a newline.
CSS_STRING_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$|{nl}))'.format(ws=WS, nl=NEWLINE)
# CSS Identifier
# Either `--` or an optional `-` followed by one or more "start" characters/escapes,
# then any number of "continue" characters/escapes. The character classes are
# written as exclusions of ASCII punctuation, digits (for the start) and controls.
IDENTIFIER = r'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})*)
'''.format(esc=CSS_ESCAPES)
# `nth` content
# Optional sign, then digits with an optional `n` (or a bare `n`); a signed
# offset is only allowed directly after an `n` (enforced by the lookbehind).
NTH = r'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:[0-9]+))?'.format(ws=WSC)
# Value: quoted string or identifier
# Quoted strings may contain backslash escapes (including an escaped newline)
# but no raw newline characters.
VALUE = r'''
(?:"(?:\\(?:.|{nl})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{nl})|[^\\'\r\n\f]+)*?'|{ident}+)
'''.format(nl=NEWLINE, ident=IDENTIFIER)
# Attribute value comparison. `!=` is handled special as it is non-standard.
# This is the tail of an attribute selector: an optional
# `<cmp> <value> [i|s]` section followed by the closing `]`.
# Named groups: `cmp`, `value`, `case`.
ATTR = r'''
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}*(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE)

118 

# Selector patterns
# Each `PAT_*` string is the source for one token pattern; several contain
# whitespace/newlines and therefore expect to be compiled in verbose mode.
# IDs (`#id`)
PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
# Classes (`.class`)
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
# Prefix:Tag (`prefix|tag`)
# Named groups: `tag_ns` (optional, includes the trailing `|`) and `tag_name`.
PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
# Attributes (`[attr]`, `[attr=value]`, etc.)
# Named groups: `attr_ns` (optional, includes the trailing `|`), `attr_name`,
# plus `cmp`, `value` and `case` contributed by `ATTR`.
PAT_ATTR = r'''
\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
# Group `open` is only set when an opening parenthesis follows the name.
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
# Identical to `PAT_PSEUDO_CLASS` except that the opening parenthesis is mandatory.
PAT_PSEUDO_CLASS_SPECIAL = r'(?P<name>:{ident})(?P<open>\({ws}*)'.format(ws=WSC, ident=IDENTIFIER)
# Custom pseudo class (`:--custom-pseudo`)
# The lookahead requires the identifier to start with `--`.
PAT_PSEUDO_CLASS_CUSTOM = r'(?P<name>:(?=--){ident})'.format(ident=IDENTIFIER)
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC)
# Pseudo element (`::pseudo-element`)
# Built by prefixing the pseudo-class pattern (which already starts with `:`) with another `:`.
PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS)

# At rule (`@page`, `@media`, etc.) (not supported)
# The token exists only so the parser can recognise an at-rule and reject it
# explicitly. The pattern is `@` followed by an identifier; a stray literal `P`
# after the `@` previously limited detection to at-rules whose name starts with "p".
PAT_AT_RULE = r'@{ident}'.format(ident=IDENTIFIER)

# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
# After the `an+b`/`even`/`odd` part the match ends either at the closing `)`
# or right after an `of` keyword (group `of`), in which case the selector list
# that follows is left unconsumed by this pattern.
# Named groups: `pseudo_nth_child`, `name`, `open`, `nth_child`, `of`.
PAT_PSEUDO_NTH_CHILD = r'''
(?P<pseudo_nth_child>{name}
(?P<nth_child>{nth}|even|odd))(?:{wsc}*\)|(?P<of>{comments}*{ws}{wsc}*of{comments}*{ws}{wsc}*))
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, wsc=WSC, comments=COMMENTS, ws=WS, nth=NTH)
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
# Unlike the child variant there is no `of S` form; the closing `)` is required.
# Named groups: `pseudo_nth_type`, `name`, `open`, `nth_type`.
PAT_PSEUDO_NTH_TYPE = r'''
(?P<pseudo_nth_type>{name}
(?P<nth_type>{nth}|even|odd)){ws}*\)
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, nth=NTH)
# Pseudo class language (`:lang("*-de", en)`)
# Group `values` spans the whole comma separated value list, separators included.
PAT_PSEUDO_LANG = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
    name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
# Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = r'{name}(?P<dir>ltr|rtl){ws}*\)'.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC)
# Combining characters (`>`, `~`, ` `, `+`, `,`)
# Group `relation` is either one of `, + > ~` or a single whitespace character
# that is not itself followed by one of those symbols.
PAT_COMBINE = r'{wsc}*?(?P<relation>[,+>~]|{ws}(?![,+>~])){wsc}*'.format(ws=WS, wsc=WSC)
# Extra: Contains (`:contains(text)`)
# Same expression as `PAT_PSEUDO_LANG`: a comma separated list of values in group `values`.
PAT_PSEUDO_CONTAINS = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
    name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)

164 

# Regular expressions
# CSS escape pattern
# Groups: (1) hex escape plus one optional trailing whitespace, (2) backslash
# followed by any non-newline character, (3) backslash at the end of the input.
# Only plain whitespace (`WS`) may trail a hex escape, matching `CSS_ESCAPES`
# and `RE_CSS_STR_ESC`. Allowing a comment there would pull the comment text
# into group 1, which `css_unescape` hands to `int(..., 16)`.
RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$))'.format(ws=WS), re.I)

# CSS escape pattern for quoted strings.
# Groups 1-3: hex escape, escaped character, backslash at end of input;
# group 4: backslash followed by a newline.
RE_CSS_STR_ESC = re.compile(
    r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$)|(\\{nl}))'.format(ws=WS, nl=NEWLINE), re.I
)
# Pattern to break up `nth` specifiers
# Named groups: `s1` (sign of `a`), `a` (digits with optional `n`, or bare `n`),
# `s2` (sign of `b`), `b` (offset digits).
RE_NTH = re.compile(
    r'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){ws}*(?P<s2>[-+]){ws}*(?P<b>[0-9]+))?'.format(ws=WSC),
    re.I
)
# Pattern to iterate multiple values.
# Each match is either a `value` or a `split` (the comma separator with surrounding whitespace).
# Compiled in verbose mode only (no case-insensitive flag).
RE_VALUES = re.compile(r'(?:(?P<value>{value})|(?P<split>{ws}*,{ws}*))'.format(ws=WSC, value=VALUE), re.X)
# Whitespace checks
# A single whitespace character anywhere.
RE_WS = re.compile(WS)
# Leading run of whitespace/comments.
RE_WS_BEGIN = re.compile('^{}*'.format(WSC))
# Trailing run of whitespace/comments.
RE_WS_END = re.compile('{}*$'.format(WSC))
# A complete custom pseudo-class name (`:--name`) and nothing else.
RE_CUSTOM = re.compile(r'^{}$'.format(PAT_PSEUDO_CLASS_CUSTOM), re.X)

183 

# Constants
# Token that separates the entries of a selector list
COMMA_COMBINATOR = ','
# Token that stands for the descendant relation
WS_COMBINATOR = ' '

# Parse flags: one bit each so they can be combined with `|` and tested with `&`
FLG_PSEUDO = 1 << 0
FLG_NOT = 1 << 1
FLG_RELATIVE = 1 << 2
FLG_DEFAULT = 1 << 3
FLG_HTML = 1 << 4
FLG_INDETERMINATE = 1 << 5
FLG_OPEN = 1 << 6
FLG_IN_RANGE = 1 << 7
FLG_OUT_OF_RANGE = 1 << 8
FLG_PLACEHOLDER_SHOWN = 1 << 9
FLG_FORGIVE = 1 << 10

# Upper bound on the number of compiled patterns kept in the cache
_MAXCACHE = 500

205 

206 

@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(
    pattern: str,
    namespaces: ct.Namespaces | None,
    custom: ct.CustomSelectors | None,
    flags: int
) -> cm.SoupSieve:
    """
    Compile a selector string into a `SoupSieve` object, memoizing the result.

    The `lru_cache` decorator keys the cache on all four arguments and keeps at
    most `_MAXCACHE` entries. The custom selectors are normalized with
    `process_custom` for parsing, while the original `custom` object is what
    gets stored on the returned `SoupSieve`.
    """

    parser = CSSParser(pattern, custom=process_custom(custom), flags=flags)
    selectors = parser.process_selectors()
    return cm.SoupSieve(pattern, selectors, namespaces, custom, flags)

228 

229 

def _purge_cache() -> None:
    """Drop every compiled selector held by `_cached_css_compile`."""

    _cached_css_compile.cache_clear()

234 

235 

def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
    """
    Validate custom selector names and return them keyed by canonical name.

    Each key must be a valid custom pseudo-class name (`:--name`). The returned
    dictionary is keyed by the unescaped, lower-cased name, which is the same
    normalization the parser applies to a custom pseudo-class found in a
    selector before looking it up.

    Raises `SelectorSyntaxError` for an invalid name and `KeyError` when two
    keys resolve to the same canonical name.
    """

    custom_selectors = {}  # type: dict[str, str | ct.SelectorList]
    if custom is None:
        return custom_selectors

    for key, value in custom.items():
        name = util.lower(key)
        if RE_CUSTOM.match(name) is None:
            raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name))
        # Lower-case again after unescaping: an escape such as `\41` only turns
        # into a letter once it has been unescaped.
        canonical = util.lower(css_unescape(name))
        # Duplicates must be detected on the stored (canonical) form, otherwise
        # two spellings of the same name would silently overwrite each other.
        if canonical in custom_selectors:
            raise KeyError("The custom selector '{}' has already been registered".format(name))
        custom_selectors[canonical] = value
    return custom_selectors

249 

250 

def css_unescape(content: str, string: bool = False) -> str:
    """
    Unescape CSS value.

    Strings allow for spanning the value on multiple strings by escaping a new line.

    `content` is the escaped text. When `string` is true the string escape
    pattern is used, which additionally removes a backslash followed by a
    newline. Returns the text with every escape replaced.

    Hex escapes that resolve to NUL, to a surrogate, or to a value above the
    Unicode maximum are replaced with U+FFFD instead of being passed to `chr`.
    """

    def replace(m: Match[str]) -> str:
        """Replace with the appropriate substitute."""

        if m.group(1):
            # Group 1 is the backslash, 1-6 hex digits and whatever the pattern
            # allows to trail them. Only the leading hex run is the code point.
            escaped = m.group(1)[1:]
            end = 0
            while end < len(escaped) and escaped[end] in '0123456789abcdefABCDEF':
                end += 1
            codepoint = int(escaped[:end], 16)
            if codepoint == 0 or 0xD800 <= codepoint <= 0xDFFF or codepoint > 0x10FFFF:
                # Six hex digits can encode up to 0xFFFFFF, which `chr` rejects.
                codepoint = UNICODE_REPLACEMENT_CHAR
            value = chr(codepoint)
        elif m.group(2):
            # Escaped literal character: drop the backslash.
            value = m.group(2)[1:]
        elif m.group(3):
            # Lone backslash at the end of the input.
            value = '\ufffd'
        else:
            # Only reachable with the string pattern: escaped newline, removed entirely.
            value = ''

        return value

    return (RE_CSS_ESC if not string else RE_CSS_STR_ESC).sub(replace, content)

276 

277 

def escape(ident: str) -> str:
    """
    Escape a string so it can be used as a CSS identifier.

    NUL becomes U+FFFD, control characters and a leading digit (also a digit
    right after a leading `-`) are written as hex escapes followed by a space,
    characters that are legal in an identifier are kept, and everything else
    is prefixed with a backslash.
    """

    # An identifier consisting of just `-` has to be escaped as a whole
    if ident == '-':
        return '\\-'

    leading_dash = ident.startswith('-')
    pieces = []
    for position, char in enumerate(ident):
        cp = ord(char)
        is_digit = 0x30 <= cp <= 0x39
        at_ident_start = position == 0 or (leading_dash and position == 1)

        if cp == 0x00:
            pieces.append('\ufffd')
        elif cp <= 0x1F or cp == 0x7F:
            pieces.append('\\{:x} '.format(cp))
        elif is_digit and at_ident_start:
            pieces.append('\\{:x} '.format(cp))
        elif (
            is_digit or
            cp >= 0x80 or
            cp in (0x2D, 0x5F) or
            (0x41 <= cp <= 0x5A) or
            (0x61 <= cp <= 0x7A)
        ):
            pieces.append(char)
        else:
            pieces.append('\\{}'.format(char))
    return ''.join(pieces)

304 

305 

class SelectorPattern:
    """A named token pattern that can be matched against a selector string."""

    def __init__(self, name: str, pattern: str) -> None:
        """Store the token name and compile `pattern` (ignore-case, verbose, Unicode)."""

        self.name = name
        self.re_pattern = re.compile(pattern, flags=re.U | re.X | re.I)

    def get_name(self) -> str:
        """Return the token name given at construction."""

        return self.name

    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """
        Try to match the pattern in `selector` starting exactly at `index`.

        `flags` is accepted for interface compatibility and is not used here.
        Returns the match object, or `None` when the pattern does not match.
        """

        return self.re_pattern.match(selector, index)

324 

325 

class SpecialPseudoPattern(SelectorPattern):
    """
    Dispatching pattern for pseudo-classes that have their own argument grammar.

    The pseudo-class name is read first and used to pick the pattern registered
    for it; only that pattern is then tried against the selector.
    """

    def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
        """
        Build the name-to-pattern table.

        Each entry is `(token name, pseudo-class names, regex source, pattern class)`.
        One pattern object is created per entry and shared by all of its names.
        """

        self.patterns = {}
        for entry in patterns:
            token_name, pseudo_names, regex, pattern_type = entry[0], entry[1], entry[2], entry[3]
            compiled = pattern_type(token_name, regex)
            for pseudo_name in pseudo_names:
                self.patterns[pseudo_name] = compiled

        # Pattern responsible for the most recent successful match
        self.matched_name: SelectorPattern | None = None
        self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)

    def get_name(self) -> str:
        """Return the token name of the last pattern that matched, or `''` if none has."""

        if self.matched_name is None:
            return ''
        return self.matched_name.get_name()

    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """
        Match a special pseudo-class at `index`.

        Returns the match of the pattern registered for the pseudo-class name,
        or `None` when there is no `:name(` at `index`, the name is not
        registered, or the registered pattern does not match.
        """

        head = self.re_pseudo_name.match(selector, index)
        if not head:
            return None

        handler = self.patterns.get(util.lower(css_unescape(head.group('name'))))
        if not handler:
            return None

        found = handler.match(selector, index, flags)
        if found:
            self.matched_name = handler
        return found

361 

362 

363class _Selector: 

364 """ 

365 Intermediate selector class. 

366 

367 This stores selector data for a compound selector as we are acquiring them. 

368 Once we are done collecting the data for a compound selector, we freeze 

369 the data in an object that can be pickled and hashed. 

370 """ 

371 

372 def __init__(self, **kwargs: Any) -> None: 

373 """Initialize.""" 

374 

375 self.tag = kwargs.get('tag', None) # type: ct.SelectorTag | None 

376 self.ids = kwargs.get('ids', []) # type: list[str] 

377 self.classes = kwargs.get('classes', []) # type: list[str] 

378 self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute] 

379 self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth] 

380 self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList] 

381 self.relations = kwargs.get('relations', []) # type: list[_Selector] 

382 self.rel_type = kwargs.get('rel_type', None) # type: str | None 

383 self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains] 

384 self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang] 

385 self.flags = kwargs.get('flags', 0) # type: int 

386 self.no_match = kwargs.get('no_match', False) # type: bool 

387 

388 def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList: 

389 """Freeze relation.""" 

390 

391 if relations: 

392 sel = relations[0] 

393 sel.relations.extend(relations[1:]) 

394 return ct.SelectorList([sel.freeze()]) 

395 else: 

396 return ct.SelectorList() 

397 

398 def freeze(self) -> ct.Selector | ct.SelectorNull: 

399 """Freeze self.""" 

400 

401 if self.no_match: 

402 return ct.SelectorNull() 

403 else: 

404 return ct.Selector( 

405 self.tag, 

406 tuple(self.ids), 

407 tuple(self.classes), 

408 tuple(self.attributes), 

409 tuple(self.nth), 

410 tuple(self.selectors), 

411 self._freeze_relations(self.relations), 

412 self.rel_type, 

413 tuple(self.contains), 

414 tuple(self.lang), 

415 self.flags 

416 ) 

417 

418 def __str__(self) -> str: # pragma: no cover 

419 """String representation.""" 

420 

421 return ( 

422 '_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, ' 

423 'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})' 

424 ).format( 

425 self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors, 

426 self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match 

427 ) 

428 

429 __repr__ = __str__ 

430 

431 

432class CSSParser: 

433 """Parse CSS selectors.""" 

434 

    # Token patterns known to the parser. Each pattern's name is the key that
    # the selector-parsing loop dispatches on (e.g. "at_rule", "pseudo_class").
    # NOTE(review): the tokenizer that walks this tuple is outside this section;
    # presumably the patterns are tried in the order listed, which would make
    # the ordering significant (specific pseudo patterns before the generic one).
    css_tokens = (
        SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
        # Pseudo-classes with their own argument grammar, selected by name
        SpecialPseudoPattern(
            (
                (
                    "pseudo_contains",
                    (':contains', ':-soup-contains', ':-soup-contains-own'),
                    PAT_PSEUDO_CONTAINS,
                    SelectorPattern
                ),
                ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
                ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
                ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
                ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
            )
        ),
        SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
        SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS),
        SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT),
        SelectorPattern("at_rule", PAT_AT_RULE),
        SelectorPattern("id", PAT_ID),
        SelectorPattern("class", PAT_CLASS),
        SelectorPattern("tag", PAT_TAG),
        SelectorPattern("attribute", PAT_ATTR),
        SelectorPattern("combine", PAT_COMBINE)
    )

461 

462 def __init__( 

463 self, 

464 selector: str, 

465 custom: dict[str, str | ct.SelectorList] | None = None, 

466 flags: int = 0 

467 ) -> None: 

468 """Initialize.""" 

469 

470 self.pattern = selector.replace('\x00', '\ufffd') 

471 self.flags = flags 

472 self.debug = self.flags & util.DEBUG 

473 self.custom = {} if custom is None else custom 

474 

475 def parse_attribute_selector(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool: 

476 """Create attribute selector from the returned regex match.""" 

477 

478 inverse = False 

479 op = m.group('cmp') 

480 case = util.lower(m.group('case')) if m.group('case') else None 

481 ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else '' 

482 attr = css_unescape(m.group('attr_name')) 

483 is_type = False 

484 pattern2 = None 

485 value = '' 

486 

487 if case: 

488 flags = (re.I if case == 'i' else 0) | re.DOTALL 

489 elif util.lower(attr) == 'type': 

490 flags = re.I | re.DOTALL 

491 is_type = True 

492 else: 

493 flags = re.DOTALL 

494 

495 if op: 

496 if m.group('value').startswith(('"', "'")): 

497 value = css_unescape(m.group('value')[1:-1], True) 

498 else: 

499 value = css_unescape(m.group('value')) 

500 

501 if not op: 

502 # Attribute name 

503 pattern = None 

504 elif op.startswith('^'): 

505 # Value start with 

506 pattern = re.compile(r'^%s.*' % re.escape(value), flags) 

507 elif op.startswith('$'): 

508 # Value ends with 

509 pattern = re.compile(r'.*?%s$' % re.escape(value), flags) 

510 elif op.startswith('*'): 

511 # Value contains 

512 pattern = re.compile(r'.*?%s.*' % re.escape(value), flags) 

513 elif op.startswith('~'): 

514 # Value contains word within space separated list 

515 # `~=` should match nothing if it is empty or contains whitespace, 

516 # so if either of these cases is present, use `[^\s\S]` which cannot be matched. 

517 value = r'[^\s\S]' if not value or RE_WS.search(value) else re.escape(value) 

518 pattern = re.compile(r'.*?(?:(?<=^)|(?<=[ \t\r\n\f]))%s(?=(?:[ \t\r\n\f]|$)).*' % value, flags) 

519 elif op.startswith('|'): 

520 # Value starts with word in dash separated list 

521 pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags) 

522 else: 

523 # Value matches 

524 pattern = re.compile(r'^%s$' % re.escape(value), flags) 

525 if op.startswith('!'): 

526 # Equivalent to `:not([attr=value])` 

527 inverse = True 

528 if is_type and pattern: 

529 pattern2 = re.compile(pattern.pattern) 

530 

531 # Append the attribute selector 

532 sel_attr = ct.SelectorAttribute(attr, ns, pattern, pattern2) 

533 if inverse: 

534 # If we are using `!=`, we need to nest the pattern under a `:not()`. 

535 sub_sel = _Selector() 

536 sub_sel.attributes.append(sel_attr) 

537 not_list = ct.SelectorList([sub_sel.freeze()], True, False) 

538 sel.selectors.append(not_list) 

539 else: 

540 sel.attributes.append(sel_attr) 

541 

542 has_selector = True 

543 return has_selector 

544 

545 def parse_tag_pattern(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool: 

546 """Parse tag pattern from regex match.""" 

547 

548 prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None 

549 tag = css_unescape(m.group('tag_name')) 

550 sel.tag = ct.SelectorTag(tag, prefix) 

551 has_selector = True 

552 return has_selector 

553 

554 def parse_pseudo_class_custom(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool: 

555 """ 

556 Parse custom pseudo class alias. 

557 

558 Compile custom selectors as we need them. When compiling a custom selector, 

559 set it to `None` in the dictionary so we can avoid an infinite loop. 

560 """ 

561 

562 pseudo = util.lower(css_unescape(m.group('name'))) 

563 selector = self.custom.get(pseudo) 

564 if selector is None: 

565 raise SelectorSyntaxError( 

566 "Undefined custom selector '{}' found at position {}".format(pseudo, m.end(0)), 

567 self.pattern, 

568 m.end(0) 

569 ) 

570 

571 if not isinstance(selector, ct.SelectorList): 

572 del self.custom[pseudo] 

573 selector = CSSParser( 

574 selector, custom=self.custom, flags=self.flags 

575 ).process_selectors(flags=FLG_PSEUDO) 

576 self.custom[pseudo] = selector 

577 

578 sel.selectors.append(selector) 

579 has_selector = True 

580 return has_selector 

581 

582 def parse_pseudo_class( 

583 self, 

584 sel: _Selector, 

585 m: Match[str], 

586 has_selector: bool, 

587 iselector: Iterator[tuple[str, Match[str]]], 

588 is_html: bool 

589 ) -> tuple[bool, bool]: 

590 """Parse pseudo class.""" 

591 

592 complex_pseudo = False 

593 pseudo = util.lower(css_unescape(m.group('name'))) 

594 if m.group('open'): 

595 complex_pseudo = True 

596 if complex_pseudo and pseudo in PSEUDO_COMPLEX: 

597 has_selector = self.parse_pseudo_open(sel, pseudo, has_selector, iselector, m.end(0)) 

598 elif not complex_pseudo and pseudo in PSEUDO_SIMPLE: 

599 if pseudo == ':root': 

600 sel.flags |= ct.SEL_ROOT 

601 elif pseudo == ':defined': 

602 sel.flags |= ct.SEL_DEFINED 

603 is_html = True 

604 elif pseudo == ':scope': 

605 sel.flags |= ct.SEL_SCOPE 

606 elif pseudo == ':empty': 

607 sel.flags |= ct.SEL_EMPTY 

608 elif pseudo in (':link', ':any-link'): 

609 sel.selectors.append(CSS_LINK) 

610 elif pseudo == ':checked': 

611 sel.selectors.append(CSS_CHECKED) 

612 elif pseudo == ':default': 

613 sel.selectors.append(CSS_DEFAULT) 

614 elif pseudo == ':indeterminate': 

615 sel.selectors.append(CSS_INDETERMINATE) 

616 elif pseudo == ":disabled": 

617 sel.selectors.append(CSS_DISABLED) 

618 elif pseudo == ":enabled": 

619 sel.selectors.append(CSS_ENABLED) 

620 elif pseudo == ":required": 

621 sel.selectors.append(CSS_REQUIRED) 

622 elif pseudo == ":optional": 

623 sel.selectors.append(CSS_OPTIONAL) 

624 elif pseudo == ":read-only": 

625 sel.selectors.append(CSS_READ_ONLY) 

626 elif pseudo == ":read-write": 

627 sel.selectors.append(CSS_READ_WRITE) 

628 elif pseudo == ":in-range": 

629 sel.selectors.append(CSS_IN_RANGE) 

630 elif pseudo == ":out-of-range": 

631 sel.selectors.append(CSS_OUT_OF_RANGE) 

632 elif pseudo == ":placeholder-shown": 

633 sel.selectors.append(CSS_PLACEHOLDER_SHOWN) 

634 elif pseudo == ':first-child': 

635 sel.nth.append(ct.SelectorNth(1, False, 0, False, False, ct.SelectorList())) 

636 elif pseudo == ':last-child': 

637 sel.nth.append(ct.SelectorNth(1, False, 0, False, True, ct.SelectorList())) 

638 elif pseudo == ':first-of-type': 

639 sel.nth.append(ct.SelectorNth(1, False, 0, True, False, ct.SelectorList())) 

640 elif pseudo == ':last-of-type': 

641 sel.nth.append(ct.SelectorNth(1, False, 0, True, True, ct.SelectorList())) 

642 elif pseudo == ':only-child': 

643 sel.nth.extend( 

644 [ 

645 ct.SelectorNth(1, False, 0, False, False, ct.SelectorList()), 

646 ct.SelectorNth(1, False, 0, False, True, ct.SelectorList()) 

647 ] 

648 ) 

649 elif pseudo == ':only-of-type': 

650 sel.nth.extend( 

651 [ 

652 ct.SelectorNth(1, False, 0, True, False, ct.SelectorList()), 

653 ct.SelectorNth(1, False, 0, True, True, ct.SelectorList()) 

654 ] 

655 ) 

656 has_selector = True 

657 elif complex_pseudo and pseudo in PSEUDO_COMPLEX_NO_MATCH: 

658 self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN) 

659 sel.no_match = True 

660 has_selector = True 

661 elif not complex_pseudo and pseudo in PSEUDO_SIMPLE_NO_MATCH: 

662 sel.no_match = True 

663 has_selector = True 

664 elif pseudo in PSEUDO_SUPPORTED: 

665 raise SelectorSyntaxError( 

666 "Invalid syntax for pseudo class '{}'".format(pseudo), 

667 self.pattern, 

668 m.start(0) 

669 ) 

670 else: 

671 raise NotImplementedError( 

672 "'{}' pseudo-class is not implemented at this time".format(pseudo) 

673 ) 

674 

675 return has_selector, is_html 

676 

677 def parse_pseudo_nth( 

678 self, 

679 sel: _Selector, 

680 m: Match[str], 

681 has_selector: bool, 

682 iselector: Iterator[tuple[str, Match[str]]] 

683 ) -> bool: 

684 """Parse `nth` pseudo.""" 

685 

686 mdict = m.groupdict() 

687 if mdict.get('pseudo_nth_child'): 

688 postfix = '_child' 

689 else: 

690 postfix = '_type' 

691 mdict['name'] = util.lower(css_unescape(mdict['name'])) 

692 content = util.lower(mdict.get('nth' + postfix)) 

693 if content == 'even': 

694 # 2n 

695 s1 = 2 

696 s2 = 0 

697 var = True 

698 elif content == 'odd': 

699 # 2n+1 

700 s1 = 2 

701 s2 = 1 

702 var = True 

703 else: 

704 nth_parts = cast(Match[str], RE_NTH.match(content)) 

705 _s1 = '-' if nth_parts.group('s1') and nth_parts.group('s1') == '-' else '' 

706 a = nth_parts.group('a') 

707 var = a.endswith('n') 

708 if a.startswith('n'): 

709 _s1 += '1' 

710 elif var: 

711 _s1 += a[:-1] 

712 else: 

713 _s1 += a 

714 _s2 = '-' if nth_parts.group('s2') and nth_parts.group('s2') == '-' else '' 

715 if nth_parts.group('b'): 

716 _s2 += nth_parts.group('b') 

717 else: 

718 _s2 = '0' 

719 s1 = int(_s1, 10) 

720 s2 = int(_s2, 10) 

721 

722 pseudo_sel = mdict['name'] 

723 if postfix == '_child': 

724 if m.group('of'): 

725 # Parse the rest of `of S`. 

726 nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN) 

727 else: 

728 # Use default `*|*` for `of S`. 

729 nth_sel = CSS_NTH_OF_S_DEFAULT 

730 if pseudo_sel == ':nth-child': 

731 sel.nth.append(ct.SelectorNth(s1, var, s2, False, False, nth_sel)) 

732 elif pseudo_sel == ':nth-last-child': 

733 sel.nth.append(ct.SelectorNth(s1, var, s2, False, True, nth_sel)) 

734 else: 

735 if pseudo_sel == ':nth-of-type': 

736 sel.nth.append(ct.SelectorNth(s1, var, s2, True, False, ct.SelectorList())) 

737 elif pseudo_sel == ':nth-last-of-type': 

738 sel.nth.append(ct.SelectorNth(s1, var, s2, True, True, ct.SelectorList())) 

739 has_selector = True 

740 return has_selector 

741 

742 def parse_pseudo_open( 

743 self, 

744 sel: _Selector, 

745 name: str, 

746 has_selector: bool, 

747 iselector: Iterator[tuple[str, Match[str]]], 

748 index: int 

749 ) -> bool: 

750 """Parse pseudo with opening bracket.""" 

751 

752 flags = FLG_PSEUDO | FLG_OPEN 

753 if name == ':not': 

754 flags |= FLG_NOT 

755 elif name == ':has': 

756 flags |= FLG_RELATIVE 

757 elif name in (':where', ':is'): 

758 flags |= FLG_FORGIVE 

759 

760 sel.selectors.append(self.parse_selectors(iselector, index, flags)) 

761 has_selector = True 

762 

763 return has_selector 

764 

765 def parse_has_combinator( 

766 self, 

767 sel: _Selector, 

768 m: Match[str], 

769 has_selector: bool, 

770 selectors: list[_Selector], 

771 rel_type: str, 

772 index: int 

773 ) -> tuple[bool, _Selector, str]: 

774 """Parse combinator tokens.""" 

775 

776 combinator = m.group('relation').strip() 

777 if not combinator: 

778 combinator = WS_COMBINATOR 

779 if combinator == COMMA_COMBINATOR: 

780 sel.rel_type = rel_type 

781 selectors[-1].relations.append(sel) 

782 rel_type = ":" + WS_COMBINATOR 

783 selectors.append(_Selector()) 

784 else: 

785 if has_selector: 

786 # End the current selector and associate the leading combinator with this selector. 

787 sel.rel_type = rel_type 

788 selectors[-1].relations.append(sel) 

789 elif rel_type[1:] != WS_COMBINATOR: 

790 # It's impossible to have two whitespace combinators after each other as the patterns 

791 # will gobble up trailing whitespace. It is also impossible to have a whitespace 

792 # combinator after any other kind for the same reason. But we could have 

793 # multiple non-whitespace combinators. So if the current combinator is not a whitespace, 

794 # then we've hit the multiple combinator case, so we should fail. 

795 raise SelectorSyntaxError( 

796 'The multiple combinators at position {}'.format(index), 

797 self.pattern, 

798 index 

799 ) 

800 

801 # Set the leading combinator for the next selector. 

802 rel_type = ':' + combinator 

803 

804 sel = _Selector() 

805 has_selector = False 

806 return has_selector, sel, rel_type 

807 

808 def parse_combinator( 

809 self, 

810 sel: _Selector, 

811 m: Match[str], 

812 has_selector: bool, 

813 selectors: list[_Selector], 

814 relations: list[_Selector], 

815 is_pseudo: bool, 

816 is_forgive: bool, 

817 index: int 

818 ) -> tuple[bool, _Selector]: 

819 """Parse combinator tokens.""" 

820 

821 combinator = m.group('relation').strip() 

822 if not combinator: 

823 combinator = WS_COMBINATOR 

824 if not has_selector: 

825 if not is_forgive or combinator != COMMA_COMBINATOR: 

826 raise SelectorSyntaxError( 

827 "The combinator '{}' at position {}, must have a selector before it".format(combinator, index), 

828 self.pattern, 

829 index 

830 ) 

831 

832 # If we are in a forgiving pseudo class, just make the selector a "no match" 

833 if combinator == COMMA_COMBINATOR: 

834 sel.no_match = True 

835 del relations[:] 

836 selectors.append(sel) 

837 else: 

838 if combinator == COMMA_COMBINATOR: 

839 if not sel.tag and not is_pseudo: 

840 # Implied `*` 

841 sel.tag = ct.SelectorTag('*', None) 

842 sel.relations.extend(relations) 

843 selectors.append(sel) 

844 del relations[:] 

845 else: 

846 sel.relations.extend(relations) 

847 sel.rel_type = combinator 

848 del relations[:] 

849 relations.append(sel) 

850 

851 sel = _Selector() 

852 has_selector = False 

853 

854 return has_selector, sel 

855 

856 def parse_class_id(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool: 

857 """Parse HTML classes and ids.""" 

858 

859 selector = m.group(0) 

860 if selector.startswith('.'): 

861 sel.classes.append(css_unescape(selector[1:])) 

862 else: 

863 sel.ids.append(css_unescape(selector[1:])) 

864 has_selector = True 

865 return has_selector 

866 

867 def parse_pseudo_contains(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool: 

868 """Parse contains.""" 

869 

870 pseudo = util.lower(css_unescape(m.group('name'))) 

871 if pseudo == ":contains": 

872 warnings.warn( 

873 "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.", 

874 FutureWarning 

875 ) 

876 contains_own = pseudo == ":-soup-contains-own" 

877 values = css_unescape(m.group('values')) 

878 patterns = [] 

879 for token in RE_VALUES.finditer(values): 

880 if token.group('split'): 

881 continue 

882 value = token.group('value') 

883 if value.startswith(("'", '"')): 

884 value = css_unescape(value[1:-1], True) 

885 else: 

886 value = css_unescape(value) 

887 patterns.append(value) 

888 sel.contains.append(ct.SelectorContains(patterns, contains_own)) 

889 has_selector = True 

890 return has_selector 

891 

892 def parse_pseudo_lang(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool: 

893 """Parse pseudo language.""" 

894 

895 values = m.group('values') 

896 patterns = [] 

897 for token in RE_VALUES.finditer(values): 

898 if token.group('split'): 

899 continue 

900 value = token.group('value') 

901 if value.startswith(('"', "'")): 

902 value = css_unescape(value[1:-1], True) 

903 else: 

904 value = css_unescape(value) 

905 

906 patterns.append(value) 

907 

908 sel.lang.append(ct.SelectorLang(patterns)) 

909 has_selector = True 

910 

911 return has_selector 

912 

913 def parse_pseudo_dir(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool: 

914 """Parse pseudo direction.""" 

915 

916 value = ct.SEL_DIR_LTR if util.lower(m.group('dir')) == 'ltr' else ct.SEL_DIR_RTL 

917 sel.flags |= value 

918 has_selector = True 

919 return has_selector 

920 

def parse_selectors(
    self,
    iselector: Iterator[tuple[str, Match[str]]],
    index: int = 0,
    flags: int = 0
) -> ct.SelectorList:
    """Parse a stream of selector tokens into a `ct.SelectorList`.

    `iselector` yields `(token name, match)` pairs. Tokens are consumed until the
    stream is exhausted or, when `FLG_OPEN` is set in `flags`, until the closing token
    of the enclosing pseudo-class is reached. `index` is the current position in the
    pattern: it is advanced past each consumed token and used in error messages.

    Raises `SelectorSyntaxError` when a selector is missing, a tag name is misplaced,
    or a pseudo-class is unclosed or closed without being open. Raises
    `NotImplementedError` for at-rules and pseudo-elements.
    """

    def enabled(mask: int) -> bool:
        return bool(flags & mask)

    # Initialize important variables
    sel = _Selector()
    selectors = []
    relations: list[_Selector] = []
    rel_type = ":" + WS_COMBINATOR
    has_selector = False
    closed = False

    # Decode the behavior flags
    is_open = enabled(FLG_OPEN)
    is_pseudo = enabled(FLG_PSEUDO)
    is_relative = enabled(FLG_RELATIVE)
    is_not = enabled(FLG_NOT)
    is_html = enabled(FLG_HTML)
    is_default = enabled(FLG_DEFAULT)
    is_indeterminate = enabled(FLG_INDETERMINATE)
    is_in_range = enabled(FLG_IN_RANGE)
    is_out_of_range = enabled(FLG_OUT_OF_RANGE)
    is_placeholder_shown = enabled(FLG_PLACEHOLDER_SHOWN)
    is_forgive = enabled(FLG_FORGIVE)

    # Print out useful debug stuff
    if self.debug:  # pragma: no cover
        for active, line in (
            (is_pseudo, ' is_pseudo: True'),
            (is_open, ' is_open: True'),
            (is_relative, ' is_relative: True'),
            (is_not, ' is_not: True'),
            (is_html, ' is_html: True'),
            (is_default, ' is_default: True'),
            (is_indeterminate, ' is_indeterminate: True'),
            (is_in_range, ' is_in_range: True'),
            (is_out_of_range, ' is_out_of_range: True'),
            (is_placeholder_shown, ' is_placeholder_shown: True'),
            (is_forgive, ' is_forgive: True'),
        ):
            if active:
                print(line)

    # The algorithm for relative selectors requires an initial selector in the selector list
    if is_relative:
        selectors.append(_Selector())

    try:
        while True:
            key, m = next(iselector)

            # Dispatch on the token type
            if key == "at_rule":
                raise NotImplementedError("At-rules found at position {}".format(m.start(0)))
            elif key == 'pseudo_class_custom':
                has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
            elif key == 'pseudo_class':
                has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
            elif key == 'pseudo_element':
                raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
            elif key == 'pseudo_contains':
                has_selector = self.parse_pseudo_contains(sel, m, has_selector)
            elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
                has_selector = self.parse_pseudo_nth(sel, m, has_selector, iselector)
            elif key == 'pseudo_lang':
                has_selector = self.parse_pseudo_lang(sel, m, has_selector)
            elif key == 'pseudo_dir':
                has_selector = self.parse_pseudo_dir(sel, m, has_selector)
                # Currently only supports HTML
                is_html = True
            elif key == 'pseudo_close':
                if not has_selector:
                    # An empty slot is only tolerated in forgiving selector lists
                    if not is_forgive:
                        raise SelectorSyntaxError(
                            "Expected a selector at position {}".format(m.start(0)),
                            self.pattern,
                            m.start(0)
                        )
                    sel.no_match = True
                if not is_open:
                    raise SelectorSyntaxError(
                        "Unmatched pseudo-class close at position {}".format(m.start(0)),
                        self.pattern,
                        m.start(0)
                    )
                # Leave without advancing `index` past the close token
                closed = True
                break
            elif key == 'combine':
                if is_relative:
                    has_selector, sel, rel_type = self.parse_has_combinator(
                        sel, m, has_selector, selectors, rel_type, index
                    )
                else:
                    has_selector, sel = self.parse_combinator(
                        sel, m, has_selector, selectors, relations, is_pseudo, is_forgive, index
                    )
            elif key == 'attribute':
                has_selector = self.parse_attribute_selector(sel, m, has_selector)
            elif key == 'tag':
                if has_selector:
                    raise SelectorSyntaxError(
                        "Tag name found at position {} instead of at the start".format(m.start(0)),
                        self.pattern,
                        m.start(0)
                    )
                has_selector = self.parse_tag_pattern(sel, m, has_selector)
            elif key in ('class', 'id'):
                has_selector = self.parse_class_id(sel, m, has_selector)

            index = m.end(0)
    except StopIteration:
        # The token stream is exhausted
        pass

    # Handle selectors that are not closed
    if is_open and not closed:
        raise SelectorSyntaxError(
            "Unclosed pseudo-class at position {}".format(index),
            self.pattern,
            index
        )

    # Cleanup completed selector piece
    if has_selector:
        if not (sel.tag or is_pseudo):
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        if is_relative:
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
        else:
            sel.relations.extend(relations)
            relations.clear()
            selectors.append(sel)

    # Forgive empty slots in pseudo-classes that have lists (and are forgiving)
    elif is_forgive and not (selectors and relations):
        # Handle normal pseudo-classes with empty slots like `:is()` etc.
        sel.no_match = True
        relations.clear()
        selectors.append(sel)
        has_selector = True

    if not has_selector:
        # We will always need to finish a selector when `:has()` is used as it leads with combining.
        # May apply to others as well.
        raise SelectorSyntaxError(
            'Expected a selector at position {}'.format(index),
            self.pattern,
            index
        )

    # Some patterns require additional logic, such as default. We try to make these the
    # last pattern, and assign the appropriate flag to that selector which communicates
    # to the matcher what additional logic is required. Later entries overwrite earlier ones.
    for active, extra_flag in (
        (is_default, ct.SEL_DEFAULT),
        (is_indeterminate, ct.SEL_INDETERMINATE),
        (is_in_range, ct.SEL_IN_RANGE),
        (is_out_of_range, ct.SEL_OUT_OF_RANGE),
        (is_placeholder_shown, ct.SEL_PLACEHOLDER_SHOWN),
    ):
        if active:
            selectors[-1].flags = extra_flag

    # Return selector list
    return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)

1100 

def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
    """Yield `(token name, match)` pairs for each selector token in `pattern`.

    Whitespace and comments at the start and end of the pattern are skipped. At each
    position the entries of `self.css_tokens` are tried in order and the first one that
    matches is yielded. If none matches, `SelectorSyntaxError` is raised with a message
    chosen from the offending character.
    """

    # Messages for characters that start a known selector type
    malformed = {
        '[': "Malformed attribute selector at position {}",
        '.': "Malformed class selector at position {}",
        '#': "Malformed id selector at position {}",
        ':': "Malformed pseudo-class selector at position {}",
    }

    # Ignore whitespace and comments at start and end of pattern
    leading = RE_WS_BEGIN.search(pattern)
    index = leading.end(0) if leading else 0
    trailing = RE_WS_END.search(pattern)
    end = (trailing.start(0) if trailing else len(pattern)) - 1

    if self.debug:  # pragma: no cover
        print('## PARSING: {!r}'.format(pattern))

    while index <= end:
        for candidate in self.css_tokens:
            found = candidate.match(pattern, index, self.flags)
            if found:
                name = candidate.get_name()
                if self.debug:  # pragma: no cover
                    print("TOKEN: '{}' --> {!r} at position {}".format(name, found.group(0), found.start(0)))
                index = found.end(0)
                yield name, found
                break
        else:
            # No token type matched here. If the character represents the start of one
            # of the known selector types, report that selector type as malformed;
            # otherwise, report the invalid character.
            c = pattern[index]
            template = malformed.get(c)
            if template is None:
                msg = "Invalid character {!r} position {}".format(c, index)
            else:
                msg = template.format(index)
            raise SelectorSyntaxError(msg, self.pattern, index)

    if self.debug:  # pragma: no cover
        print('## END PARSING')

1141 

def process_selectors(self, index: int = 0, flags: int = 0) -> ct.SelectorList:
    """Tokenize `self.pattern` and parse the tokens into a selector list.

    `index` and `flags` are passed through unchanged to `parse_selectors`.
    """

    tokens = self.selector_iter(self.pattern)
    return self.parse_selectors(tokens, index, flags)

1146 

1147 

# Precompile CSS selector lists for pseudo-classes (additional logic may be required beyond the pattern).
# A few patterns are order dependent as they use previously compiled patterns.

# CSS pattern for `:link` and `:any-link`
CSS_LINK = CSSParser(
    'html|*:is(a, area)[href]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:checked`
CSS_CHECKED = CSSParser(
    '''
    html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:default` (must compile CSS_CHECKED first)
CSS_DEFAULT = CSSParser(
    '''
    :checked,

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|form html|*:is(button, input)[type="submit"]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_DEFAULT)
# CSS pattern for `:indeterminate`
CSS_INDETERMINATE = CSSParser(
    '''
    html|input[type="checkbox"][indeterminate],
    html|input[type="radio"]:is(:not([name]), [name=""]):not([checked]),
    html|progress:not([value]),

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|input[type="radio"][name]:not([name='']):not([checked])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
# CSS pattern for `:disabled`
# (`fieldset` is listed once in the first `:is()`; a repeated alternative cannot match anything new)
CSS_DISABLED = CSSParser(
    '''
    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option)[disabled],
    html|optgroup[disabled] > html|option,
    html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
    html|fieldset[disabled] >
        html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:enabled` (uses `:disabled`, so CSS_DISABLED must be compiled first)
CSS_ENABLED = CSSParser(
    '''
    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option):not(:disabled)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:required`
CSS_REQUIRED = CSSParser(
    'html|*:is(input, textarea, select)[required]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:optional`
CSS_OPTIONAL = CSSParser(
    'html|*:is(input, textarea, select):not([required])'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:placeholder-shown`
CSS_PLACEHOLDER_SHOWN = CSSParser(
    '''
    html|input:is(
        :not([type]),
        [type=""],
        [type=text],
        [type=search],
        [type=url],
        [type=tel],
        [type=email],
        [type=password],
        [type=number]
    )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
    html|textarea[placeholder]:not([placeholder=''])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
# CSS pattern default for `:nth-child` "of S" feature
CSS_NTH_OF_S_DEFAULT = CSSParser(
    '*|*'
).process_selectors(flags=FLG_PSEUDO)
# CSS pattern for `:read-write` (CSS_DISABLED must be compiled first)
CSS_READ_WRITE = CSSParser(
    '''
    html|*:is(
        textarea,
        input:is(
            :not([type]),
            [type=""],
            [type=text],
            [type=search],
            [type=url],
            [type=tel],
            [type=email],
            [type=number],
            [type=password],
            [type=date],
            [type=datetime-local],
            [type=month],
            [type=time],
            [type=week]
        )
    ):not([readonly], :disabled),
    html|*:is([contenteditable=""], [contenteditable="true" i])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:read-only` (uses `:read-write`, so CSS_READ_WRITE must be compiled first)
CSS_READ_ONLY = CSSParser(
    '''
    html|*:not(:read-write)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:in-range`
CSS_IN_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_IN_RANGE | FLG_HTML)
# CSS pattern for `:out-of-range`
CSS_OUT_OF_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_OUT_OF_RANGE | FLG_HTML)