Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/soupsieve/css_parser.py: 82%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

595 statements  

1"""CSS selector parser.""" 

2from __future__ import annotations 

3import re 

4from functools import lru_cache 

5from . import util 

6from . import css_match as cm 

7from . import css_types as ct 

8from .util import SelectorSyntaxError 

9import warnings 

10from typing import Match, Any, Iterator, cast 

11 

# Code point substituted by `css_unescape` when a hex escape decodes to zero (`\0`).
UNICODE_REPLACEMENT_CHAR = 0xFFFD

# Simple pseudo classes that take no parameters.
# `CSSParser.parse_pseudo_class` only accepts these when no `(` follows the name.
PSEUDO_SIMPLE = {
    ":any-link",
    ":empty",
    ":first-child",
    ":first-of-type",
    ":in-range",
    ":open",
    ":out-of-range",
    ":last-child",
    ":last-of-type",
    ":link",
    ":only-child",
    ":only-of-type",
    ":root",
    ':checked',
    ':default',
    ':disabled',
    ':enabled',
    ':indeterminate',
    ':optional',
    ':placeholder-shown',
    ':read-only',
    ':read-write',
    ':required',
    ':scope',
    ':defined',
    ':muted'
}

# Supported, simple pseudo classes that match nothing in the Soup Sieve environment.
# The parser accepts them and marks the compound selector as "no match".
PSEUDO_SIMPLE_NO_MATCH = {
    ':active',
    ':autofill',
    ':buffering',
    ':current',
    ':focus',
    ':focus-visible',
    ':focus-within',
    ':fullscreen',
    ':future',
    ':host',
    ':hover',
    ':local-link',
    ':past',
    ':paused',
    ':picture-in-picture',
    ':playing',
    ':popover-open',
    ':seeking',
    ':stalled',
    ':target',
    ':target-within',
    ':user-invalid',
    ':volume-locked',
    ':visited'
}

# Complex pseudo classes that take selector lists.
# `CSSParser.parse_pseudo_class` only accepts these when the name is followed by `(`.
PSEUDO_COMPLEX = {
    ':contains',
    ':-soup-contains',
    ':-soup-contains-own',
    ':has',
    ':is',
    ':matches',
    ':not',
    ':where'
}

# Functional (parenthesized) pseudo classes whose argument is parsed and then
# discarded: the compound selector is simply marked as "no match".
# `:current` and `:host` also appear in `PSEUDO_SIMPLE_NO_MATCH` because both the
# bare and the functional spellings are accepted.
PSEUDO_COMPLEX_NO_MATCH = {
    ':current',
    ':host',
    ':host-context'
}

# Complex pseudo classes that take very specific parameters and are handled special
PSEUDO_SPECIAL = {
    ':dir',
    ':lang',
    ':nth-child',
    ':nth-last-child',
    ':nth-last-of-type',
    ':nth-of-type'
}

# Every recognized pseudo-class name. `parse_pseudo_class` uses this to tell
# "known name, wrong syntax" apart from "unknown name" when raising an error.
PSEUDO_SUPPORTED = PSEUDO_SIMPLE | PSEUDO_SIMPLE_NO_MATCH | PSEUDO_COMPLEX | PSEUDO_COMPLEX_NO_MATCH | PSEUDO_SPECIAL

101 

# Sub-patterns parts
# These are plain regex fragments (not compiled); they are interpolated into the
# larger `PAT_*` / `RE_*` expressions below.
# Whitespace
# A newline is `\r\n` as a unit, or a lone `\n`, `\f` or `\r`.
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
WS = fr'(?:[ \t]|{NEWLINE})'
# Comments (`/* ... */`)
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included
WSC = fr'(?:{WS}|{COMMENTS})'
# CSS escapes: backslash followed by 1-6 hex digits (plus one optional whitespace),
# by any character that is not a newline, or by end of input.
# The hex class is lowercase only; the patterns using it are compiled with `re.I`
# or applied to lowercased text.
CSS_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$))'
# Same as `CSS_ESCAPES`, but additionally allows an escaped newline.
CSS_STRING_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$|{NEWLINE}))'
# CSS Identifier
# Written across several lines, so it must be compiled in verbose (`re.X`) mode.
IDENTIFIER = fr'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})*)
'''
# `nth` content (`an+b` style expressions; the `+b` part is only allowed after `n`)
NTH = fr'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){WSC}*(?:[-+]){WSC}*(?:[0-9]+))?'
# Value: quoted string or identifier
VALUE = fr'''(?:"(?:\\(?:.|{NEWLINE})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{NEWLINE})|[^\\'\r\n\f]+)*?'|{IDENTIFIER}+)'''
# Attribute value comparison. `!=` is handled special as it is non-standard.
# Named groups: `cmp` (operator), `value`, and `case` (`i` or `s` flag).
# The pattern also consumes the closing `]`.
ATTR = fr'(?:{WSC}*(?P<cmp>[!~^|*$]?=){WSC}*(?P<value>{VALUE})(?:{WSC}*(?P<case>[is]))?)?{WSC}*\]'

124 

# Selector patterns
# Each fragment below is compiled by `SelectorPattern` with `re.I | re.X | re.U`.
# IDs (`#id`)
PAT_ID = fr'\#{IDENTIFIER}'
# Classes (`.class`)
PAT_CLASS = fr'\.{IDENTIFIER}'
# Prefix:Tag (`prefix|tag`)
PAT_TAG = fr'(?P<tag_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<tag_name>{IDENTIFIER}|\*)'
# Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = fr'\[{WSC}*(?P<attr_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<attr_name>{IDENTIFIER}){ATTR}'
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
PAT_PSEUDO_CLASS = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)?'
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
PAT_PSEUDO_CLASS_SPECIAL = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)'
# Custom pseudo class (`:--custom-pseudo`)
PAT_PSEUDO_CLASS_CUSTOM = fr'(?P<name>:(?=--){IDENTIFIER})'
# Nesting ampersand selector. Matches `&`
PAT_AMP = r'&'
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = fr'{WSC}*\)'
# Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = fr':{PAT_PSEUDO_CLASS}'
# At rule (`@page`, etc.) (not supported)
# NOTE(review): the literal `P` after `@` means only names starting with `p`/`P`
# (e.g. `@page`) match this token; `@media` would not. This looks like a typo for
# `@{IDENTIFIER}` -- confirm before changing, since it alters which error is raised.
PAT_AT_RULE = fr'@P{IDENTIFIER}'
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
# Ends either at the closing `)` or just after the ` of ` keyword (group `of`),
# in which case the caller continues parsing the selector list that follows.
PAT_PSEUDO_NTH_CHILD = fr'''
(?P<pseudo_nth_child>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_child>{NTH}|even|odd))(?:{WSC}*\)|(?P<of>{COMMENTS}*{WS}{WSC}*of{COMMENTS}*{WS}{WSC}*))
'''
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = fr'''
(?P<pseudo_nth_type>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_type>{NTH}|even|odd)){WSC}*\)
'''
# Pseudo class language (`:lang("*-de", en)`)
PAT_PSEUDO_LANG = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
# Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<dir>ltr|rtl){WSC}*\)'
# Combining characters (`>`, `~`, ` `, `+`, `,`)
# A whitespace only counts as a combinator when no explicit combinator follows it.
PAT_COMBINE = fr'{WSC}*?(?P<relation>[,+>~]|{WS}(?![,+>~])){WSC}*'
# Extra: Contains (`:contains(text)`)
PAT_PSEUDO_CONTAINS = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'

166 

# Regular expressions
# CSS escape pattern
# Groups: 1 = hex escape (with one optional trailing whitespace), 2 = escaped
# character, 3 = backslash at end of input.
# The optional terminator after the hex digits must be plain whitespace (`WS`),
# never a comment: `css_unescape` feeds group 1 to `int(..., 16)`, and a captured
# `/* ... */` would make that conversion raise `ValueError`. This also keeps the
# pattern in line with `CSS_ESCAPES` and `RE_CSS_STR_ESC`.
RE_CSS_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$))', re.I)
# Same as above for quoted strings; group 4 = escaped newline (line continuation).
RE_CSS_STR_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$)|(\\{NEWLINE}))', re.I)
# Pattern to break up `nth` specifiers
# Groups: `s1` (sign of a), `a` (coefficient, possibly ending in `n`),
# `s2` (sign of b), `b` (offset).
RE_NTH = re.compile(fr'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){WSC}*(?P<s2>[-+]){WSC}*(?P<b>[0-9]+))?', re.I)
# Pattern to iterate multiple values.
# Each match is either a `value` or a `split` (the comma separator).
RE_VALUES = re.compile(fr'(?:(?P<value>{VALUE})|(?P<split>{WSC}*,{WSC}*))', re.X)
# Whitespace checks
RE_WS = re.compile(WS)
RE_WS_BEGIN = re.compile(fr'^{WSC}*')
RE_WS_END = re.compile(fr'{WSC}*$')
# Validates a complete custom pseudo-class name (`:--name`).
RE_CUSTOM = re.compile(fr'^{PAT_PSEUDO_CLASS_CUSTOM}$', re.X)

180 

# Constants
# List split token
COMMA_COMBINATOR = ','
# Relation token for descendant
WS_COMBINATOR = " "

# Parse flags
# Bit flags passed to `CSSParser.parse_selectors` to describe the parsing context.
# Set when parsing the content of a pseudo-class (or a custom selector body).
FLG_PSEUDO = 0x01
# Set by `parse_pseudo_open` for `:not(...)`.
FLG_NOT = 0x02
# Set by `parse_pseudo_open` for `:has(...)` (relative selectors).
FLG_RELATIVE = 0x04
# NOTE(review): the flags below are decoded in `parse_selectors` but are not set
# anywhere in the visible part of the file; presumably they are used when the
# built-in selectors (`CSS_DEFAULT`, `CSS_IN_RANGE`, ...) are compiled -- confirm.
FLG_DEFAULT = 0x08
FLG_HTML = 0x10
FLG_INDETERMINATE = 0x20
# Set together with `FLG_PSEUDO` when an opening `(` has been consumed.
FLG_OPEN = 0x40
FLG_IN_RANGE = 0x80
FLG_OUT_OF_RANGE = 0x100
FLG_PLACEHOLDER_SHOWN = 0x200
# Set by `parse_pseudo_open` for `:is(...)` and `:where(...)` (forgiving lists).
FLG_FORGIVE = 0x400

# Maximum cached patterns to store
_MAXCACHE = 500

202 

203 

@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(
    pattern: str,
    namespaces: ct.Namespaces | None,
    custom: ct.CustomSelectors | None,
    flags: int
) -> cm.SoupSieve:
    """
    Compile a selector pattern, memoizing the result.

    Results are kept in an LRU cache of at most `_MAXCACHE` entries keyed on all
    four arguments, so every argument must be hashable.

    The custom selectors are normalized with `process_custom`, the pattern is
    parsed with `CSSParser`, and the parsed selector list is wrapped in a
    `cm.SoupSieve` together with the original (unprocessed) arguments.
    """

    parser = CSSParser(pattern, custom=process_custom(custom), flags=flags)
    selector_list = parser.process_selectors()
    return cm.SoupSieve(pattern, selector_list, namespaces, custom, flags)

225 

226 

def _purge_cache() -> None:
    """Drop every compiled selector held by the `_cached_css_compile` LRU cache."""

    _cached_css_compile.cache_clear()

231 

232 

def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
    """
    Validate and normalize user supplied custom selectors.

    Every key must be a valid custom pseudo-class name (`:--name`). Keys are
    stored lowercased and unescaped, which is the same normalization
    `CSSParser.parse_pseudo_class_custom` applies to a name before looking it up.

    Returns a new dictionary mapping the normalized name to its selector; an
    empty dictionary is returned when `custom` is `None`.

    Raises `SelectorSyntaxError` for an invalid name and `KeyError` when two keys
    normalize to the same name.
    """

    custom_selectors: dict[str, str | ct.SelectorList] = {}
    if custom is None:
        return custom_selectors

    for key, value in custom.items():
        # Lowercase first: `RE_CUSTOM` is compiled without `re.I`, so its hex
        # escape class only recognizes lowercase digits.
        name = util.lower(key)
        if RE_CUSTOM.match(name) is None:
            raise SelectorSyntaxError(f"The name '{name}' is not a valid custom pseudo-class name")

        # Lowercase again after unescaping so that a hex escape that decodes to an
        # uppercase letter still ends up under the key the parser will look up.
        registered = util.lower(css_unescape(name))

        # Compare against the stored (normalized) form; comparing the escaped
        # name would miss duplicates that are merely spelled with escapes.
        if registered in custom_selectors:
            raise KeyError(f"The custom selector '{name}' has already been registered")
        custom_selectors[registered] = value
    return custom_selectors

246 

247 

def css_unescape(content: str, string: bool = False) -> str:
    """
    Unescape CSS value.

    `content` is scanned for CSS escapes and each one is replaced:

    - a hex escape (`\\41 `) becomes the corresponding character; NUL, surrogate
      code points and values above U+10FFFF become U+FFFD,
    - an escaped character (`\\.`) becomes that character,
    - a backslash at the very end of the input becomes U+FFFD,
    - when `string` is true, an escaped newline is removed.

    Strings allow for spanning the value on multiple lines by escaping a new line.
    """

    def replace(m: Match[str]) -> str:
        """Replace with the appropriate substitute."""

        hex_escape = m.group(1)
        if hex_escape:
            # Convert only the hex digits. The escape may be followed by a
            # terminator that was captured with it, and `int()` must not see it.
            digits = cast(Match[str], re.match(r'[0-9a-fA-F]+', hex_escape[1:])).group(0)
            codepoint = int(digits, 16)
            # Six hex digits can encode values `chr()` rejects (> 0x10FFFF);
            # CSS maps those, NUL and surrogates to the replacement character.
            if codepoint == 0 or codepoint > 0x10FFFF or 0xD800 <= codepoint <= 0xDFFF:
                codepoint = UNICODE_REPLACEMENT_CHAR
            return chr(codepoint)
        if m.group(2):
            # Escaped literal character: drop the backslash.
            return m.group(2)[1:]
        if m.group(3):
            # Backslash at end of input.
            return '\ufffd'
        # Escaped newline (string mode only): line continuation, emit nothing.
        return ''

    return (RE_CSS_ESC if not string else RE_CSS_STR_ESC).sub(replace, content)

273 

274 

def escape(ident: str) -> str:
    """
    Escape identifier.

    Returns `ident` serialized so it can be used as a CSS identifier:

    - NUL becomes U+FFFD,
    - control characters (U+0001-U+001F, U+007F) become hex escapes,
    - a digit in first position, or in second position after a leading `-`,
      becomes a hex escape,
    - a lone `-` is escaped as `\\-`,
    - `-`, `_`, ASCII letters and digits, and anything at or above U+0080 are
      kept as-is,
    - every other character is prefixed with a backslash.
    """

    if ident == '-':
        # Need to escape identifier that is a single `-` with no other characters
        return '\\-'

    start_dash = ident.startswith('-')
    parts = []
    for index, char in enumerate(ident):
        codepoint = ord(char)
        is_digit = 0x30 <= codepoint <= 0x39
        # Positions where a digit would not be allowed to start an identifier.
        is_leading = index == 0 or (start_dash and index == 1)

        if codepoint == 0x00:
            parts.append('\ufffd')
        elif codepoint <= 0x1F or codepoint == 0x7F:
            parts.append(f'\\{codepoint:x} ')
        elif is_leading and is_digit:
            parts.append(f'\\{codepoint:x} ')
        elif (
            codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or is_digit or
            (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A)
        ):
            parts.append(char)
        else:
            parts.append(f'\\{char}')
    return ''.join(parts)

301 

302 

class SelectorPattern:
    """
    A named token pattern.

    Pairs a token name with a regular expression compiled in case-insensitive,
    verbose, Unicode mode.
    """

    def __init__(self, name: str, pattern: str) -> None:
        """Store the token name and compile `pattern`."""

        compiled = re.compile(pattern, re.I | re.X | re.U)
        self.name = name
        self.re_pattern = compiled

    def get_name(self) -> str:
        """Return the token name given at construction."""

        return self.name

    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """
        Try to match the pattern in `selector` starting at `index`.

        `flags` is accepted for interface compatibility and is not used here.
        Returns the match object, or `None` when the pattern does not match.
        """

        matcher = self.re_pattern
        return matcher.match(selector, index)

321 

322 

class SpecialPseudoPattern(SelectorPattern):
    """
    Dispatching pattern for functional pseudo-classes with dedicated syntax.

    Holds several sub-patterns, each registered under one or more pseudo-class
    names. Matching first reads the `:name(` prefix and then delegates to the
    sub-pattern registered for that name.
    """

    def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
        """
        Build the name -> sub-pattern table.

        Each entry of `patterns` is `(token name, pseudo-class names, regex, pattern class)`.
        One pattern object is created per entry and shared by all of its names.
        """

        self.patterns = {}
        for entry in patterns:
            token_name, pseudo_names, regex, factory = entry[:4]
            compiled = factory(token_name, regex)
            self.patterns.update(dict.fromkeys(pseudo_names, compiled))

        # Sub-pattern that produced the most recent successful match.
        self.matched_name = None  # type: SelectorPattern | None
        self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)

    def get_name(self) -> str:
        """Return the token name of the last sub-pattern that matched, or `''` if none has."""

        if self.matched_name is None:
            return ''
        return self.matched_name.get_name()

    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """
        Match a special pseudo-class at `index`.

        Returns `None` when the text is not `:name(`, when the name is not
        registered, or when the registered sub-pattern does not match. On success
        the sub-pattern is remembered so `get_name` can report it.
        """

        head = self.re_pseudo_name.match(selector, index)
        if not head:
            return None

        handler = self.patterns.get(util.lower(css_unescape(head.group('name'))))
        if not handler:
            return None

        result = handler.match(selector, index, flags)
        if result:
            self.matched_name = handler
        return result

358 

359 

class _Selector:
    """
    Mutable builder for one compound selector.

    The parser fills in the attributes while it walks the tokens of a compound
    selector. When the compound selector is complete, `freeze` converts the
    collected data into an immutable `ct` object that can be pickled and hashed.
    """

    def __init__(self, **kwargs: Any) -> None:
        """Populate every field from `kwargs`, using an empty default when absent."""

        get = kwargs.get
        self.tag = get('tag', None)  # type: ct.SelectorTag | None
        self.ids = get('ids', [])  # type: list[str]
        self.classes = get('classes', [])  # type: list[str]
        self.attributes = get('attributes', [])  # type: list[ct.SelectorAttribute]
        self.nth = get('nth', [])  # type: list[ct.SelectorNth]
        self.selectors = get('selectors', [])  # type: list[ct.SelectorList]
        self.relations = get('relations', [])  # type: list[_Selector]
        self.rel_type = get('rel_type', None)  # type: str | None
        self.contains = get('contains', [])  # type: list[ct.SelectorContains]
        self.lang = get('lang', [])  # type: list[ct.SelectorLang]
        self.flags = get('flags', 0)  # type: int
        self.no_match = get('no_match', False)  # type: bool

    def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList:
        """
        Freeze a list of related selectors into a `ct.SelectorList`.

        The first selector becomes the head; the remaining ones are appended to
        the head's own `relations` (the head is modified in place) before the
        head is frozen. An empty input gives an empty `ct.SelectorList`.
        """

        if not relations:
            return ct.SelectorList()

        head, *rest = relations
        head.relations.extend(rest)
        return ct.SelectorList([head.freeze()])

    def freeze(self) -> ct.Selector | ct.SelectorNull:
        """Return the immutable form: `ct.SelectorNull` if flagged `no_match`, else `ct.Selector`."""

        if self.no_match:
            return ct.SelectorNull()

        frozen_relations = self._freeze_relations(self.relations)
        return ct.Selector(
            self.tag,
            tuple(self.ids),
            tuple(self.classes),
            tuple(self.attributes),
            tuple(self.nth),
            tuple(self.selectors),
            frozen_relations,
            self.rel_type,
            tuple(self.contains),
            tuple(self.lang),
            self.flags
        )

    def __str__(self) -> str:  # pragma: no cover
        """Return a debug representation listing every field."""

        return (
            f'_Selector(tag={self.tag!r}, ids={self.ids!r}, classes={self.classes!r}, attributes={self.attributes!r}, '
            f'nth={self.nth!r}, selectors={self.selectors!r}, relations={self.relations!r}, '
            f'rel_type={self.rel_type!r}, contains={self.contains!r}, lang={self.lang!r}, flags={self.flags!r}, '
            f'no_match={self.no_match!r})'
        )

    __repr__ = __str__

426 

427 

428class CSSParser: 

429 """Parse CSS selectors.""" 

430 

# Token patterns recognized by the parser. Each pattern's name is the key that
# `parse_selectors` dispatches on (e.g. "pseudo_class", "at_rule", "amp").
# NOTE(review): the tokenizer loop is outside the visible part of the file;
# presumably patterns are tried in this order and the first match wins, which
# would make the order significant -- confirm before reordering.
css_tokens = (
    SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
    # Functional pseudo-classes with dedicated argument syntax. The reported
    # token name is that of whichever sub-pattern matched.
    SpecialPseudoPattern(
        (
            (
                "pseudo_contains",
                (':contains', ':-soup-contains', ':-soup-contains-own'),
                PAT_PSEUDO_CONTAINS,
                SelectorPattern
            ),
            ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
            ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
            ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
            ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
        )
    ),
    SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
    SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS),
    SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT),
    SelectorPattern("amp", PAT_AMP),
    SelectorPattern("at_rule", PAT_AT_RULE),
    SelectorPattern("id", PAT_ID),
    SelectorPattern("class", PAT_CLASS),
    SelectorPattern("tag", PAT_TAG),
    SelectorPattern("attribute", PAT_ATTR),
    SelectorPattern("combine", PAT_COMBINE)
)

458 

def __init__(
    self,
    selector: str,
    custom: dict[str, str | ct.SelectorList] | None = None,
    flags: int = 0
) -> None:
    """
    Prepare a parser for `selector`.

    NUL characters in the selector are replaced with U+FFFD. `custom` is the
    table of custom selectors (kept by reference, so entries compiled during
    parsing are written back to it); an empty table is used when it is `None`.
    `flags` is stored as given, and debug output is enabled when it contains
    `util.DEBUG`.
    """

    self.pattern = selector.replace('\x00', '\ufffd')
    self.flags = flags
    self.debug = flags & util.DEBUG
    self.custom = custom if custom is not None else {}

471 

def parse_attribute_selector(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Create attribute selector from the returned regex match.

    Reads the `attr_ns`, `attr_name`, `cmp`, `value` and `case` groups of `m`,
    builds the regular expression implementing the comparison and appends a
    `ct.SelectorAttribute` to `sel`. The non-standard `!=` operator is stored as
    a nested `:not([attr=value])` selector instead.

    Case handling: an explicit `i`/`s` flag decides case sensitivity. Without a
    flag, the `type` attribute is matched case-insensitively and a second,
    case-sensitive pattern is stored alongside it; every other attribute is
    case-sensitive.

    Always returns `True` (a selector component was parsed).
    """

    op = m.group('cmp')
    case = util.lower(m.group('case')) if m.group('case') else None
    ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
    attr = css_unescape(m.group('attr_name'))
    inverse = False
    is_type = False
    pattern2 = None
    value = ''

    if case:
        flags = (re.I if case == 'i' else 0) | re.DOTALL
    elif util.lower(attr) == 'type':
        flags = re.I | re.DOTALL
        is_type = True
    else:
        flags = re.DOTALL

    if op:
        raw_value = m.group('value')
        if raw_value.startswith(('"', "'")):
            # Quoted string: strip the quotes and unescape in string mode.
            value = css_unescape(raw_value[1:-1], True)
        else:
            value = css_unescape(raw_value)

    if not op:
        # Attribute name
        pattern = None
    elif op.startswith('^'):
        # Value start with
        pattern = re.compile(r'^%s.*' % re.escape(value), flags)
    elif op.startswith('$'):
        # Value ends with
        pattern = re.compile(r'.*?%s$' % re.escape(value), flags)
    elif op.startswith('*'):
        # Value contains
        pattern = re.compile(r'.*?%s.*' % re.escape(value), flags)
    elif op.startswith('~'):
        # Value contains word within space separated list
        # `~=` should match nothing if it is empty or contains whitespace,
        # so if either of these cases is present, use `[^\s\S]` which cannot be matched.
        value = r'[^\s\S]' if not value or RE_WS.search(value) else re.escape(value)
        pattern = re.compile(r'.*?(?:(?<=^)|(?<=[ \t\r\n\f]))%s(?=(?:[ \t\r\n\f]|$)).*' % value, flags)
    elif op.startswith('|'):
        # Value starts with word in dash separated list
        pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
    else:
        # Value matches
        pattern = re.compile(r'^%s$' % re.escape(value), flags)
        if op.startswith('!'):
            # Equivalent to `:not([attr=value])`
            inverse = True
    if is_type and pattern:
        # Case-sensitive twin of `pattern`. Only `re.I` is dropped: `re.DOTALL`
        # must be kept, otherwise `.` stops at newlines and multi-line attribute
        # values would match differently from the case-insensitive pattern.
        pattern2 = re.compile(pattern.pattern, re.DOTALL)

    # Append the attribute selector
    sel_attr = ct.SelectorAttribute(attr, ns, pattern, pattern2)
    if inverse:
        # If we are using `!=`, we need to nest the pattern under a `:not()`.
        sub_sel = _Selector()
        sub_sel.attributes.append(sel_attr)
        not_list = ct.SelectorList([sub_sel.freeze()], True, False)
        sel.selectors.append(not_list)
    else:
        sel.attributes.append(sel_attr)

    return True

541 

def parse_tag_pattern(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Record the tag (type) selector found in `m` on `sel`.

    The namespace prefix is the `tag_ns` group without its trailing `|`, or
    `None` when no prefix was written. Both prefix and tag name are unescaped.
    Always returns `True`.
    """

    ns_token = m.group('tag_ns')
    prefix = css_unescape(ns_token[:-1]) if ns_token else None
    name = css_unescape(m.group('tag_name'))
    sel.tag = ct.SelectorTag(name, prefix)
    return True

550 

def parse_pseudo_class_custom(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Parse custom pseudo class alias.

    Custom selectors are compiled lazily, the first time they are referenced.
    While a custom selector is being compiled its entry is removed from the
    table, so a selector that refers to itself (directly or indirectly) is
    reported as undefined instead of recursing forever. The compiled selector
    list is then stored back in the table for later references.

    Raises `SelectorSyntaxError` when the name is not in the table.
    Always returns `True` otherwise.
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    selector = self.custom.get(pseudo)
    if selector is None:
        position = m.end(0)
        raise SelectorSyntaxError(
            f"Undefined custom selector '{pseudo}' found at position {position}",
            self.pattern,
            position
        )

    if not isinstance(selector, ct.SelectorList):
        # Still raw selector text: compile it now.
        del self.custom[pseudo]
        nested = CSSParser(selector, custom=self.custom, flags=self.flags)
        selector = nested.process_selectors(flags=FLG_PSEUDO)
        self.custom[pseudo] = selector

    sel.selectors.append(selector)
    return True

578 

def parse_pseudo_class(
    self,
    sel: _Selector,
    m: Match[str],
    has_selector: bool,
    iselector: Iterator[tuple[str, Match[str]]],
    is_html: bool
) -> tuple[bool, bool]:
    """
    Parse pseudo class.

    The name is unescaped and lowercased, then handled according to whether it
    was written with an opening parenthesis:

    - functional and in `PSEUDO_COMPLEX`: the argument is parsed by `parse_pseudo_open`,
    - bare and in `PSEUDO_SIMPLE`: a flag, a predefined selector list or `nth`
      entries are added to `sel`,
    - functional and in `PSEUDO_COMPLEX_NO_MATCH`: the argument is parsed and
      discarded, and `sel` is marked as matching nothing,
    - bare and in `PSEUDO_SIMPLE_NO_MATCH`: `sel` is marked as matching nothing.

    Returns `(has_selector, is_html)`; `is_html` is switched on by `:defined`.

    Raises `SelectorSyntaxError` for a known name used with the wrong syntax and
    for an unknown name.
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    complex_pseudo = bool(m.group('open'))

    if complex_pseudo and pseudo in PSEUDO_COMPLEX:
        return self.parse_pseudo_open(sel, pseudo, has_selector, iselector, m.end(0)), is_html

    if not complex_pseudo and pseudo in PSEUDO_SIMPLE:
        # Pseudo-classes expressed as a selector flag.
        flag_for = {
            ':root': ct.SEL_ROOT,
            ':defined': ct.SEL_DEFINED,
            ':scope': ct.SEL_SCOPE,
            ':empty': ct.SEL_EMPTY
        }
        # Pseudo-classes expressed as a predefined selector list.
        selector_for = {
            ':link': CSS_LINK,
            ':any-link': CSS_LINK,
            ':checked': CSS_CHECKED,
            ':default': CSS_DEFAULT,
            ':indeterminate': CSS_INDETERMINATE,
            ":disabled": CSS_DISABLED,
            ":enabled": CSS_ENABLED,
            ":required": CSS_REQUIRED,
            ":muted": CSS_MUTED,
            ":open": CSS_OPEN,
            ":optional": CSS_OPTIONAL,
            ":read-only": CSS_READ_ONLY,
            ":read-write": CSS_READ_WRITE,
            ":in-range": CSS_IN_RANGE,
            ":out-of-range": CSS_OUT_OF_RANGE,
            ":placeholder-shown": CSS_PLACEHOLDER_SHOWN
        }
        # Pseudo-classes expressed as `nth` entries: the fourth and fifth
        # `ct.SelectorNth` arguments for each entry to add.
        nth_for = {
            ':first-child': ((False, False),),
            ':last-child': ((False, True),),
            ':first-of-type': ((True, False),),
            ':last-of-type': ((True, True),),
            ':only-child': ((False, False), (False, True)),
            ':only-of-type': ((True, False), (True, True))
        }

        if pseudo in flag_for:
            sel.flags |= flag_for[pseudo]
            if pseudo == ':defined':
                is_html = True
        elif pseudo in selector_for:
            sel.selectors.append(selector_for[pseudo])
        elif pseudo in nth_for:
            for of_type, last in nth_for[pseudo]:
                sel.nth.append(ct.SelectorNth(1, False, 0, of_type, last, ct.SelectorList()))
        return True, is_html

    if complex_pseudo and pseudo in PSEUDO_COMPLEX_NO_MATCH:
        # Consume the argument so parsing can continue, but ignore its result.
        self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
        sel.no_match = True
        return True, is_html

    if not complex_pseudo and pseudo in PSEUDO_SIMPLE_NO_MATCH:
        sel.no_match = True
        return True, is_html

    if pseudo in PSEUDO_SUPPORTED:
        # Known name, but parentheses were used where they should not be (or vice versa).
        raise SelectorSyntaxError(
            f"Invalid syntax for pseudo class '{pseudo}'",
            self.pattern,
            m.start(0)
        )

    raise SelectorSyntaxError(
        f"'{pseudo}' was detected as a pseudo-class and is either unsupported or invalid. "
        "If the syntax was not intended to be recognized as a pseudo-class, please escape the colon.",
        self.pattern,
        m.start(0)
    )

680 

def parse_pseudo_nth(
    self,
    sel: _Selector,
    m: Match[str],
    has_selector: bool,
    iselector: Iterator[tuple[str, Match[str]]]
) -> bool:
    """
    Parse `nth` pseudo.

    Handles `:nth-child`, `:nth-last-child`, `:nth-of-type` and
    `:nth-last-of-type`. The `an+b` expression (or `even` / `odd`) is converted
    to the integers `a` and `b` plus a boolean telling whether `n` was present,
    and a `ct.SelectorNth` is appended to `sel.nth`.

    For the `-child` variants an `of S` clause, when present, is parsed from
    `iselector`; otherwise `CSS_NTH_OF_S_DEFAULT` is used.

    Always returns `True`.
    """

    groups = m.groupdict()
    postfix = '_child' if groups.get('pseudo_nth_child') else '_type'
    pseudo_sel = util.lower(css_unescape(groups['name']))
    content = util.lower(groups.get('nth' + postfix))

    if content == 'even':
        # 2n
        s1, s2, var = 2, 0, True
    elif content == 'odd':
        # 2n+1
        s1, s2, var = 2, 1, True
    else:
        nth_parts = cast(Match[str], RE_NTH.match(content))
        coefficient = nth_parts.group('a')
        var = coefficient.endswith('n')

        # Coefficient `a`: a bare `n` means 1; otherwise drop the trailing `n`.
        a_text = '-' if nth_parts.group('s1') == '-' else ''
        if coefficient.startswith('n'):
            a_text += '1'
        elif var:
            a_text += coefficient[:-1]
        else:
            a_text += coefficient

        # Offset `b`: defaults to 0 when absent.
        offset = nth_parts.group('b')
        if offset:
            b_text = ('-' if nth_parts.group('s2') == '-' else '') + offset
        else:
            b_text = '0'

        s1 = int(a_text, 10)
        s2 = int(b_text, 10)

    if postfix == '_child':
        if m.group('of'):
            # Parse the rest of `of S`.
            nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
        else:
            # Use default `*|*` for `of S`.
            nth_sel = CSS_NTH_OF_S_DEFAULT
        if pseudo_sel in (':nth-child', ':nth-last-child'):
            from_end = pseudo_sel == ':nth-last-child'
            sel.nth.append(ct.SelectorNth(s1, var, s2, False, from_end, nth_sel))
    elif pseudo_sel in (':nth-of-type', ':nth-last-of-type'):
        from_end = pseudo_sel == ':nth-last-of-type'
        sel.nth.append(ct.SelectorNth(s1, var, s2, True, from_end, ct.SelectorList()))

    return True

745 

def parse_pseudo_open(
    self,
    sel: _Selector,
    name: str,
    has_selector: bool,
    iselector: Iterator[tuple[str, Match[str]]],
    index: int
) -> bool:
    """
    Parse the selector list inside a functional pseudo-class.

    The nested list is parsed with `FLG_PSEUDO | FLG_OPEN`, plus `FLG_NOT` for
    `:not`, `FLG_RELATIVE` for `:has`, or `FLG_FORGIVE` for `:is` / `:where`.
    The result is appended to `sel.selectors`. Always returns `True`.
    """

    extra_flags = {
        ':not': FLG_NOT,
        ':has': FLG_RELATIVE,
        ':where': FLG_FORGIVE,
        ':is': FLG_FORGIVE
    }
    flags = FLG_PSEUDO | FLG_OPEN | extra_flags.get(name, 0)

    nested = self.parse_selectors(iselector, index, flags)
    sel.selectors.append(nested)
    return True

768 

def parse_has_combinator(
    self,
    sel: _Selector,
    m: Match[str],
    has_selector: bool,
    selectors: list[_Selector],
    rel_type: str,
    index: int
) -> tuple[bool, _Selector, str]:
    """
    Parse a combinator token inside a relative selector list.

    `rel_type` is the pending leading combinator, written as `:` followed by
    the combinator character. A comma closes the current relative selector,
    resets the leading combinator to descendant and starts a new entry in
    `selectors`. Any other combinator closes the current compound selector (if
    there is one) and becomes the new leading combinator.

    Returns `(has_selector, sel, rel_type)` where `has_selector` is `False` and
    `sel` is a fresh, empty `_Selector`.

    Raises `SelectorSyntaxError` when two non-whitespace combinators follow each
    other.
    """

    combinator = m.group('relation').strip() or WS_COMBINATOR

    if combinator == COMMA_COMBINATOR:
        sel.rel_type = rel_type
        selectors[-1].relations.append(sel)
        rel_type = ":" + WS_COMBINATOR
        selectors.append(_Selector())
        return False, _Selector(), rel_type

    if has_selector:
        # End the current selector and associate the leading combinator with this selector.
        sel.rel_type = rel_type
        selectors[-1].relations.append(sel)
    elif rel_type[1:] != WS_COMBINATOR:
        # It's impossible to have two whitespace combinators after each other as the patterns
        # will gobble up trailing whitespace. It is also impossible to have a whitespace
        # combinator after any other kind for the same reason. But we could have
        # multiple non-whitespace combinators. So if the current combinator is not a whitespace,
        # then we've hit the multiple combinator case, so we should fail.
        raise SelectorSyntaxError(
            f'The multiple combinators at position {index}',
            self.pattern,
            index
        )

    # Set the leading combinator for the next selector.
    return False, _Selector(), ':' + combinator

811 

def parse_combinator(
    self,
    sel: _Selector,
    m: Match[str],
    has_selector: bool,
    selectors: list[_Selector],
    relations: list[_Selector],
    is_pseudo: bool,
    is_forgive: bool,
    index: int
) -> tuple[bool, _Selector]:
    """
    Parse a combinator token in a regular (non-relative) selector list.

    A comma finishes the current complex selector and adds it to `selectors`.
    Any other combinator finishes the current compound selector and leaves it
    in `relations` as the sole pending relation of the next one.

    A combinator with no selector before it is an error, except for a comma in
    a forgiving list, where the empty entry is recorded as a "no match"
    selector.

    Returns `(has_selector, sel)` where `has_selector` is `False` and `sel` is a
    fresh, empty `_Selector`. `selectors` and `relations` are modified in place.
    """

    combinator = m.group('relation').strip() or WS_COMBINATOR
    is_comma = combinator == COMMA_COMBINATOR

    if not has_selector:
        if not (is_forgive and is_comma):
            raise SelectorSyntaxError(
                f"The combinator '{combinator}' at position {index}, must have a selector before it",
                self.pattern,
                index
            )

        # If we are in a forgiving pseudo class, just make the selector a "no match"
        sel.no_match = True
        relations.clear()
        selectors.append(sel)
    elif is_comma:
        if not sel.tag and not is_pseudo:
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        sel.relations.extend(relations)
        selectors.append(sel)
        relations.clear()
    else:
        sel.relations.extend(relations)
        sel.rel_type = combinator
        relations.clear()
        relations.append(sel)

    return False, _Selector()

859 

def parse_class_id(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Record a class (`.name`) or id (`#name`) selector on `sel`.

    The leading `.` or `#` is dropped and the name is unescaped. A token
    starting with `.` goes to `sel.classes`; anything else goes to `sel.ids`.
    Always returns `True`.
    """

    token = m.group(0)
    bucket = sel.classes if token.startswith('.') else sel.ids
    bucket.append(css_unescape(token[1:]))
    return True

870 

def parse_pseudo_contains(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Parse contains.

    Handles `:-soup-contains(...)`, `:-soup-contains-own(...)` and the
    deprecated `:contains(...)` (which emits a `FutureWarning`). The argument is
    a comma separated list of quoted strings and/or identifiers; each one is
    unescaped and collected into a `ct.SelectorContains` appended to
    `sel.contains`.

    Always returns `True`.
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    if pseudo == ":contains":
        warnings.warn(  # noqa: B028
            "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
            FutureWarning
        )
    contains_own = pseudo == ":-soup-contains-own"

    # Tokenize the raw text, exactly as `parse_pseudo_lang` does. Unescaping the
    # whole list first would unescape every value twice, and would expose
    # escaped quotes and newlines to the tokenizer, splitting quoted strings in
    # the wrong places.
    values = m.group('values')
    patterns = []
    for token in RE_VALUES.finditer(values):
        if token.group('split'):
            continue
        value = token.group('value')
        if value.startswith(("'", '"')):
            # Quoted string: strip the quotes and unescape in string mode.
            value = css_unescape(value[1:-1], True)
        else:
            value = css_unescape(value)
        patterns.append(value)
    sel.contains.append(ct.SelectorContains(patterns, contains_own))
    return True

895 

def parse_pseudo_lang(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Parse the argument list of `:lang(...)`.

    The `values` group is split into its comma separated items. Quoted items
    have their quotes removed and are unescaped as strings; the others are
    unescaped as identifiers. The resulting list is wrapped in a
    `ct.SelectorLang` and appended to `sel.lang`. Always returns `True`.
    """

    raw_values = (
        token.group('value')
        for token in RE_VALUES.finditer(m.group('values'))
        if not token.group('split')
    )
    patterns = [
        css_unescape(raw[1:-1], True) if raw.startswith(('"', "'")) else css_unescape(raw)
        for raw in raw_values
    ]

    sel.lang.append(ct.SelectorLang(patterns))
    return True

916 

def parse_pseudo_dir(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Parse `:dir(ltr)` / `:dir(rtl)`.

    Sets `ct.SEL_DIR_LTR` on `sel.flags` when the (case-insensitive) direction
    is `ltr`, and `ct.SEL_DIR_RTL` otherwise. Always returns `True`.
    """

    direction = util.lower(m.group('dir'))
    if direction == 'ltr':
        sel.flags |= ct.SEL_DIR_LTR
    else:
        sel.flags |= ct.SEL_DIR_RTL
    return True

924 

def parse_selectors(
    self,
    iselector: Iterator[tuple[str, Match[str]]],
    index: int = 0,
    flags: int = 0
) -> ct.SelectorList:
    """
    Parse selectors.

    Consumes `(token name, match)` pairs from `iselector` until the iterator is
    exhausted or, when `FLG_OPEN` is set, until a `pseudo_close` token is seen.
    `index` is the position used in error messages and is advanced to the end of
    each processed token. `flags` is a bit mask of the `FLG_*` values.

    Returns a `ct.SelectorList` of the frozen selectors.

    Raises `SelectorSyntaxError` for a missing selector, a misplaced tag name, or an
    unmatched/unclosed pseudo-class, and `NotImplementedError` for at-rules and
    pseudo-elements.
    """

    # Working state
    sel = _Selector()
    selectors = []
    relations = []  # type: list[_Selector]
    has_selector = False
    closed = False
    rel_type = ":" + WS_COMBINATOR

    # Decode the flag bits
    is_open = bool(flags & FLG_OPEN)
    is_pseudo = bool(flags & FLG_PSEUDO)
    is_relative = bool(flags & FLG_RELATIVE)
    is_not = bool(flags & FLG_NOT)
    is_html = bool(flags & FLG_HTML)
    is_default = bool(flags & FLG_DEFAULT)
    is_indeterminate = bool(flags & FLG_INDETERMINATE)
    is_in_range = bool(flags & FLG_IN_RANGE)
    is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
    is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
    is_forgive = bool(flags & FLG_FORGIVE)

    # Report every enabled flag when debugging
    if self.debug:  # pragma: no cover
        debug_flags = (
            ('is_pseudo', is_pseudo),
            ('is_open', is_open),
            ('is_relative', is_relative),
            ('is_not', is_not),
            ('is_html', is_html),
            ('is_default', is_default),
            ('is_indeterminate', is_indeterminate),
            ('is_in_range', is_in_range),
            ('is_out_of_range', is_out_of_range),
            ('is_placeholder_shown', is_placeholder_shown),
            ('is_forgive', is_forgive),
        )
        for label, enabled in debug_flags:
            if enabled:
                print(f' {label}: True')

    # Relative selectors need an initial selector in the list to hang relations on
    if is_relative:
        selectors.append(_Selector())

    # Token handlers that share the `(sel, m, has_selector) -> has_selector` shape
    simple_handlers = {
        'pseudo_class_custom': self.parse_pseudo_class_custom,
        'pseudo_contains': self.parse_pseudo_contains,
        'pseudo_lang': self.parse_pseudo_lang,
        'attribute': self.parse_attribute_selector,
        'class': self.parse_class_id,
        'id': self.parse_class_id,
    }

    try:
        while True:
            key, m = next(iselector)
            handler = simple_handlers.get(key)

            if handler is not None:
                has_selector = handler(sel, m, has_selector)
            elif key == "at_rule":
                raise NotImplementedError(f"At-rules found at position {m.start(0)}")
            elif key == "amp":
                sel.flags |= ct.SEL_SCOPE
                has_selector = True
            elif key == 'pseudo_class':
                has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
            elif key == 'pseudo_element':
                raise NotImplementedError(f"Pseudo-element found at position {m.start(0)}")
            elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
                has_selector = self.parse_pseudo_nth(sel, m, has_selector, iselector)
            elif key == 'pseudo_dir':
                has_selector = self.parse_pseudo_dir(sel, m, has_selector)
                # Currently only supports HTML
                is_html = True
            elif key == 'pseudo_close':
                if not has_selector:
                    if not is_forgive:
                        raise SelectorSyntaxError(
                            f"Expected a selector at position {m.start(0)}",
                            self.pattern,
                            m.start(0)
                        )
                    sel.no_match = True
                if not is_open:
                    raise SelectorSyntaxError(
                        f"Unmatched pseudo-class close at position {m.start(0)}",
                        self.pattern,
                        m.start(0)
                    )
                # Leave the loop without advancing `index` past the closing token
                closed = True
                break
            elif key == 'combine':
                if is_relative:
                    has_selector, sel, rel_type = self.parse_has_combinator(
                        sel, m, has_selector, selectors, rel_type, index
                    )
                else:
                    has_selector, sel = self.parse_combinator(
                        sel, m, has_selector, selectors, relations, is_pseudo, is_forgive, index
                    )
            elif key == 'tag':
                if has_selector:
                    raise SelectorSyntaxError(
                        f"Tag name found at position {m.start(0)} instead of at the start",
                        self.pattern,
                        m.start(0)
                    )
                has_selector = self.parse_tag_pattern(sel, m, has_selector)

            index = m.end(0)
    except StopIteration:
        pass

    # An opened pseudo-class must have seen its closing token
    if is_open and not closed:
        raise SelectorSyntaxError(
            f"Unclosed pseudo-class at position {index}",
            self.pattern,
            index
        )

    if has_selector:
        # Finish the selector piece that was in progress
        if not sel.tag and not is_pseudo:
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        if is_relative:
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
        else:
            sel.relations.extend(relations)
            relations.clear()
            selectors.append(sel)
    elif is_forgive and not (selectors and relations):
        # Forgiving pseudo-classes accept empty slots such as `:is()`;
        # the empty slot becomes a selector that matches nothing.
        sel.no_match = True
        relations.clear()
        selectors.append(sel)
        has_selector = True

    if not has_selector:
        # We will always need to finish a selector when `:has()` is used as it leads with combining.
        # May apply to others as well.
        raise SelectorSyntaxError(
            f'Expected a selector at position {index}',
            self.pattern,
            index
        )

    # Some patterns require additional logic, such as default. The flag that tells the
    # matcher which extra logic to run is placed on the last selector in the list.
    # NOTE(review): this is a plain assignment, so it replaces whatever flags the last
    # selector already carried, and when several of these are set the last one wins.
    special_flags = (
        (is_default, ct.SEL_DEFAULT),
        (is_indeterminate, ct.SEL_INDETERMINATE),
        (is_in_range, ct.SEL_IN_RANGE),
        (is_out_of_range, ct.SEL_OUT_OF_RANGE),
        (is_placeholder_shown, ct.SEL_PLACEHOLDER_SHOWN),
    )
    for enabled, flag in special_flags:
        if enabled:
            selectors[-1].flags = flag

    # Return selector list
    return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)

1107 

def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
    """
    Iterate selector tokens.

    Yields `(token name, match)` for each token of `pattern`, trying the entries of
    `self.css_tokens` in order at the current position and taking the first match.

    Raises `SelectorSyntaxError` when no token matches at the current position.
    """

    # Whitespace and comments at the start and end of the pattern are not scanned
    leading = RE_WS_BEGIN.search(pattern)
    trailing = RE_WS_END.search(pattern)
    index = leading.end(0) if leading else 0
    end = (trailing.start(0) if trailing else len(pattern)) - 1

    if self.debug:  # pragma: no cover
        print(f'## PARSING: {pattern!r}')

    # Characters that begin a known selector type, used to give a more specific error
    known_starts = {
        '[': 'attribute',
        '.': 'class',
        '#': 'id',
        ':': 'pseudo-class',
    }

    while index <= end:
        for token in self.css_tokens:
            m = token.match(pattern, index, self.flags)
            if not m:
                continue
            name = token.get_name()
            if self.debug:  # pragma: no cover
                print(f"TOKEN: '{name}' --> {m.group(0)!r} at position {m.start(0)}")
            index = m.end(0)
            yield name, m
            break
        else:
            # Nothing matched here: blame the selector type the character starts,
            # otherwise report the character itself.
            c = pattern[index]
            kind = known_starts.get(c)
            if kind is not None:
                msg = f"Malformed {kind} selector at position {index}"
            else:
                msg = f"Invalid character {c!r} position {index}"
            # The error carries `self.pattern`, not the `pattern` argument
            raise SelectorSyntaxError(msg, self.pattern, index)

    if self.debug:  # pragma: no cover
        print('## END PARSING')

1148 

def process_selectors(self, index: int = 0, flags: int = 0) -> ct.SelectorList:
    """
    Process selectors.

    Tokenizes `self.pattern` with `selector_iter` and hands the token stream,
    together with `index` and `flags`, to `parse_selectors`.
    """

    tokens = self.selector_iter(self.pattern)
    return self.parse_selectors(tokens, index, flags)

1153 

1154 

# Precompiled CSS selector lists for pseudo-classes (the matcher may apply additional
# logic beyond the pattern). A few patterns are order dependent because they use
# patterns that were previously compiled.

# `:link` and `:any-link`
CSS_LINK = CSSParser('html|*:is(a, area)[href]').process_selectors(flags=FLG_PSEUDO | FLG_HTML)

# `:checked`
CSS_CHECKED = CSSParser(
    '''
    html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
    '''
).process_selectors(
    flags=FLG_PSEUDO | FLG_HTML
)

# `:default` (CSS_CHECKED must be compiled first)
CSS_DEFAULT = CSSParser(
    '''
    :checked,

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|form html|*:is(button, input)[type="submit"]
    '''
).process_selectors(
    flags=FLG_PSEUDO | FLG_HTML | FLG_DEFAULT
)

# `:indeterminate`
CSS_INDETERMINATE = CSSParser(
    '''
    html|input[type="checkbox"][indeterminate],
    html|input[type="radio"]:is(:not([name]), [name=""]):not([checked]),
    html|progress:not([value]),

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|input[type="radio"][name]:not([name='']):not([checked])
    '''
).process_selectors(
    flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE
)

# CSS pattern for `:disabled`
# Covers elements carrying `disabled` themselves, options of a disabled optgroup, and
# form controls inside a disabled fieldset (skipping those under its first legend).
# `fieldset` used to be listed twice in the first `:is()` list; the duplicate only added
# a redundant alternative and has been dropped.
CSS_DISABLED = CSSParser(
    '''
    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option)[disabled],
    html|optgroup[disabled] > html|option,
    html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
    html|fieldset[disabled] >
        html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:enabled`
# Same element list as the first `:disabled` alternative (duplicate `fieldset` removed),
# restricted to elements that are not `:disabled`.
CSS_ENABLED = CSSParser(
    '''
    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option):not(:disabled)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)

# `:required`
CSS_REQUIRED = CSSParser('html|*:is(input, textarea, select)[required]').process_selectors(
    flags=FLG_PSEUDO | FLG_HTML
)

# `:optional`
CSS_OPTIONAL = CSSParser('html|*:is(input, textarea, select):not([required])').process_selectors(
    flags=FLG_PSEUDO | FLG_HTML
)

# `:placeholder-shown`
CSS_PLACEHOLDER_SHOWN = CSSParser(
    '''
    html|input:is(
        :not([type]),
        [type=""],
        [type=text],
        [type=search],
        [type=url],
        [type=tel],
        [type=email],
        [type=password],
        [type=number]
    )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
    html|textarea[placeholder]:not([placeholder=''])
    '''
).process_selectors(
    flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN
)

# Default selector list for the "of S" part of `:nth-child`
CSS_NTH_OF_S_DEFAULT = CSSParser('*|*').process_selectors(flags=FLG_PSEUDO)

# `:read-write` (CSS_DISABLED must be compiled first)
CSS_READ_WRITE = CSSParser(
    '''
    html|*:is(
        textarea,
        input:is(
            :not([type]),
            [type=""],
            [type=text],
            [type=search],
            [type=url],
            [type=tel],
            [type=email],
            [type=number],
            [type=password],
            [type=date],
            [type=datetime-local],
            [type=month],
            [type=time],
            [type=week]
        )
    ):not([readonly], :disabled),
    html|*:is([contenteditable=""], [contenteditable="true" i])
    '''
).process_selectors(
    flags=FLG_PSEUDO | FLG_HTML
)

# `:read-only` (the pattern uses `:read-write`)
CSS_READ_ONLY = CSSParser(
    '''
    html|*:not(:read-write)
    '''
).process_selectors(
    flags=FLG_PSEUDO | FLG_HTML
)

# `:in-range`
# The pattern text is the same as for `:out-of-range`; only the flag passed to the
# parser differs.
CSS_IN_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(
    flags=FLG_PSEUDO | FLG_IN_RANGE | FLG_HTML
)

# `:out-of-range`
CSS_OUT_OF_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(
    flags=FLG_PSEUDO | FLG_OUT_OF_RANGE | FLG_HTML
)

# `:open`
CSS_OPEN = CSSParser(
    '''
    html|*:is(details, dialog)[open]
    '''
).process_selectors(
    flags=FLG_PSEUDO | FLG_HTML
)


# `:muted`
CSS_MUTED = CSSParser(
    '''
    html|*:is(video, audio)[muted]
    '''
).process_selectors(
    flags=FLG_PSEUDO | FLG_HTML
)