Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/soupsieve/css_parser.py: 85%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

638 statements  

1"""CSS selector parser.""" 

2from __future__ import annotations 

3import re 

4from functools import lru_cache 

5from . import util 

6from . import css_match as cm 

7from . import css_types as ct 

8from .util import SelectorSyntaxError 

9import warnings 

10from typing import Match, Any, Iterator, cast 

11 

# Code point (U+FFFD) substituted when an escape resolves to the null code point
UNICODE_REPLACEMENT_CHAR = 0xFFFD

# Upper bound that a parser's running selector count is checked against
SELECTOR_LIMIT = 8192

# Simple pseudo classes that take no parameters
PSEUDO_SIMPLE = {
    ':any-link',
    ':empty',
    ':first-child',
    ':first-of-type',
    ':in-range',
    ':open',
    ':out-of-range',
    ':last-child',
    ':last-of-type',
    ':link',
    ':only-child',
    ':only-of-type',
    ':root',
    ':checked',
    ':default',
    ':disabled',
    ':enabled',
    ':indeterminate',
    ':optional',
    ':placeholder-shown',
    ':read-only',
    ':read-write',
    ':required',
    ':scope',
    ':defined',
    ':muted',
}

# Supported, simple pseudo classes that match nothing in the Soup Sieve environment
PSEUDO_SIMPLE_NO_MATCH = {
    ':active',
    ':autofill',
    ':buffering',
    ':current',
    ':focus',
    ':focus-visible',
    ':focus-within',
    ':fullscreen',
    ':future',
    ':host',
    ':hover',
    ':local-link',
    ':past',
    ':paused',
    ':picture-in-picture',
    ':playing',
    ':popover-open',
    ':seeking',
    ':stalled',
    ':target',
    ':target-within',
    ':user-invalid',
    ':volume-locked',
    ':visited',
}

# Complex pseudo classes that take selector lists
PSEUDO_COMPLEX = {
    ':contains',
    ':-soup-contains',
    ':-soup-contains-own',
    ':has',
    ':is',
    ':matches',
    ':not',
    ':where',
}

# Supported, complex (functional) pseudo classes that match nothing.
# `:current` and `:host` are listed here as well as in the simple set because
# both the bare and the functional spelling are accepted.
PSEUDO_COMPLEX_NO_MATCH = {
    ':current',
    ':host',
    ':host-context',
}

# Complex pseudo classes that take very specific parameters and are handled special
PSEUDO_SPECIAL = {
    ':dir',
    ':lang',
    ':nth-child',
    ':nth-last-child',
    ':nth-last-of-type',
    ':nth-of-type',
}

# Every pseudo-class name the parser recognizes, in any form
PSEUDO_SUPPORTED = (
    PSEUDO_SIMPLE
    | PSEUDO_SIMPLE_NO_MATCH
    | PSEUDO_COMPLEX
    | PSEUDO_COMPLEX_NO_MATCH
    | PSEUDO_SPECIAL
)

103 

# Sub-patterns parts
# These fragments are interpolated into larger patterns. `IDENTIFIER` spans
# several lines, so any pattern embedding it must be compiled with `re.X`.
# Whitespace
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
WS = fr'(?:[ \t]|{NEWLINE})'
# Comments
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included
WSC = fr'(?:{WS}|{COMMENTS})'
# CSS escapes
CSS_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$))'
CSS_STRING_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$|{NEWLINE}))'
# CSS Identifier
IDENTIFIER = fr'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})*)
'''
# `nth` content
NTH = fr'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){WSC}*(?:[-+]){WSC}*(?:[0-9]+))?'
# Value: quoted string or identifier
VALUE = fr'''(?:"(?:\\(?:.|{NEWLINE})|[^\\"\r\n\f])*?"|'(?:\\(?:.|{NEWLINE})|[^\\'\r\n\f])*?'|{IDENTIFIER})'''
# Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = fr'(?:{WSC}*(?P<cmp>[!~^|*$]?=){WSC}*(?P<value>{VALUE})(?:{WSC}*(?P<case>[is]))?)?{WSC}*'

# Selector patterns
# IDs (`#id`)
PAT_ID = fr'\#{IDENTIFIER}'
# Classes (`.class`)
PAT_CLASS = fr'\.{IDENTIFIER}'
# Prefix:Tag (`prefix|tag`)
PAT_TAG = fr'(?P<tag_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<tag_name>{IDENTIFIER}|\*)'
# Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = fr'\[{WSC}*(?P<attr_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<attr_name>{IDENTIFIER}){ATTR}\]'
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
PAT_PSEUDO_CLASS = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)?'
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
PAT_PSEUDO_CLASS_SPECIAL = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)'
# Custom pseudo class (`:--custom-pseudo`)
PAT_PSEUDO_CLASS_CUSTOM = fr'(?P<name>:(?=--){IDENTIFIER})'
# Nesting ampersand selector. Matches `&`
PAT_AMP = r'&'
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = fr'{WSC}*\)'
# Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = fr':{PAT_PSEUDO_CLASS}'
# At rule (`@page`, etc.) (not supported)
# NOTE(review): the literal `P` after `@` means this only matches names that
# start with `P` (or `p` when compiled case-insensitively). It looks like a
# leftover rather than intent; confirm against the `at_rule` token handler
# before changing it.
PAT_AT_RULE = fr'@P{IDENTIFIER}'
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
PAT_PSEUDO_NTH_CHILD = fr'''
(?P<pseudo_nth_child>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_child>{NTH}|even|odd))(?:{WSC}*\)|(?P<of>{COMMENTS}*{WS}{WSC}*of{COMMENTS}*{WS}{WSC}*))
'''
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = fr'''
(?P<pseudo_nth_type>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_type>{NTH}|even|odd)){WSC}*\)
'''
# Pseudo class language (`:lang("*-de", en)`)
PAT_PSEUDO_LANG = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
# Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<dir>ltr|rtl){WSC}*\)'
# Combining characters (`>`, `~`, ` `, `+`, `,`)
PAT_COMBINE = fr'{WSC}*?(?P<relation>[,+>~]|{WS}(?![,+>~])){WSC}*'
# Extra: Contains (`:contains(text)`)
PAT_PSEUDO_CONTAINS = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'

# Regular expressions
# CSS escape pattern
# Groups: (1) hex escape, (2) single escaped character, (3) backslash at end of
# input and, for the string variant only, (4) escaped newline.
# A hex escape may be terminated by ONE whitespace character only (`WS`), which
# matches `CSS_ESCAPES`. Allowing a comment here (`WSC`) made group 1 capture
# text such as `\41/*x*/`, which is not a valid hexadecimal literal.
RE_CSS_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$))', re.I)
RE_CSS_STR_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$)|(\\{NEWLINE}))', re.I)
# Pattern to break up `nth` specifiers
RE_NTH = re.compile(fr'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){WSC}*(?P<s2>[-+]){WSC}*(?P<b>[0-9]+))?', re.I)
# Pattern to iterate multiple values.
RE_VALUES = re.compile(fr'(?:(?P<value>{VALUE})|(?P<split>{WSC}*,{WSC}*))', re.X)
# Whitespace checks
RE_WS = re.compile(WS)
RE_WS_BEGIN = re.compile(fr'^{WSC}*')
RE_WS_END = re.compile(fr'{WSC}*$')
# Validates a complete custom pseudo-class name (`:--name`)
RE_CUSTOM = re.compile(fr'^{PAT_PSEUDO_CLASS_CUSTOM}$', re.X)

182 

# Constants
# List split token
COMMA_COMBINATOR = ','
# Relation token for descendant
WS_COMBINATOR = " "

# Parse flags (one bit each, so they can be OR-ed together)
FLG_PSEUDO = 1 << 0
FLG_NOT = 1 << 1
FLG_RELATIVE = 1 << 2
FLG_DEFAULT = 1 << 3
FLG_HTML = 1 << 4
FLG_INDETERMINATE = 1 << 5
FLG_OPEN = 1 << 6
FLG_IN_RANGE = 1 << 7
FLG_OUT_OF_RANGE = 1 << 8
FLG_PLACEHOLDER_SHOWN = 1 << 9
FLG_FORGIVE = 1 << 10

# Maximum cached patterns to store
_MAXCACHE = 500

204 

205 

@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(
    pattern: str,
    namespaces: ct.Namespaces | None,
    custom: ct.CustomSelectors | None,
    flags: int
) -> cm.SoupSieve:
    """
    Compile `pattern` into a `SoupSieve` object, memoizing the result.

    The custom selectors are normalized with `process_custom` and handed to a
    `CSSParser`; the parsed selector list is wrapped together with the original
    arguments. Results are cached on the full argument tuple, so every argument
    must be hashable.
    """

    parser = CSSParser(pattern, custom=process_custom(custom), flags=flags)
    return cm.SoupSieve(pattern, parser.process_selectors(), namespaces, custom, flags)

227 

228 

def _purge_cache() -> None:
    """Drop every compiled selector held by the `_cached_css_compile` cache."""

    _cached_css_compile.cache_clear()

233 

234 

def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
    """
    Validate and normalize a mapping of custom pseudo-class selectors.

    Each key must be a valid custom pseudo-class name (`:--name`). Keys are
    stored unescaped and lowercased, the same normalization that
    `parse_pseudo_class_custom` applies to a name before looking it up, so an
    escaped spelling such as `:--\\41` is registered under the key that the
    lookup will actually use.

    Returns an empty dictionary when `custom` is `None`.

    Raises `SelectorSyntaxError` for an invalid name and `KeyError` when two
    keys normalize to the same name.
    """

    custom_selectors = {}  # type: dict[str, str | ct.SelectorList]
    if custom is None:
        return custom_selectors

    for key, value in custom.items():
        name = util.lower(key)
        if RE_CUSTOM.match(name) is None:
            raise SelectorSyntaxError(f"The name '{name}' is not a valid custom pseudo-class name")
        # Compare and store the normalized form: checking the escaped spelling
        # would miss a collision between e.g. `:--a` and `:--\61`.
        normalized = util.lower(css_unescape(name))
        if normalized in custom_selectors:
            raise KeyError(f"The custom selector '{name}' has already been registered")
        custom_selectors[normalized] = value
    return custom_selectors

248 

249 

def css_unescape(content: str, string: bool = False) -> str:
    """
    Unescape CSS value.

    `content` is scanned with `RE_CSS_ESC`, or with `RE_CSS_STR_ESC` when
    `string` is true. The string variant additionally recognizes an escaped
    newline, which lets a quoted string span multiple lines; such a
    continuation is removed from the result.

    Hex escapes that resolve to zero, to a surrogate, or to a value above
    U+10FFFF are replaced with U+FFFD instead of being passed to `chr()`.
    """

    hex_digits = '0123456789abcdefABCDEF'

    def replace(m: Match[str]) -> str:
        """Replace with the appropriate substitute."""

        hex_escape = m.group(1)
        if hex_escape:
            # Group 1 is the backslash, the hex digits and an optional
            # terminator. Read only the digits so that whatever terminator the
            # pattern allowed can never reach `int()`.
            end = 1
            while end < len(hex_escape) and hex_escape[end] in hex_digits:
                end += 1
            codepoint = int(hex_escape[1:end], 16)
            if codepoint == 0 or 0xD800 <= codepoint <= 0xDFFF or codepoint > 0x10FFFF:
                codepoint = UNICODE_REPLACEMENT_CHAR
            return chr(codepoint)
        if m.group(2):
            # Any other escaped character stands for itself
            return m.group(2)[1:]
        if m.group(3):
            # Backslash at the very end of the input
            return '\ufffd'
        # Escaped newline (string mode only): line continuation
        return ''

    return (RE_CSS_STR_ESC if string else RE_CSS_ESC).sub(replace, content)

275 

276 

def escape(ident: str) -> str:
    """
    Escape `ident` so it can be used as a CSS identifier.

    A lone `-` is backslash-escaped. A null character becomes U+FFFD. Control
    characters (U+0001-U+001F, U+007F) and a digit in the first position, or
    in the second position after a leading `-`, are written as a hex escape
    followed by a space. `-`, `_`, ASCII letters, digits and anything at or
    above U+0080 are kept as is. Every other character is backslash-escaped.
    """

    if ident == '-':
        # Need to escape identifier that is a single `-` with no other characters
        return '\\-'

    leading_dash = ident.startswith('-')
    pieces = []
    for position, char in enumerate(ident):
        cp = ord(char)
        is_digit = 0x30 <= cp <= 0x39
        at_start = position == 0 or (leading_dash and position == 1)
        if cp == 0x00:
            pieces.append('\ufffd')
        elif 0x01 <= cp <= 0x1F or cp == 0x7F or (is_digit and at_start):
            pieces.append(f'\\{cp:x} ')
        elif (
            is_digit or cp >= 0x80 or cp in (0x2D, 0x5F) or
            0x41 <= cp <= 0x5A or 0x61 <= cp <= 0x7A
        ):
            pieces.append(char)
        else:
            pieces.append(f'\\{char}')
    return ''.join(pieces)

303 

304 

class SelectorPattern:
    """A named regular expression that recognizes one kind of selector token."""

    def __init__(self, name: str, pattern: str) -> None:
        """Store the token name and compile `pattern` (case-insensitive, verbose, Unicode)."""

        self.name = name
        self.re_pattern = re.compile(pattern, re.IGNORECASE | re.VERBOSE | re.UNICODE)

    def get_name(self) -> str:
        """Return the token name given at construction."""

        return self.name

    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """
        Try to match the pattern against `selector` starting at `index`.

        `flags` is accepted for interface compatibility and is not used here.
        """

        return self.re_pattern.match(selector, index)

323 

324 

class SpecialPseudoPattern(SelectorPattern):
    """
    Dispatcher for functional pseudo-classes that need their own pattern.

    The pseudo-class name is read first and then used to pick the specific
    pattern that should match the whole construct.
    """

    def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
        """
        Build the lookup table.

        Each entry is `(token name, pseudo-class names, regex, pattern class)`.
        One pattern object is created per entry and shared by all of its
        pseudo-class names.
        """

        self.patterns = {}
        for token_name, pseudo_names, regex, pattern_class in patterns:
            compiled = pattern_class(token_name, regex)
            for pseudo_name in pseudo_names:
                self.patterns[pseudo_name] = compiled

        # Pattern object of the most recent successful match
        self.matched_name = None  # type: SelectorPattern | None
        self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)

    def get_name(self) -> str:
        """Return the token name of the last pattern that matched, or `''` if none has."""

        if self.matched_name is None:
            return ''
        return self.matched_name.get_name()

    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """
        Match a special pseudo-class at `index`.

        Returns `None` when no `:name(` is found there, when the name has no
        registered pattern, or when the registered pattern does not match.
        A successful match is remembered for `get_name`.
        """

        found = self.re_pseudo_name.match(selector, index)
        if not found:
            return None

        pattern = self.patterns.get(util.lower(css_unescape(found.group('name'))))
        if not pattern:
            return None

        result = pattern.match(selector, index, flags)
        if result:
            self.matched_name = pattern
        return result

360 

361 

class _Selector:
    """
    Mutable scratch object for one compound selector.

    The parser fills this in piece by piece. When the compound selector is
    complete, `freeze` converts it into the immutable selector type from
    `css_types`.
    """

    def __init__(self, **kwargs: Any) -> None:
        """Initialize every field from `kwargs`, falling back to an empty value."""

        get = kwargs.get
        self.tag = get('tag', None)  # type: ct.SelectorTag | None
        self.ids = get('ids', [])  # type: list[str]
        self.classes = get('classes', [])  # type: list[str]
        self.attributes = get('attributes', [])  # type: list[ct.SelectorAttribute]
        self.nth = get('nth', [])  # type: list[ct.SelectorNth]
        self.selectors = get('selectors', [])  # type: list[ct.SelectorList]
        self.relations = get('relations', [])  # type: list[_Selector]
        self.rel_type = get('rel_type', None)  # type: str | None
        self.contains = get('contains', [])  # type: list[ct.SelectorContains]
        self.lang = get('lang', [])  # type: list[ct.SelectorLang]
        self.flags = get('flags', 0)  # type: int
        self.no_match = get('no_match', False)  # type: bool

    def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList:
        """
        Freeze a chain of related selectors.

        The first entry becomes the head of the chain: the remaining entries
        are appended to its own `relations` (mutating it) and the frozen head
        is returned in a one-element list. An empty input gives an empty list.
        """

        if not relations:
            return ct.SelectorList()

        head = relations[0]
        head.relations.extend(relations[1:])
        return ct.SelectorList([head.freeze()])

    def freeze(self) -> ct.Selector | ct.SelectorNull:
        """Return the immutable form: `SelectorNull` when flagged `no_match`, else `Selector`."""

        if self.no_match:
            return ct.SelectorNull()

        return ct.Selector(
            self.tag,
            tuple(self.ids),
            tuple(self.classes),
            tuple(self.attributes),
            tuple(self.nth),
            tuple(self.selectors),
            self._freeze_relations(self.relations),
            self.rel_type,
            tuple(self.contains),
            tuple(self.lang),
            self.flags
        )

    def __str__(self) -> str:  # pragma: no cover
        """Return a debugging representation listing every field."""

        field_names = (
            'tag', 'ids', 'classes', 'attributes', 'nth', 'selectors', 'relations',
            'rel_type', 'contains', 'lang', 'flags', 'no_match'
        )
        shown = ', '.join(f'{field}={getattr(self, field)!r}' for field in field_names)
        return f'_Selector({shown})'

    __repr__ = __str__

428 

429 

430class CSSParser: 

431 """Parse CSS selectors.""" 

432 

433 css_tokens = ( 

434 SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE), 

435 SpecialPseudoPattern( 

436 ( 

437 ( 

438 "pseudo_contains", 

439 (':contains', ':-soup-contains', ':-soup-contains-own'), 

440 PAT_PSEUDO_CONTAINS, 

441 SelectorPattern 

442 ), 

443 ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern), 

444 ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern), 

445 ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern), 

446 ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern) 

447 ) 

448 ), 

449 SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM), 

450 SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS), 

451 SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT), 

452 SelectorPattern("amp", PAT_AMP), 

453 SelectorPattern("at_rule", PAT_AT_RULE), 

454 SelectorPattern("id", PAT_ID), 

455 SelectorPattern("class", PAT_CLASS), 

456 SelectorPattern("tag", PAT_TAG), 

457 SelectorPattern("attribute", PAT_ATTR), 

458 SelectorPattern("combine", PAT_COMBINE) 

459 ) 

460 

461 def __init__( 

462 self, 

463 selector: str, 

464 custom: dict[str, str | ct.SelectorList] | None = None, 

465 flags: int = 0 

466 ) -> None: 

467 """Initialize.""" 

468 

469 self.pattern = selector.replace('\x00', '\ufffd') 

470 self.flags = flags 

471 self.debug = self.flags & util.DEBUG 

472 self.custom = {} if custom is None else custom 

473 self.count = 0 

474 

475 def check_count(self) -> None: 

476 """Check the current selector count.""" 

477 

478 if self.count > SELECTOR_LIMIT: 

479 raise ValueError(f'Selector exceeds pseudo-class nesting limit of {SELECTOR_LIMIT}') 

480 

481 def parse_attribute_selector(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool: 

482 """Create attribute selector from the returned regex match.""" 

483 

484 inverse = False 

485 op = m.group('cmp') 

486 case = util.lower(m.group('case')) if m.group('case') else None 

487 ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else '' 

488 attr = css_unescape(m.group('attr_name')) 

489 is_type = False 

490 pattern2 = None 

491 value = '' 

492 

493 if case: 

494 flags = (re.I if case == 'i' else 0) | re.DOTALL 

495 elif util.lower(attr) == 'type': 

496 flags = re.I | re.DOTALL 

497 is_type = True 

498 else: 

499 flags = re.DOTALL 

500 

501 if op: 

502 if m.group('value').startswith(('"', "'")): 

503 value = css_unescape(m.group('value')[1:-1], True) 

504 else: 

505 value = css_unescape(m.group('value')) 

506 

507 if not op: 

508 # Attribute name 

509 pattern = None 

510 elif op.startswith('^'): 

511 # Value start with 

512 pattern = re.compile(r'^%s.*' % re.escape(value), flags) 

513 elif op.startswith('$'): 

514 # Value ends with 

515 pattern = re.compile(r'.*?%s$' % re.escape(value), flags) 

516 elif op.startswith('*'): 

517 # Value contains 

518 pattern = re.compile(r'.*?%s.*' % re.escape(value), flags) 

519 elif op.startswith('~'): 

520 # Value contains word within space separated list 

521 # `~=` should match nothing if it is empty or contains whitespace, 

522 # so if either of these cases is present, use `[^\s\S]` which cannot be matched. 

523 value = r'[^\s\S]' if not value or RE_WS.search(value) else re.escape(value) 

524 pattern = re.compile(r'.*?(?:(?<=^)|(?<=[ \t\r\n\f]))%s(?=(?:[ \t\r\n\f]|$)).*' % value, flags) 

525 elif op.startswith('|'): 

526 # Value starts with word in dash separated list 

527 pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags) 

528 else: 

529 # Value matches 

530 pattern = re.compile(r'^%s$' % re.escape(value), flags) 

531 if op.startswith('!'): 

532 # Equivalent to `:not([attr=value])` 

533 inverse = True 

534 if is_type and pattern: 

535 pattern2 = re.compile(pattern.pattern) 

536 

537 # Append the attribute selector 

538 sel_attr = ct.SelectorAttribute(attr, ns, pattern, pattern2) 

539 if inverse: 

540 # If we are using `!=`, we need to nest the pattern under a `:not()`. 

541 sub_sel = _Selector() 

542 sub_sel.attributes.append(sel_attr) 

543 not_list = ct.SelectorList([sub_sel.freeze()], True, False) 

544 sel.selectors.append(not_list) 

545 else: 

546 sel.attributes.append(sel_attr) 

547 

548 has_selector = True 

549 return has_selector 

550 

551 def parse_tag_pattern(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool: 

552 """Parse tag pattern from regex match.""" 

553 

554 prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None 

555 tag = css_unescape(m.group('tag_name')) 

556 sel.tag = ct.SelectorTag(tag, prefix) 

557 has_selector = True 

558 return has_selector 

559 

560 def parse_pseudo_class_custom(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool: 

561 """ 

562 Parse custom pseudo class alias. 

563 

564 Compile custom selectors as we need them. When compiling a custom selector, 

565 set it to `None` in the dictionary so we can avoid an infinite loop. 

566 """ 

567 

568 pseudo = util.lower(css_unescape(m.group('name'))) 

569 selector = self.custom.get(pseudo) 

570 if selector is None: 

571 raise SelectorSyntaxError( 

572 f"Undefined custom selector '{pseudo}' found at position {m.end(0)}", 

573 self.pattern, 

574 m.end(0) 

575 ) 

576 

577 if not isinstance(selector, ct.SelectorList): 

578 del self.custom[pseudo] 

579 selector = CSSParser( 

580 selector, custom=self.custom, flags=self.flags 

581 ).process_selectors(flags=FLG_PSEUDO) 

582 self.custom[pseudo] = selector 

583 

584 self.count += selector.count 

585 self.check_count() 

586 

587 sel.selectors.append(selector) 

588 has_selector = True 

589 return has_selector 

590 

591 def parse_pseudo_class( 

592 self, 

593 sel: _Selector, 

594 m: Match[str], 

595 has_selector: bool, 

596 iselector: Iterator[tuple[str, Match[str]]], 

597 is_html: bool 

598 ) -> tuple[bool, bool]: 

599 """Parse pseudo class.""" 

600 

601 complex_pseudo = False 

602 pseudo = util.lower(css_unescape(m.group('name'))) 

603 if m.group('open'): 

604 complex_pseudo = True 

605 if complex_pseudo and pseudo in PSEUDO_COMPLEX: 

606 has_selector = self.parse_pseudo_open(sel, pseudo, has_selector, iselector, m.end(0)) 

607 elif not complex_pseudo and pseudo in PSEUDO_SIMPLE: 

608 if pseudo == ':root': 

609 sel.flags |= ct.SEL_ROOT 

610 elif pseudo == ':defined': 

611 sel.flags |= ct.SEL_DEFINED 

612 is_html = True 

613 elif pseudo == ':scope': 

614 sel.flags |= ct.SEL_SCOPE 

615 elif pseudo == ':empty': 

616 sel.flags |= ct.SEL_EMPTY 

617 elif pseudo in (':link', ':any-link'): 

618 self.count += CSS_LINK.count 

619 self.check_count() 

620 sel.selectors.append(CSS_LINK) 

621 elif pseudo == ':checked': 

622 self.count += CSS_CHECKED.count 

623 self.check_count() 

624 sel.selectors.append(CSS_CHECKED) 

625 elif pseudo == ':default': 

626 self.count += CSS_DEFAULT.count 

627 self.check_count() 

628 sel.selectors.append(CSS_DEFAULT) 

629 elif pseudo == ':indeterminate': 

630 self.count += CSS_INDETERMINATE.count 

631 self.check_count() 

632 sel.selectors.append(CSS_INDETERMINATE) 

633 elif pseudo == ":disabled": 

634 self.count += CSS_DISABLED.count 

635 self.check_count() 

636 sel.selectors.append(CSS_DISABLED) 

637 elif pseudo == ":enabled": 

638 self.count += CSS_ENABLED.count 

639 self.check_count() 

640 sel.selectors.append(CSS_ENABLED) 

641 elif pseudo == ":required": 

642 self.count += CSS_REQUIRED.count 

643 self.check_count() 

644 sel.selectors.append(CSS_REQUIRED) 

645 elif pseudo == ":muted": 

646 self.count += CSS_MUTED.count 

647 self.check_count() 

648 sel.selectors.append(CSS_MUTED) 

649 elif pseudo == ":open": 

650 self.count += CSS_OPEN.count 

651 self.check_count() 

652 sel.selectors.append(CSS_OPEN) 

653 elif pseudo == ":optional": 

654 self.count += CSS_OPTIONAL.count 

655 self.check_count() 

656 sel.selectors.append(CSS_OPTIONAL) 

657 elif pseudo == ":read-only": 

658 self.count += CSS_READ_ONLY.count 

659 self.check_count() 

660 sel.selectors.append(CSS_READ_ONLY) 

661 elif pseudo == ":read-write": 

662 self.count += CSS_READ_WRITE.count 

663 self.check_count() 

664 sel.selectors.append(CSS_READ_WRITE) 

665 elif pseudo == ":in-range": 

666 self.count += CSS_IN_RANGE.count 

667 self.check_count() 

668 sel.selectors.append(CSS_IN_RANGE) 

669 elif pseudo == ":out-of-range": 

670 self.count += CSS_OUT_OF_RANGE.count 

671 self.check_count() 

672 sel.selectors.append(CSS_OUT_OF_RANGE) 

673 elif pseudo == ":placeholder-shown": 

674 self.count += CSS_PLACEHOLDER_SHOWN.count 

675 self.check_count() 

676 sel.selectors.append(CSS_PLACEHOLDER_SHOWN) 

677 elif pseudo == ':first-child': 

678 sel.nth.append(ct.SelectorNth(1, False, 0, False, False, ct.SelectorList())) 

679 elif pseudo == ':last-child': 

680 sel.nth.append(ct.SelectorNth(1, False, 0, False, True, ct.SelectorList())) 

681 elif pseudo == ':first-of-type': 

682 sel.nth.append(ct.SelectorNth(1, False, 0, True, False, ct.SelectorList())) 

683 elif pseudo == ':last-of-type': 

684 sel.nth.append(ct.SelectorNth(1, False, 0, True, True, ct.SelectorList())) 

685 elif pseudo == ':only-child': 

686 sel.nth.extend( 

687 [ 

688 ct.SelectorNth(1, False, 0, False, False, ct.SelectorList()), 

689 ct.SelectorNth(1, False, 0, False, True, ct.SelectorList()) 

690 ] 

691 ) 

692 elif pseudo == ':only-of-type': 

693 sel.nth.extend( 

694 [ 

695 ct.SelectorNth(1, False, 0, True, False, ct.SelectorList()), 

696 ct.SelectorNth(1, False, 0, True, True, ct.SelectorList()) 

697 ] 

698 ) 

699 has_selector = True 

700 elif complex_pseudo and pseudo in PSEUDO_COMPLEX_NO_MATCH: 

701 self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN) 

702 sel.no_match = True 

703 has_selector = True 

704 elif not complex_pseudo and pseudo in PSEUDO_SIMPLE_NO_MATCH: 

705 sel.no_match = True 

706 has_selector = True 

707 elif pseudo in PSEUDO_SUPPORTED: 

708 raise SelectorSyntaxError( 

709 f"Invalid syntax for pseudo class '{pseudo}'", 

710 self.pattern, 

711 m.start(0) 

712 ) 

713 else: 

714 raise SelectorSyntaxError( 

715 f"'{pseudo}' was detected as a pseudo-class and is either unsupported or invalid. " 

716 "If the syntax was not intended to be recognized as a pseudo-class, please escape the colon.", 

717 self.pattern, 

718 m.start(0) 

719 ) 

720 

721 return has_selector, is_html 

722 

723 def parse_pseudo_nth( 

724 self, 

725 sel: _Selector, 

726 m: Match[str], 

727 has_selector: bool, 

728 iselector: Iterator[tuple[str, Match[str]]] 

729 ) -> bool: 

730 """Parse `nth` pseudo.""" 

731 

732 mdict = m.groupdict() 

733 if mdict.get('pseudo_nth_child'): 

734 postfix = '_child' 

735 else: 

736 postfix = '_type' 

737 mdict['name'] = util.lower(css_unescape(mdict['name'])) 

738 content = util.lower(mdict.get('nth' + postfix)) 

739 if content == 'even': 

740 # 2n 

741 s1 = 2 

742 s2 = 0 

743 var = True 

744 elif content == 'odd': 

745 # 2n+1 

746 s1 = 2 

747 s2 = 1 

748 var = True 

749 else: 

750 nth_parts = cast(Match[str], RE_NTH.match(content)) 

751 _s1 = '-' if nth_parts.group('s1') and nth_parts.group('s1') == '-' else '' 

752 a = nth_parts.group('a') 

753 var = a.endswith('n') 

754 if a.startswith('n'): 

755 _s1 += '1' 

756 elif var: 

757 _s1 += a[:-1] 

758 else: 

759 _s1 += a 

760 _s2 = '-' if nth_parts.group('s2') and nth_parts.group('s2') == '-' else '' 

761 if nth_parts.group('b'): 

762 _s2 += nth_parts.group('b') 

763 else: 

764 _s2 = '0' 

765 s1 = int(_s1, 10) 

766 s2 = int(_s2, 10) 

767 

768 pseudo_sel = mdict['name'] 

769 if postfix == '_child': 

770 if m.group('of'): 

771 # Parse the rest of `of S`. 

772 nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN) 

773 else: 

774 # Use default `*|*` for `of S`. 

775 nth_sel = CSS_NTH_OF_S_DEFAULT 

776 self.count += nth_sel.count 

777 self.check_count() 

778 if pseudo_sel == ':nth-child': 

779 sel.nth.append(ct.SelectorNth(s1, var, s2, False, False, nth_sel)) 

780 elif pseudo_sel == ':nth-last-child': 

781 sel.nth.append(ct.SelectorNth(s1, var, s2, False, True, nth_sel)) 

782 else: 

783 if pseudo_sel == ':nth-of-type': 

784 sel.nth.append(ct.SelectorNth(s1, var, s2, True, False, ct.SelectorList())) 

785 elif pseudo_sel == ':nth-last-of-type': 

786 sel.nth.append(ct.SelectorNth(s1, var, s2, True, True, ct.SelectorList())) 

787 has_selector = True 

788 return has_selector 

789 

790 def parse_pseudo_open( 

791 self, 

792 sel: _Selector, 

793 name: str, 

794 has_selector: bool, 

795 iselector: Iterator[tuple[str, Match[str]]], 

796 index: int 

797 ) -> bool: 

798 """Parse pseudo with opening bracket.""" 

799 

800 flags = FLG_PSEUDO | FLG_OPEN 

801 if name == ':not': 

802 flags |= FLG_NOT 

803 elif name == ':has': 

804 flags |= FLG_RELATIVE 

805 elif name in (':where', ':is'): 

806 flags |= FLG_FORGIVE 

807 

808 sel.selectors.append(self.parse_selectors(iselector, index, flags)) 

809 has_selector = True 

810 

811 return has_selector 

812 

813 def parse_has_combinator( 

814 self, 

815 sel: _Selector, 

816 m: Match[str], 

817 has_selector: bool, 

818 selectors: list[_Selector], 

819 rel_type: str, 

820 index: int 

821 ) -> tuple[bool, _Selector, str]: 

822 """Parse combinator tokens.""" 

823 

824 combinator = m.group('relation').strip() 

825 if not combinator: 

826 combinator = WS_COMBINATOR 

827 if combinator == COMMA_COMBINATOR: 

828 sel.rel_type = rel_type 

829 selectors[-1].relations.append(sel) 

830 rel_type = ":" + WS_COMBINATOR 

831 selectors.append(_Selector()) 

832 else: 

833 if has_selector: 

834 # End the current selector and associate the leading combinator with this selector. 

835 sel.rel_type = rel_type 

836 selectors[-1].relations.append(sel) 

837 elif rel_type[1:] != WS_COMBINATOR: 

838 # It's impossible to have two whitespace combinators after each other as the patterns 

839 # will gobble up trailing whitespace. It is also impossible to have a whitespace 

840 # combinator after any other kind for the same reason. But we could have 

841 # multiple non-whitespace combinators. So if the current combinator is not a whitespace, 

842 # then we've hit the multiple combinator case, so we should fail. 

843 raise SelectorSyntaxError( 

844 f'The multiple combinators at position {index}', 

845 self.pattern, 

846 index 

847 ) 

848 

849 # Set the leading combinator for the next selector. 

850 rel_type = ':' + combinator 

851 

852 sel = _Selector() 

853 has_selector = False 

854 return has_selector, sel, rel_type 

855 

856 def parse_combinator( 

857 self, 

858 sel: _Selector, 

859 m: Match[str], 

860 has_selector: bool, 

861 selectors: list[_Selector], 

862 relations: list[_Selector], 

863 is_pseudo: bool, 

864 is_forgive: bool, 

865 index: int 

866 ) -> tuple[bool, _Selector]: 

867 """Parse combinator tokens.""" 

868 

869 combinator = m.group('relation').strip() 

870 if not combinator: 

871 combinator = WS_COMBINATOR 

872 if not has_selector: 

873 if not is_forgive or combinator != COMMA_COMBINATOR: 

874 raise SelectorSyntaxError( 

875 f"The combinator '{combinator}' at position {index}, must have a selector before it", 

876 self.pattern, 

877 index 

878 ) 

879 

880 # If we are in a forgiving pseudo class, just make the selector a "no match" 

881 if combinator == COMMA_COMBINATOR: 

882 sel.no_match = True 

883 del relations[:] 

884 selectors.append(sel) 

885 else: 

886 if combinator == COMMA_COMBINATOR: 

887 if not sel.tag and not is_pseudo: 

888 # Implied `*` 

889 sel.tag = ct.SelectorTag('*', None) 

890 sel.relations.extend(relations) 

891 selectors.append(sel) 

892 del relations[:] 

893 else: 

894 sel.relations.extend(relations) 

895 sel.rel_type = combinator 

896 del relations[:] 

897 relations.append(sel) 

898 

899 sel = _Selector() 

900 has_selector = False 

901 

902 return has_selector, sel 

903 

904 def parse_class_id(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool: 

905 """Parse HTML classes and ids.""" 

906 

907 selector = m.group(0) 

908 if selector.startswith('.'): 

909 sel.classes.append(css_unescape(selector[1:])) 

910 else: 

911 sel.ids.append(css_unescape(selector[1:])) 

912 has_selector = True 

913 return has_selector 

914 

915 def parse_pseudo_contains(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool: 

916 """Parse contains.""" 

917 

918 pseudo = util.lower(css_unescape(m.group('name'))) 

919 if pseudo == ":contains": 

920 warnings.warn( # noqa: B028 

921 "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.", 

922 FutureWarning 

923 ) 

924 contains_own = pseudo == ":-soup-contains-own" 

925 values = css_unescape(m.group('values')) 

926 patterns = [] 

927 for token in RE_VALUES.finditer(values): 

928 if token.group('split'): 

929 continue 

930 value = token.group('value') 

931 if value.startswith(("'", '"')): 

932 value = css_unescape(value[1:-1], True) 

933 else: 

934 value = css_unescape(value) 

935 patterns.append(value) 

936 sel.contains.append(ct.SelectorContains(patterns, contains_own)) 

937 has_selector = True 

938 return has_selector 

939 

def parse_pseudo_lang(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Collect the language ranges listed in a `:lang()` token.

    Each comma separated value in the match's `values` group is unescaped (quoted values
    lose their quotes first) and the resulting list is stored as one `ct.SelectorLang`
    entry on `sel.lang`. The incoming `has_selector` is not consulted; `True` is returned.
    """

    def _decode(raw: str) -> str:
        """Unescape one value, treating quoted values as CSS strings."""

        if raw.startswith(('"', "'")):
            return css_unescape(raw[1:-1], True)
        return css_unescape(raw)

    # Separator tokens are dropped; only real values are decoded.
    languages = [
        _decode(piece.group('value'))
        for piece in RE_VALUES.finditer(m.group('values'))
        if not piece.group('split')
    ]

    sel.lang.append(ct.SelectorLang(languages))
    return True

960 

def parse_pseudo_dir(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Apply the direction named in a `:dir()` token to the selector flags.

    A lowercased `dir` group equal to `ltr` sets `ct.SEL_DIR_LTR`; any other value sets
    `ct.SEL_DIR_RTL`. The incoming `has_selector` is not consulted; `True` is returned.
    """

    direction = util.lower(m.group('dir'))
    if direction == 'ltr':
        sel.flags |= ct.SEL_DIR_LTR
    else:
        sel.flags |= ct.SEL_DIR_RTL
    return True

968 

def parse_selectors(
    self,
    iselector: Iterator[tuple[str, Match[str]]],
    index: int = 0,
    flags: int = 0
) -> ct.SelectorList:
    """
    Parse selectors.

    Consumes `(key, match)` token pairs from `iselector` and builds a selector list from them.
    Consumption stops when the iterator is exhausted or, when `FLG_OPEN` is set, at the
    `pseudo_close` token that closes the current pseudo-class.

    `iselector` is the token stream; it is also handed to the pseudo-class parsers.
    `index` is the pattern position used in error messages before any token is consumed; it is
    updated to the end of each token as it is processed.
    `flags` is a combination of the `FLG_*` values describing the context being parsed.

    Returns a `ct.SelectorList` of the frozen selectors together with the `is_not` and `is_html`
    states and the number of tokens counted during this call.

    Raises `NotImplementedError` for at-rules and pseudo-elements, and `SelectorSyntaxError` for a
    missing selector, an unmatched or unclosed pseudo-class, or a tag name that is not first.
    """

    # Initialize important variables
    sel = _Selector()
    selectors = []
    has_selector = False
    closed = False
    relations = []  # type: list[_Selector]
    rel_type = ":" + WS_COMBINATOR
    # Remember the running token count so the amount consumed by this call can be reported.
    count = self.count

    # Setup various flags
    is_open = bool(flags & FLG_OPEN)
    is_pseudo = bool(flags & FLG_PSEUDO)
    is_relative = bool(flags & FLG_RELATIVE)
    is_not = bool(flags & FLG_NOT)
    is_html = bool(flags & FLG_HTML)
    is_default = bool(flags & FLG_DEFAULT)
    is_indeterminate = bool(flags & FLG_INDETERMINATE)
    is_in_range = bool(flags & FLG_IN_RANGE)
    is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
    is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
    is_forgive = bool(flags & FLG_FORGIVE)

    # Print out useful debug stuff
    if self.debug:  # pragma: no cover
        if is_pseudo:
            print(' is_pseudo: True')
        if is_open:
            print(' is_open: True')
        if is_relative:
            print(' is_relative: True')
        if is_not:
            print(' is_not: True')
        if is_html:
            print(' is_html: True')
        if is_default:
            print(' is_default: True')
        if is_indeterminate:
            print(' is_indeterminate: True')
        if is_in_range:
            print(' is_in_range: True')
        if is_out_of_range:
            print(' is_out_of_range: True')
        if is_placeholder_shown:
            print(' is_placeholder_shown: True')
        if is_forgive:
            print(' is_forgive: True')

    # The algorithm for relative selectors require an initial selector in the selector list
    if is_relative:
        selectors.append(_Selector())

    try:
        while True:
            key, m = next(iselector)

            # Combinators and closing parentheses do not count towards the token total.
            if key not in ('combine', 'pseudo_close'):
                self.count += 1
                self.check_count()

            # Handle parts
            if key == "at_rule":
                raise NotImplementedError(f"At-rules found at position {m.start(0)}")
            elif key == "amp":
                sel.flags |= ct.SEL_SCOPE
                has_selector = True
            elif key == 'pseudo_class_custom':
                has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
            elif key == 'pseudo_class':
                has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
            elif key == 'pseudo_element':
                raise NotImplementedError(f"Pseudo-element found at position {m.start(0)}")
            elif key == 'pseudo_contains':
                has_selector = self.parse_pseudo_contains(sel, m, has_selector)
            elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
                has_selector = self.parse_pseudo_nth(sel, m, has_selector, iselector)
            elif key == 'pseudo_lang':
                has_selector = self.parse_pseudo_lang(sel, m, has_selector)
            elif key == 'pseudo_dir':
                has_selector = self.parse_pseudo_dir(sel, m, has_selector)
                # Currently only supports HTML
                is_html = True
            elif key == 'pseudo_close':
                if not has_selector:
                    # An empty slot is an error unless the enclosing pseudo-class is forgiving,
                    # in which case the pending selector is marked as matching nothing.
                    if not is_forgive:
                        raise SelectorSyntaxError(
                            f"Expected a selector at position {m.start(0)}",
                            self.pattern,
                            m.start(0)
                        )
                    sel.no_match = True
                if is_open:
                    closed = True
                    break
                else:
                    raise SelectorSyntaxError(
                        f"Unmatched pseudo-class close at position {m.start(0)}",
                        self.pattern,
                        m.start(0)
                    )
            elif key == 'combine':
                # Relative selector lists use their own combinator handling.
                if is_relative:
                    has_selector, sel, rel_type = self.parse_has_combinator(
                        sel, m, has_selector, selectors, rel_type, index
                    )
                else:
                    has_selector, sel = self.parse_combinator(
                        sel, m, has_selector, selectors, relations, is_pseudo, is_forgive, index
                    )
            elif key == 'attribute':
                has_selector = self.parse_attribute_selector(sel, m, has_selector)
            elif key == 'tag':
                # A tag name is only accepted as the first part of a compound selector.
                if has_selector:
                    raise SelectorSyntaxError(
                        f"Tag name found at position {m.start(0)} instead of at the start",
                        self.pattern,
                        m.start(0)
                    )
                has_selector = self.parse_tag_pattern(sel, m, has_selector)
            elif key in ('class', 'id'):
                has_selector = self.parse_class_id(sel, m, has_selector)

            # Track where the last processed token ended for later error reporting.
            index = m.end(0)
    except StopIteration:
        # The token stream is exhausted; fall through to finish the pending selector.
        pass

    # Handle selectors that are not closed
    if is_open and not closed:
        raise SelectorSyntaxError(
            f"Unclosed pseudo-class at position {index}",
            self.pattern,
            index
        )

    # Cleanup completed selector piece
    if has_selector:
        if not sel.tag and not is_pseudo:
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        if is_relative:
            # Attach the pending selector as a relation of the last selector in the list.
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
        else:
            sel.relations.extend(relations)
            del relations[:]
            selectors.append(sel)

    # Forgive empty slots in pseudo-classes that have lists (and are forgiving)
    elif is_forgive and (not selectors or not relations):
        # Handle normal pseudo-classes with empty slots like `:is()` etc.
        sel.no_match = True
        del relations[:]
        selectors.append(sel)
        has_selector = True

    if not has_selector:
        # We will always need to finish a selector when `:has()` is used as it leads with combining.
        # May apply to others as well.
        raise SelectorSyntaxError(
            f'Expected a selector at position {index}',
            self.pattern,
            index
        )

    # Some patterns require additional logic, such as default. We try to make these the
    # last pattern, and append the appropriate flag to that selector which communicates
    # to the matcher what additional logic is required.
    # NOTE(review): these are plain assignments, so any flags already present on the last
    # selector are replaced rather than combined - confirm that is intended.
    if is_default:
        selectors[-1].flags = ct.SEL_DEFAULT
    if is_indeterminate:
        selectors[-1].flags = ct.SEL_INDETERMINATE
    if is_in_range:
        selectors[-1].flags = ct.SEL_IN_RANGE
    if is_out_of_range:
        selectors[-1].flags = ct.SEL_OUT_OF_RANGE
    if is_placeholder_shown:
        selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN

    # Return selector list
    return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html, self.count - count)

1156 

def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
    """
    Tokenize a selector pattern.

    Yields `(token name, match)` pairs in the order they occur in `pattern`, skipping any
    whitespace and comments found at the very start and very end.

    Raises `SelectorSyntaxError` when none of the token patterns match at the current position.
    """

    # Narrow the scan window so leading and trailing whitespace/comments are never tokenized
    leading = RE_WS_BEGIN.search(pattern)
    trailing = RE_WS_END.search(pattern)
    index = leading.end(0) if leading else 0
    end = (trailing.start(0) if trailing else len(pattern)) - 1

    if self.debug:  # pragma: no cover
        print(f'## PARSING: {pattern!r}')

    while index <= end:
        # The first token pattern that matches at the current position wins
        for v in self.css_tokens:
            m = v.match(pattern, index, self.flags)
            if not m:
                continue
            name = v.get_name()
            if self.debug:  # pragma: no cover
                print(f"TOKEN: '{name}' --> {m.group(0)!r} at position {m.start(0)}")
            index = m.end(0)
            yield name, m
            break
        else:
            # Nothing matched. When the offending character opens a known selector type,
            # report that selector type as malformed; otherwise report the character itself.
            c = pattern[index]
            if c == '[':
                msg = f"Malformed attribute selector at position {index}"
            elif c == '.':
                msg = f"Malformed class selector at position {index}"
            elif c == '#':
                msg = f"Malformed id selector at position {index}"
            elif c == ':':
                msg = f"Malformed pseudo-class selector at position {index}"
            else:
                msg = f"Invalid character {c!r} position {index}"
            raise SelectorSyntaxError(msg, self.pattern, index)

    if self.debug:  # pragma: no cover
        print('## END PARSING')

1197 

def process_selectors(self, index: int = 0, flags: int = 0) -> ct.SelectorList:
    """
    Tokenize `self.pattern` and parse the resulting token stream.

    `index` and `flags` are passed through unchanged to `parse_selectors`, whose result
    is returned.
    """

    tokens = self.selector_iter(self.pattern)
    return self.parse_selectors(tokens, index, flags)

1202 

1203 

# Precompile CSS selector lists for pseudo-classes (additional logic may be required beyond the pattern)
# A few patterns are order dependent as they use patterns previously compiled.
# Every pattern is parsed with `FLG_PSEUDO`; most also pass `FLG_HTML`, and some add a
# pattern specific flag that `parse_selectors` records on the last selector of the list.

# CSS pattern for `:link` and `:any-link`
CSS_LINK = CSSParser(
    'html|*:is(a, area)[href]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:checked`
CSS_CHECKED = CSSParser(
    '''
    html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:default` (must compile CSS_CHECKED first)
CSS_DEFAULT = CSSParser(
    '''
    :checked,

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|form html|*:is(button, input)[type="submit"]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_DEFAULT)
# CSS pattern for `:indeterminate`
CSS_INDETERMINATE = CSSParser(
    '''
    html|input[type="checkbox"][indeterminate],
    html|input[type="radio"]:is(:not([name]), [name=""]):not([checked]),
    html|progress:not([value]),

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|input[type="radio"][name]:not([name='']):not([checked])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
# CSS pattern for `:disabled`
# NOTE(review): `fieldset` is listed twice in the first `:is()` list; the second entry looks redundant.
CSS_DISABLED = CSSParser(
    '''
    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
    html|optgroup[disabled] > html|option,
    html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
    html|fieldset[disabled] >
        html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:enabled`
# The pattern uses `:disabled`.
# NOTE(review): `fieldset` is listed twice in the `:is()` list here as well.
CSS_ENABLED = CSSParser(
    '''
    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:required`
CSS_REQUIRED = CSSParser(
    'html|*:is(input, textarea, select)[required]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:optional`
CSS_OPTIONAL = CSSParser(
    'html|*:is(input, textarea, select):not([required])'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:placeholder-shown`
CSS_PLACEHOLDER_SHOWN = CSSParser(
    '''
    html|input:is(
        :not([type]),
        [type=""],
        [type=text],
        [type=search],
        [type=url],
        [type=tel],
        [type=email],
        [type=password],
        [type=number]
    )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
    html|textarea[placeholder]:not([placeholder=''])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
# CSS pattern default for `:nth-child` "of S" feature
# This is the only pattern here parsed without `FLG_HTML`.
CSS_NTH_OF_S_DEFAULT = CSSParser(
    '*|*'
).process_selectors(flags=FLG_PSEUDO)
# CSS pattern for `:read-write` (CSS_DISABLED must be compiled first)
CSS_READ_WRITE = CSSParser(
    '''
    html|*:is(
        textarea,
        input:is(
            :not([type]),
            [type=""],
            [type=text],
            [type=search],
            [type=url],
            [type=tel],
            [type=email],
            [type=number],
            [type=password],
            [type=date],
            [type=datetime-local],
            [type=month],
            [type=time],
            [type=week]
        )
    ):not([readonly], :disabled),
    html|*:is([contenteditable=""], [contenteditable="true" i])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:read-only`
# The pattern uses `:read-write`.
CSS_READ_ONLY = CSSParser(
    '''
    html|*:not(:read-write)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:in-range`
# The selector text is the same as for `:out-of-range`; only the flag passed differs.
CSS_IN_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_IN_RANGE | FLG_HTML)
# CSS pattern for `:out-of-range`
# The selector text is the same as for `:in-range`; only the flag passed differs.
CSS_OUT_OF_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_OUT_OF_RANGE | FLG_HTML)

# CSS pattern for `:open`
CSS_OPEN = CSSParser(
    '''
    html|*:is(details, dialog)[open]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)


# CSS pattern for `:muted`
CSS_MUTED = CSSParser(
    '''
    html|*:is(video, audio)[muted]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)