Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/soupsieve/css_parser.py: 85%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""CSS selector parser."""
2from __future__ import annotations
3import re
4from functools import lru_cache
5from . import util
6from . import css_match as cm
7from . import css_types as ct
8from .util import SelectorSyntaxError
9import warnings
10from typing import Match, Any, Iterator, cast
# Code point substituted for NUL and otherwise unusable characters
UNICODE_REPLACEMENT_CHAR = 0xFFFD

# Upper bound compared against the parser's running selector count
SELECTOR_LIMIT = 8192

# Pseudo-classes written without parentheses that the parser maps to real matching logic
PSEUDO_SIMPLE = {
    ':any-link', ':checked', ':default', ':defined', ':disabled', ':empty', ':enabled',
    ':first-child', ':first-of-type', ':in-range', ':indeterminate', ':last-child',
    ':last-of-type', ':link', ':muted', ':only-child', ':only-of-type', ':open', ':optional',
    ':out-of-range', ':placeholder-shown', ':read-only', ':read-write', ':required', ':root',
    ':scope',
}

# Pseudo-classes written without parentheses that are accepted but can never match here
PSEUDO_SIMPLE_NO_MATCH = {
    ':active', ':autofill', ':buffering', ':current', ':focus', ':focus-visible',
    ':focus-within', ':fullscreen', ':future', ':host', ':hover', ':local-link', ':past',
    ':paused', ':picture-in-picture', ':playing', ':popover-open', ':seeking', ':stalled',
    ':target', ':target-within', ':user-invalid', ':visited', ':volume-locked',
}

# Functional pseudo-classes whose argument is a selector list
PSEUDO_COMPLEX = {
    ':-soup-contains', ':-soup-contains-own', ':contains', ':has', ':is', ':matches', ':not',
    ':where',
}

# Functional pseudo-classes that are accepted but can never match here
PSEUDO_COMPLEX_NO_MATCH = {':current', ':host', ':host-context'}

# Functional pseudo-classes with their own dedicated argument grammar
PSEUDO_SPECIAL = {
    ':dir', ':lang', ':nth-child', ':nth-last-child', ':nth-last-of-type', ':nth-of-type',
}

# Every pseudo-class name the parser knows about, in any form
PSEUDO_SUPPORTED = (
    PSEUDO_SIMPLE | PSEUDO_SIMPLE_NO_MATCH | PSEUDO_COMPLEX | PSEUDO_COMPLEX_NO_MATCH | PSEUDO_SPECIAL
)
# Sub-patterns parts
# All of the strings below are regular expression source fragments. The patterns built from them
# are compiled with `re.X` (verbose) in this module, so literal whitespace and newlines inside
# the triple-quoted fragments are insignificant and a literal `#` must be escaped.

# Whitespace
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
WS = fr'(?:[ \t]|{NEWLINE})'
# Comments
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included
WSC = fr'(?:{WS}|{COMMENTS})'
# CSS escapes: hex escape (with one optional trailing whitespace), any escaped character,
# or a backslash at the very end of the input.
CSS_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$))'
# Same as above, but inside strings an escaped newline is also allowed (line continuation).
CSS_STRING_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$|{NEWLINE}))'
# CSS Identifier
IDENTIFIER = fr'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})*)
'''
# `nth` content
NTH = fr'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){WSC}*(?:[-+]){WSC}*(?:[0-9]+))?'
# Value: quoted string or identifier
VALUE = fr'''(?:"(?:\\(?:.|{NEWLINE})|[^\\"\r\n\f])*?"|'(?:\\(?:.|{NEWLINE})|[^\\'\r\n\f])*?'|{IDENTIFIER})'''
# Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = fr'(?:{WSC}*(?P<cmp>[!~^|*$]?=){WSC}*(?P<value>{VALUE})(?:{WSC}*(?P<case>[is]))?)?{WSC}*'

# Selector patterns
# IDs (`#id`)
PAT_ID = fr'\#{IDENTIFIER}'
# Classes (`.class`)
PAT_CLASS = fr'\.{IDENTIFIER}'
# Prefix:Tag (`prefix|tag`)
PAT_TAG = fr'(?P<tag_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<tag_name>{IDENTIFIER}|\*)'
# Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = fr'\[{WSC}*(?P<attr_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<attr_name>{IDENTIFIER}){ATTR}\]'
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
PAT_PSEUDO_CLASS = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)?'
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
PAT_PSEUDO_CLASS_SPECIAL = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)'
# Custom pseudo class (`:--custom-pseudo`)
PAT_PSEUDO_CLASS_CUSTOM = fr'(?P<name>:(?=--){IDENTIFIER})'
# Nesting ampersand selector. Matches `&`
PAT_AMP = r'&'
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = fr'{WSC}*\)'
# Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = fr':{PAT_PSEUDO_CLASS}'
# At rule (`@page`, `@media`, etc.) (not supported)
# The pattern is `@` followed directly by an identifier. A stray literal `P` used to sit between
# the two, which (case-insensitively) restricted detection to at-rules whose name starts with `p`.
PAT_AT_RULE = fr'@{IDENTIFIER}'
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
# Either the group is closed right after the `an+b` part, or ` of ` follows and the caller
# continues parsing the selector list `S`.
PAT_PSEUDO_NTH_CHILD = fr'''
(?P<pseudo_nth_child>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_child>{NTH}|even|odd))(?:{WSC}*\)|(?P<of>{COMMENTS}*{WS}{WSC}*of{COMMENTS}*{WS}{WSC}*))
'''
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = fr'''
(?P<pseudo_nth_type>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_type>{NTH}|even|odd)){WSC}*\)
'''
# Pseudo class language (`:lang("*-de", en)`)
PAT_PSEUDO_LANG = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
# Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<dir>ltr|rtl){WSC}*\)'
# Combining characters (`>`, `~`, ` `, `+`, `,`)
PAT_COMBINE = fr'{WSC}*?(?P<relation>[,+>~]|{WS}(?![,+>~])){WSC}*'
# Extra: Contains (`:contains(text)`)
PAT_PSEUDO_CONTAINS = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
# Regular expressions
# CSS escape pattern
# Group 1 is a hex escape, group 2 an escaped character, group 3 a backslash at end of input.
# A hex escape may be followed by one optional whitespace character only (never a comment), so
# `WS` is used here exactly as in `CSS_ESCAPES` and `RE_CSS_STR_ESC`. Capturing a comment into
# group 1 would make the hexadecimal conversion of that group fail.
RE_CSS_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$))', re.I)
# Same as above for string content; group 4 is an escaped newline (line continuation).
RE_CSS_STR_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$)|(\\{NEWLINE}))', re.I)
# Pattern to break up `nth` specifiers
RE_NTH = re.compile(fr'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){WSC}*(?P<s2>[-+]){WSC}*(?P<b>[0-9]+))?', re.I)
# Pattern to iterate multiple values.
RE_VALUES = re.compile(fr'(?:(?P<value>{VALUE})|(?P<split>{WSC}*,{WSC}*))', re.X)
# Whitespace checks
RE_WS = re.compile(WS)
RE_WS_BEGIN = re.compile(fr'^{WSC}*')
RE_WS_END = re.compile(fr'{WSC}*$')
# Validates a complete custom pseudo-class name (`:--name`)
RE_CUSTOM = re.compile(fr'^{PAT_PSEUDO_CLASS_CUSTOM}$', re.X)
# Constants
# Token that separates the entries of a selector list
COMMA_COMBINATOR = ','
# Token used for the descendant relation
WS_COMBINATOR = " "

# Parse flags (one bit each, combined with `|`)
FLG_PSEUDO = 1 << 0
FLG_NOT = 1 << 1
FLG_RELATIVE = 1 << 2
FLG_DEFAULT = 1 << 3
FLG_HTML = 1 << 4
FLG_INDETERMINATE = 1 << 5
FLG_OPEN = 1 << 6
FLG_IN_RANGE = 1 << 7
FLG_OUT_OF_RANGE = 1 << 8
FLG_PLACEHOLDER_SHOWN = 1 << 9
FLG_FORGIVE = 1 << 10

# Maximum number of compiled patterns kept by the compile cache
_MAXCACHE = 500
@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(
    pattern: str,
    namespaces: ct.Namespaces | None,
    custom: ct.CustomSelectors | None,
    flags: int
) -> cm.SoupSieve:
    """Compile `pattern` into a `SoupSieve` object, memoizing results per argument combination."""

    # Custom selectors are validated and normalized before the parser sees them.
    parser = CSSParser(pattern, custom=process_custom(custom), flags=flags)
    selector_list = parser.process_selectors()
    return cm.SoupSieve(pattern, selector_list, namespaces, custom, flags)
def _purge_cache() -> None:
    """Discard every compiled selector held by the `_cached_css_compile` cache."""

    _cached_css_compile.cache_clear()
def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
    """
    Validate and normalize user supplied custom selectors.

    Each key must be a valid custom pseudo-class name (`:--name`). Keys are stored
    unescaped and lower-cased, which is the same normalization the parser applies to a
    custom pseudo-class name before looking it up.

    Raises `SelectorSyntaxError` for an invalid name and `KeyError` when two keys
    normalize to the same name.
    """

    custom_selectors = {}  # type: dict[str, str | ct.SelectorList]
    if custom is None:
        return custom_selectors

    for key, value in custom.items():
        name = util.lower(key)
        if RE_CUSTOM.match(name) is None:
            raise SelectorSyntaxError(f"The name '{name}' is not a valid custom pseudo-class name")
        # Detect duplicates on the normalized form that is actually used as the dictionary key;
        # otherwise `:--a` and `:--\61` would silently overwrite one another.
        normalized = util.lower(css_unescape(name))
        if normalized in custom_selectors:
            raise KeyError(f"The custom selector '{name}' has already been registered")
        custom_selectors[normalized] = value
    return custom_selectors
def css_unescape(content: str, string: bool = False) -> str:
    """
    Unescape CSS value.

    When `string` is true, `content` is treated as the inside of a quoted string, where an
    escaped newline is a line continuation and is removed.

    Hex escapes that are zero, a surrogate, or beyond the last Unicode code point are replaced
    with U+FFFD instead of being converted.
    """

    def replace(m: Match[str]) -> str:
        """Replace with the appropriate substitute."""

        if m.group(1):
            # Hex escape; `int` tolerates the optional trailing whitespace captured with it.
            codepoint = int(m.group(1)[1:], 16)
            # Up to six hex digits can exceed what `chr` accepts (it raises `ValueError`),
            # and surrogates are not valid characters on their own.
            if codepoint == 0 or codepoint > 0x10FFFF or 0xD800 <= codepoint <= 0xDFFF:
                codepoint = UNICODE_REPLACEMENT_CHAR
            value = chr(codepoint)
        elif m.group(2):
            # Escaped literal character: drop the backslash.
            value = m.group(2)[1:]
        elif m.group(3):
            # Lone backslash at the end of the input.
            value = '\ufffd'
        else:
            # Escaped newline inside a string (only the string pattern has this group).
            value = ''

        return value

    return (RE_CSS_ESC if not string else RE_CSS_STR_ESC).sub(replace, content)
def escape(ident: str) -> str:
    """Return `ident` escaped so that it can be used as a CSS identifier."""

    # An identifier consisting of a single `-` must be escaped as a whole.
    if ident == '-':
        return '\\-'

    leading_dash = ident.startswith('-')
    pieces = []
    for position, char in enumerate(ident):
        code = ord(char)
        is_digit = 0x30 <= code <= 0x39
        if code == 0x00:
            # NUL becomes the replacement character.
            pieces.append('\ufffd')
        elif code <= 0x1F or code == 0x7F:
            # Control characters are written as hex escapes.
            pieces.append(f'\\{code:x} ')
        elif is_digit and (position == 0 or (leading_dash and position == 1)):
            # A digit may not start an identifier, nor directly follow a leading dash.
            pieces.append(f'\\{code:x} ')
        elif (
            is_digit or code in (0x2D, 0x5F) or code >= 0x80 or
            (0x41 <= code <= 0x5A) or (0x61 <= code <= 0x7A)
        ):
            # `-`, `_`, digits, ASCII letters and non-ASCII characters pass through.
            pieces.append(char)
        else:
            # Everything else is escaped with a backslash.
            pieces.append(f'\\{char}')
    return ''.join(pieces)
class SelectorPattern:
    """A named regular expression that recognizes one kind of selector token."""

    def __init__(self, name: str, pattern: str) -> None:
        """Store the token name and compile `pattern` (case-insensitive, verbose, Unicode)."""

        compile_flags = re.I | re.X | re.U
        self.re_pattern = re.compile(pattern, compile_flags)
        self.name = name

    def get_name(self) -> str:
        """Return the token name this pattern was created with."""

        return self.name

    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """Try to match `selector` starting at `index`; `flags` is accepted but not used here."""

        return self.re_pattern.match(selector, index)
class SpecialPseudoPattern(SelectorPattern):
    """Selector pattern that picks a dedicated sub-pattern based on the pseudo-class name."""

    def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
        """
        Build the pseudo-class name to pattern table.

        Each entry is `(token name, pseudo-class names, regex source, pattern class)`; one
        pattern object is created per entry and shared by all of its pseudo-class names.
        """

        self.patterns = {}
        for token_name, pseudo_names, regex, factory in patterns:
            compiled = factory(token_name, regex)
            for pseudo_name in pseudo_names:
                self.patterns[pseudo_name] = compiled

        # Sub-pattern that produced the most recent successful match
        self.matched_name = None  # type: SelectorPattern | None
        self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)

    def get_name(self) -> str:
        """Return the token name of the last matched sub-pattern, or `''` if none matched yet."""

        if self.matched_name is None:
            return ''
        return self.matched_name.get_name()

    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """Match `:name(` at `index`, then delegate to the sub-pattern registered for `name`."""

        opening = self.re_pseudo_name.match(selector, index)
        if not opening:
            return None

        candidate = self.patterns.get(util.lower(css_unescape(opening.group('name'))))
        if not candidate:
            return None

        result = candidate.match(selector, index, flags)
        if result:
            # Remember which sub-pattern matched so `get_name` reports its token name.
            self.matched_name = candidate
        return result
class _Selector:
    """
    Mutable scratch object for one compound selector.

    The parser fills in the parts of a compound selector as it encounters them.
    When the compound selector is complete, `freeze` converts the collected data
    into the immutable selector objects from `css_types`.
    """

    def __init__(self, **kwargs: Any) -> None:
        """Initialize every field from `kwargs`, falling back to an empty default."""

        get = kwargs.get
        self.tag = get('tag')  # type: ct.SelectorTag | None
        self.ids = get('ids', [])  # type: list[str]
        self.classes = get('classes', [])  # type: list[str]
        self.attributes = get('attributes', [])  # type: list[ct.SelectorAttribute]
        self.nth = get('nth', [])  # type: list[ct.SelectorNth]
        self.selectors = get('selectors', [])  # type: list[ct.SelectorList]
        self.relations = get('relations', [])  # type: list[_Selector]
        self.rel_type = get('rel_type')  # type: str | None
        self.contains = get('contains', [])  # type: list[ct.SelectorContains]
        self.lang = get('lang', [])  # type: list[ct.SelectorLang]
        self.flags = get('flags', 0)  # type: int
        self.no_match = get('no_match', False)  # type: bool

    def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList:
        """Chain `relations` under their first entry and freeze that entry into a selector list."""

        if not relations:
            return ct.SelectorList()

        head = relations[0]
        # The remaining relations hang off the first one before it is frozen.
        head.relations.extend(relations[1:])
        return ct.SelectorList([head.freeze()])

    def freeze(self) -> ct.Selector | ct.SelectorNull:
        """Return the immutable form of this selector (`SelectorNull` when it can never match)."""

        if self.no_match:
            return ct.SelectorNull()

        return ct.Selector(
            self.tag,
            tuple(self.ids),
            tuple(self.classes),
            tuple(self.attributes),
            tuple(self.nth),
            tuple(self.selectors),
            self._freeze_relations(self.relations),
            self.rel_type,
            tuple(self.contains),
            tuple(self.lang),
            self.flags
        )

    def __str__(self) -> str:  # pragma: no cover
        """Return a debugging representation listing every field."""

        field_names = (
            'tag', 'ids', 'classes', 'attributes', 'nth', 'selectors', 'relations',
            'rel_type', 'contains', 'lang', 'flags', 'no_match'
        )
        rendered = ', '.join(f'{field}={getattr(self, field)!r}' for field in field_names)
        return f'_Selector({rendered})'

    __repr__ = __str__
430class CSSParser:
431 """Parse CSS selectors."""
    # Token patterns known to the parser. The name given to each pattern is the key that
    # `parse_selectors` dispatches on. The `SpecialPseudoPattern` entry reports the name of
    # whichever of its sub-patterns matched (`pseudo_contains`, `pseudo_nth_child`, ...).
    # NOTE(review): the order presumably determines which pattern is tried first by the
    # tokenizer (not visible here) - confirm before reordering.
    css_tokens = (
        SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
        SpecialPseudoPattern(
            (
                (
                    "pseudo_contains",
                    (':contains', ':-soup-contains', ':-soup-contains-own'),
                    PAT_PSEUDO_CONTAINS,
                    SelectorPattern
                ),
                ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
                ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
                ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
                ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
            )
        ),
        SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
        SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS),
        SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT),
        SelectorPattern("amp", PAT_AMP),
        SelectorPattern("at_rule", PAT_AT_RULE),
        SelectorPattern("id", PAT_ID),
        SelectorPattern("class", PAT_CLASS),
        SelectorPattern("tag", PAT_TAG),
        SelectorPattern("attribute", PAT_ATTR),
        SelectorPattern("combine", PAT_COMBINE)
    )
461 def __init__(
462 self,
463 selector: str,
464 custom: dict[str, str | ct.SelectorList] | None = None,
465 flags: int = 0
466 ) -> None:
467 """Initialize."""
469 self.pattern = selector.replace('\x00', '\ufffd')
470 self.flags = flags
471 self.debug = self.flags & util.DEBUG
472 self.custom = {} if custom is None else custom
473 self.count = 0
475 def check_count(self) -> None:
476 """Check the current selector count."""
478 if self.count > SELECTOR_LIMIT:
479 raise ValueError(f'Selector exceeds pseudo-class nesting limit of {SELECTOR_LIMIT}')
481 def parse_attribute_selector(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
482 """Create attribute selector from the returned regex match."""
484 inverse = False
485 op = m.group('cmp')
486 case = util.lower(m.group('case')) if m.group('case') else None
487 ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
488 attr = css_unescape(m.group('attr_name'))
489 is_type = False
490 pattern2 = None
491 value = ''
493 if case:
494 flags = (re.I if case == 'i' else 0) | re.DOTALL
495 elif util.lower(attr) == 'type':
496 flags = re.I | re.DOTALL
497 is_type = True
498 else:
499 flags = re.DOTALL
501 if op:
502 if m.group('value').startswith(('"', "'")):
503 value = css_unescape(m.group('value')[1:-1], True)
504 else:
505 value = css_unescape(m.group('value'))
507 if not op:
508 # Attribute name
509 pattern = None
510 elif op.startswith('^'):
511 # Value start with
512 pattern = re.compile(r'^%s.*' % re.escape(value), flags)
513 elif op.startswith('$'):
514 # Value ends with
515 pattern = re.compile(r'.*?%s$' % re.escape(value), flags)
516 elif op.startswith('*'):
517 # Value contains
518 pattern = re.compile(r'.*?%s.*' % re.escape(value), flags)
519 elif op.startswith('~'):
520 # Value contains word within space separated list
521 # `~=` should match nothing if it is empty or contains whitespace,
522 # so if either of these cases is present, use `[^\s\S]` which cannot be matched.
523 value = r'[^\s\S]' if not value or RE_WS.search(value) else re.escape(value)
524 pattern = re.compile(r'.*?(?:(?<=^)|(?<=[ \t\r\n\f]))%s(?=(?:[ \t\r\n\f]|$)).*' % value, flags)
525 elif op.startswith('|'):
526 # Value starts with word in dash separated list
527 pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
528 else:
529 # Value matches
530 pattern = re.compile(r'^%s$' % re.escape(value), flags)
531 if op.startswith('!'):
532 # Equivalent to `:not([attr=value])`
533 inverse = True
534 if is_type and pattern:
535 pattern2 = re.compile(pattern.pattern)
537 # Append the attribute selector
538 sel_attr = ct.SelectorAttribute(attr, ns, pattern, pattern2)
539 if inverse:
540 # If we are using `!=`, we need to nest the pattern under a `:not()`.
541 sub_sel = _Selector()
542 sub_sel.attributes.append(sel_attr)
543 not_list = ct.SelectorList([sub_sel.freeze()], True, False)
544 sel.selectors.append(not_list)
545 else:
546 sel.attributes.append(sel_attr)
548 has_selector = True
549 return has_selector
551 def parse_tag_pattern(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
552 """Parse tag pattern from regex match."""
554 prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
555 tag = css_unescape(m.group('tag_name'))
556 sel.tag = ct.SelectorTag(tag, prefix)
557 has_selector = True
558 return has_selector
560 def parse_pseudo_class_custom(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
561 """
562 Parse custom pseudo class alias.
564 Compile custom selectors as we need them. When compiling a custom selector,
565 set it to `None` in the dictionary so we can avoid an infinite loop.
566 """
568 pseudo = util.lower(css_unescape(m.group('name')))
569 selector = self.custom.get(pseudo)
570 if selector is None:
571 raise SelectorSyntaxError(
572 f"Undefined custom selector '{pseudo}' found at position {m.end(0)}",
573 self.pattern,
574 m.end(0)
575 )
577 if not isinstance(selector, ct.SelectorList):
578 del self.custom[pseudo]
579 selector = CSSParser(
580 selector, custom=self.custom, flags=self.flags
581 ).process_selectors(flags=FLG_PSEUDO)
582 self.custom[pseudo] = selector
584 self.count += selector.count
585 self.check_count()
587 sel.selectors.append(selector)
588 has_selector = True
589 return has_selector
591 def parse_pseudo_class(
592 self,
593 sel: _Selector,
594 m: Match[str],
595 has_selector: bool,
596 iselector: Iterator[tuple[str, Match[str]]],
597 is_html: bool
598 ) -> tuple[bool, bool]:
599 """Parse pseudo class."""
601 complex_pseudo = False
602 pseudo = util.lower(css_unescape(m.group('name')))
603 if m.group('open'):
604 complex_pseudo = True
605 if complex_pseudo and pseudo in PSEUDO_COMPLEX:
606 has_selector = self.parse_pseudo_open(sel, pseudo, has_selector, iselector, m.end(0))
607 elif not complex_pseudo and pseudo in PSEUDO_SIMPLE:
608 if pseudo == ':root':
609 sel.flags |= ct.SEL_ROOT
610 elif pseudo == ':defined':
611 sel.flags |= ct.SEL_DEFINED
612 is_html = True
613 elif pseudo == ':scope':
614 sel.flags |= ct.SEL_SCOPE
615 elif pseudo == ':empty':
616 sel.flags |= ct.SEL_EMPTY
617 elif pseudo in (':link', ':any-link'):
618 self.count += CSS_LINK.count
619 self.check_count()
620 sel.selectors.append(CSS_LINK)
621 elif pseudo == ':checked':
622 self.count += CSS_CHECKED.count
623 self.check_count()
624 sel.selectors.append(CSS_CHECKED)
625 elif pseudo == ':default':
626 self.count += CSS_DEFAULT.count
627 self.check_count()
628 sel.selectors.append(CSS_DEFAULT)
629 elif pseudo == ':indeterminate':
630 self.count += CSS_INDETERMINATE.count
631 self.check_count()
632 sel.selectors.append(CSS_INDETERMINATE)
633 elif pseudo == ":disabled":
634 self.count += CSS_DISABLED.count
635 self.check_count()
636 sel.selectors.append(CSS_DISABLED)
637 elif pseudo == ":enabled":
638 self.count += CSS_ENABLED.count
639 self.check_count()
640 sel.selectors.append(CSS_ENABLED)
641 elif pseudo == ":required":
642 self.count += CSS_REQUIRED.count
643 self.check_count()
644 sel.selectors.append(CSS_REQUIRED)
645 elif pseudo == ":muted":
646 self.count += CSS_MUTED.count
647 self.check_count()
648 sel.selectors.append(CSS_MUTED)
649 elif pseudo == ":open":
650 self.count += CSS_OPEN.count
651 self.check_count()
652 sel.selectors.append(CSS_OPEN)
653 elif pseudo == ":optional":
654 self.count += CSS_OPTIONAL.count
655 self.check_count()
656 sel.selectors.append(CSS_OPTIONAL)
657 elif pseudo == ":read-only":
658 self.count += CSS_READ_ONLY.count
659 self.check_count()
660 sel.selectors.append(CSS_READ_ONLY)
661 elif pseudo == ":read-write":
662 self.count += CSS_READ_WRITE.count
663 self.check_count()
664 sel.selectors.append(CSS_READ_WRITE)
665 elif pseudo == ":in-range":
666 self.count += CSS_IN_RANGE.count
667 self.check_count()
668 sel.selectors.append(CSS_IN_RANGE)
669 elif pseudo == ":out-of-range":
670 self.count += CSS_OUT_OF_RANGE.count
671 self.check_count()
672 sel.selectors.append(CSS_OUT_OF_RANGE)
673 elif pseudo == ":placeholder-shown":
674 self.count += CSS_PLACEHOLDER_SHOWN.count
675 self.check_count()
676 sel.selectors.append(CSS_PLACEHOLDER_SHOWN)
677 elif pseudo == ':first-child':
678 sel.nth.append(ct.SelectorNth(1, False, 0, False, False, ct.SelectorList()))
679 elif pseudo == ':last-child':
680 sel.nth.append(ct.SelectorNth(1, False, 0, False, True, ct.SelectorList()))
681 elif pseudo == ':first-of-type':
682 sel.nth.append(ct.SelectorNth(1, False, 0, True, False, ct.SelectorList()))
683 elif pseudo == ':last-of-type':
684 sel.nth.append(ct.SelectorNth(1, False, 0, True, True, ct.SelectorList()))
685 elif pseudo == ':only-child':
686 sel.nth.extend(
687 [
688 ct.SelectorNth(1, False, 0, False, False, ct.SelectorList()),
689 ct.SelectorNth(1, False, 0, False, True, ct.SelectorList())
690 ]
691 )
692 elif pseudo == ':only-of-type':
693 sel.nth.extend(
694 [
695 ct.SelectorNth(1, False, 0, True, False, ct.SelectorList()),
696 ct.SelectorNth(1, False, 0, True, True, ct.SelectorList())
697 ]
698 )
699 has_selector = True
700 elif complex_pseudo and pseudo in PSEUDO_COMPLEX_NO_MATCH:
701 self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
702 sel.no_match = True
703 has_selector = True
704 elif not complex_pseudo and pseudo in PSEUDO_SIMPLE_NO_MATCH:
705 sel.no_match = True
706 has_selector = True
707 elif pseudo in PSEUDO_SUPPORTED:
708 raise SelectorSyntaxError(
709 f"Invalid syntax for pseudo class '{pseudo}'",
710 self.pattern,
711 m.start(0)
712 )
713 else:
714 raise SelectorSyntaxError(
715 f"'{pseudo}' was detected as a pseudo-class and is either unsupported or invalid. "
716 "If the syntax was not intended to be recognized as a pseudo-class, please escape the colon.",
717 self.pattern,
718 m.start(0)
719 )
721 return has_selector, is_html
723 def parse_pseudo_nth(
724 self,
725 sel: _Selector,
726 m: Match[str],
727 has_selector: bool,
728 iselector: Iterator[tuple[str, Match[str]]]
729 ) -> bool:
730 """Parse `nth` pseudo."""
732 mdict = m.groupdict()
733 if mdict.get('pseudo_nth_child'):
734 postfix = '_child'
735 else:
736 postfix = '_type'
737 mdict['name'] = util.lower(css_unescape(mdict['name']))
738 content = util.lower(mdict.get('nth' + postfix))
739 if content == 'even':
740 # 2n
741 s1 = 2
742 s2 = 0
743 var = True
744 elif content == 'odd':
745 # 2n+1
746 s1 = 2
747 s2 = 1
748 var = True
749 else:
750 nth_parts = cast(Match[str], RE_NTH.match(content))
751 _s1 = '-' if nth_parts.group('s1') and nth_parts.group('s1') == '-' else ''
752 a = nth_parts.group('a')
753 var = a.endswith('n')
754 if a.startswith('n'):
755 _s1 += '1'
756 elif var:
757 _s1 += a[:-1]
758 else:
759 _s1 += a
760 _s2 = '-' if nth_parts.group('s2') and nth_parts.group('s2') == '-' else ''
761 if nth_parts.group('b'):
762 _s2 += nth_parts.group('b')
763 else:
764 _s2 = '0'
765 s1 = int(_s1, 10)
766 s2 = int(_s2, 10)
768 pseudo_sel = mdict['name']
769 if postfix == '_child':
770 if m.group('of'):
771 # Parse the rest of `of S`.
772 nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
773 else:
774 # Use default `*|*` for `of S`.
775 nth_sel = CSS_NTH_OF_S_DEFAULT
776 self.count += nth_sel.count
777 self.check_count()
778 if pseudo_sel == ':nth-child':
779 sel.nth.append(ct.SelectorNth(s1, var, s2, False, False, nth_sel))
780 elif pseudo_sel == ':nth-last-child':
781 sel.nth.append(ct.SelectorNth(s1, var, s2, False, True, nth_sel))
782 else:
783 if pseudo_sel == ':nth-of-type':
784 sel.nth.append(ct.SelectorNth(s1, var, s2, True, False, ct.SelectorList()))
785 elif pseudo_sel == ':nth-last-of-type':
786 sel.nth.append(ct.SelectorNth(s1, var, s2, True, True, ct.SelectorList()))
787 has_selector = True
788 return has_selector
790 def parse_pseudo_open(
791 self,
792 sel: _Selector,
793 name: str,
794 has_selector: bool,
795 iselector: Iterator[tuple[str, Match[str]]],
796 index: int
797 ) -> bool:
798 """Parse pseudo with opening bracket."""
800 flags = FLG_PSEUDO | FLG_OPEN
801 if name == ':not':
802 flags |= FLG_NOT
803 elif name == ':has':
804 flags |= FLG_RELATIVE
805 elif name in (':where', ':is'):
806 flags |= FLG_FORGIVE
808 sel.selectors.append(self.parse_selectors(iselector, index, flags))
809 has_selector = True
811 return has_selector
813 def parse_has_combinator(
814 self,
815 sel: _Selector,
816 m: Match[str],
817 has_selector: bool,
818 selectors: list[_Selector],
819 rel_type: str,
820 index: int
821 ) -> tuple[bool, _Selector, str]:
822 """Parse combinator tokens."""
824 combinator = m.group('relation').strip()
825 if not combinator:
826 combinator = WS_COMBINATOR
827 if combinator == COMMA_COMBINATOR:
828 sel.rel_type = rel_type
829 selectors[-1].relations.append(sel)
830 rel_type = ":" + WS_COMBINATOR
831 selectors.append(_Selector())
832 else:
833 if has_selector:
834 # End the current selector and associate the leading combinator with this selector.
835 sel.rel_type = rel_type
836 selectors[-1].relations.append(sel)
837 elif rel_type[1:] != WS_COMBINATOR:
838 # It's impossible to have two whitespace combinators after each other as the patterns
839 # will gobble up trailing whitespace. It is also impossible to have a whitespace
840 # combinator after any other kind for the same reason. But we could have
841 # multiple non-whitespace combinators. So if the current combinator is not a whitespace,
842 # then we've hit the multiple combinator case, so we should fail.
843 raise SelectorSyntaxError(
844 f'The multiple combinators at position {index}',
845 self.pattern,
846 index
847 )
849 # Set the leading combinator for the next selector.
850 rel_type = ':' + combinator
852 sel = _Selector()
853 has_selector = False
854 return has_selector, sel, rel_type
856 def parse_combinator(
857 self,
858 sel: _Selector,
859 m: Match[str],
860 has_selector: bool,
861 selectors: list[_Selector],
862 relations: list[_Selector],
863 is_pseudo: bool,
864 is_forgive: bool,
865 index: int
866 ) -> tuple[bool, _Selector]:
867 """Parse combinator tokens."""
869 combinator = m.group('relation').strip()
870 if not combinator:
871 combinator = WS_COMBINATOR
872 if not has_selector:
873 if not is_forgive or combinator != COMMA_COMBINATOR:
874 raise SelectorSyntaxError(
875 f"The combinator '{combinator}' at position {index}, must have a selector before it",
876 self.pattern,
877 index
878 )
880 # If we are in a forgiving pseudo class, just make the selector a "no match"
881 if combinator == COMMA_COMBINATOR:
882 sel.no_match = True
883 del relations[:]
884 selectors.append(sel)
885 else:
886 if combinator == COMMA_COMBINATOR:
887 if not sel.tag and not is_pseudo:
888 # Implied `*`
889 sel.tag = ct.SelectorTag('*', None)
890 sel.relations.extend(relations)
891 selectors.append(sel)
892 del relations[:]
893 else:
894 sel.relations.extend(relations)
895 sel.rel_type = combinator
896 del relations[:]
897 relations.append(sel)
899 sel = _Selector()
900 has_selector = False
902 return has_selector, sel
904 def parse_class_id(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
905 """Parse HTML classes and ids."""
907 selector = m.group(0)
908 if selector.startswith('.'):
909 sel.classes.append(css_unescape(selector[1:]))
910 else:
911 sel.ids.append(css_unescape(selector[1:]))
912 has_selector = True
913 return has_selector
915 def parse_pseudo_contains(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
916 """Parse contains."""
918 pseudo = util.lower(css_unescape(m.group('name')))
919 if pseudo == ":contains":
920 warnings.warn( # noqa: B028
921 "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
922 FutureWarning
923 )
924 contains_own = pseudo == ":-soup-contains-own"
925 values = css_unescape(m.group('values'))
926 patterns = []
927 for token in RE_VALUES.finditer(values):
928 if token.group('split'):
929 continue
930 value = token.group('value')
931 if value.startswith(("'", '"')):
932 value = css_unescape(value[1:-1], True)
933 else:
934 value = css_unescape(value)
935 patterns.append(value)
936 sel.contains.append(ct.SelectorContains(patterns, contains_own))
937 has_selector = True
938 return has_selector
940 def parse_pseudo_lang(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
941 """Parse pseudo language."""
943 values = m.group('values')
944 patterns = []
945 for token in RE_VALUES.finditer(values):
946 if token.group('split'):
947 continue
948 value = token.group('value')
949 if value.startswith(('"', "'")):
950 value = css_unescape(value[1:-1], True)
951 else:
952 value = css_unescape(value)
954 patterns.append(value)
956 sel.lang.append(ct.SelectorLang(patterns))
957 has_selector = True
959 return has_selector
961 def parse_pseudo_dir(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
962 """Parse pseudo direction."""
964 value = ct.SEL_DIR_LTR if util.lower(m.group('dir')) == 'ltr' else ct.SEL_DIR_RTL
965 sel.flags |= value
966 has_selector = True
967 return has_selector
def parse_selectors(
    self,
    iselector: Iterator[tuple[str, Match[str]]],
    index: int = 0,
    flags: int = 0
) -> ct.SelectorList:
    """Build a selector list from a stream of `(token name, match)` pairs.

    Parameters:
        iselector: token iterator; it is shared with the helper parsers, which
            may consume further tokens from it (e.g. for nested pseudo-classes).
        index: position reported in errors raised once the token stream ends;
            it is advanced to the end of every token handled here.
        flags: bit mask of `FLG_*` values that tunes parsing (open pseudo-class,
            relative selectors, forgiving lists, special internal patterns, ...).

    Returns:
        A `ct.SelectorList` made of the frozen selectors, the "is not" and
        "is HTML" markers, and the number of tokens counted during this call.

    Raises:
        NotImplementedError: for at-rules and pseudo-elements.
        SelectorSyntaxError: for a missing selector, a tag name that is not at
            the start of a compound selector, an unmatched closing parenthesis,
            or a pseudo-class that is never closed.
    """

    # State of the compound selector currently being assembled
    sel = _Selector()
    selectors = []
    relations: list[_Selector] = []
    rel_type = ":" + WS_COMBINATOR
    has_selector = False
    closed = False
    # Token count on entry, so the amount consumed by this call can be reported
    start_count = self.count

    # Decode the parsing flags
    is_open = bool(flags & FLG_OPEN)
    is_pseudo = bool(flags & FLG_PSEUDO)
    is_relative = bool(flags & FLG_RELATIVE)
    is_not = bool(flags & FLG_NOT)
    is_html = bool(flags & FLG_HTML)
    is_default = bool(flags & FLG_DEFAULT)
    is_indeterminate = bool(flags & FLG_INDETERMINATE)
    is_in_range = bool(flags & FLG_IN_RANGE)
    is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
    is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
    is_forgive = bool(flags & FLG_FORGIVE)

    # Report every enabled flag when debugging
    if self.debug:  # pragma: no cover
        for enabled, message in (
            (is_pseudo, ' is_pseudo: True'),
            (is_open, ' is_open: True'),
            (is_relative, ' is_relative: True'),
            (is_not, ' is_not: True'),
            (is_html, ' is_html: True'),
            (is_default, ' is_default: True'),
            (is_indeterminate, ' is_indeterminate: True'),
            (is_in_range, ' is_in_range: True'),
            (is_out_of_range, ' is_out_of_range: True'),
            (is_placeholder_shown, ' is_placeholder_shown: True'),
            (is_forgive, ' is_forgive: True'),
        ):
            if enabled:
                print(message)

    # Relative selectors hang off an initial, empty selector in the list
    if is_relative:
        selectors.append(_Selector())

    try:
        for key, m in iselector:
            # Combinators and closing parentheses do not count as tokens
            if key != 'combine' and key != 'pseudo_close':
                self.count += 1
                self.check_count()

            # Dispatch on the token type
            if key == 'at_rule':
                raise NotImplementedError(f"At-rules found at position {m.start(0)}")
            elif key == 'pseudo_element':
                raise NotImplementedError(f"Pseudo-element found at position {m.start(0)}")
            elif key == 'amp':
                sel.flags |= ct.SEL_SCOPE
                has_selector = True
            elif key == 'tag':
                if has_selector:
                    raise SelectorSyntaxError(
                        f"Tag name found at position {m.start(0)} instead of at the start",
                        self.pattern,
                        m.start(0)
                    )
                has_selector = self.parse_tag_pattern(sel, m, has_selector)
            elif key == 'class' or key == 'id':
                has_selector = self.parse_class_id(sel, m, has_selector)
            elif key == 'attribute':
                has_selector = self.parse_attribute_selector(sel, m, has_selector)
            elif key == 'combine':
                if is_relative:
                    has_selector, sel, rel_type = self.parse_has_combinator(
                        sel, m, has_selector, selectors, rel_type, index
                    )
                else:
                    has_selector, sel = self.parse_combinator(
                        sel, m, has_selector, selectors, relations, is_pseudo, is_forgive, index
                    )
            elif key == 'pseudo_close':
                if not has_selector:
                    # Only forgiving lists tolerate an empty slot before `)`
                    if not is_forgive:
                        raise SelectorSyntaxError(
                            f"Expected a selector at position {m.start(0)}",
                            self.pattern,
                            m.start(0)
                        )
                    sel.no_match = True
                if not is_open:
                    raise SelectorSyntaxError(
                        f"Unmatched pseudo-class close at position {m.start(0)}",
                        self.pattern,
                        m.start(0)
                    )
                # Leave without advancing `index` past the closing token
                closed = True
                break
            elif key == 'pseudo_class_custom':
                has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
            elif key == 'pseudo_class':
                has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
            elif key == 'pseudo_contains':
                has_selector = self.parse_pseudo_contains(sel, m, has_selector)
            elif key == 'pseudo_nth_type' or key == 'pseudo_nth_child':
                has_selector = self.parse_pseudo_nth(sel, m, has_selector, iselector)
            elif key == 'pseudo_lang':
                has_selector = self.parse_pseudo_lang(sel, m, has_selector)
            elif key == 'pseudo_dir':
                has_selector = self.parse_pseudo_dir(sel, m, has_selector)
                # Currently only supports HTML
                is_html = True

            index = m.end(0)
    except StopIteration:
        # A helper ran the shared iterator dry; treat it as the end of input
        pass

    # A pseudo-class that was opened must have seen its closing token
    if is_open and not closed:
        raise SelectorSyntaxError(
            f"Unclosed pseudo-class at position {index}",
            self.pattern,
            index
        )

    if has_selector:
        # Finish the pending compound selector
        if not (sel.tag or is_pseudo):
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        if is_relative:
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
        else:
            sel.relations.extend(relations)
            relations.clear()
            selectors.append(sel)
    elif is_forgive and (not selectors or not relations):
        # Forgiving pseudo-classes accept empty slots such as `:is()`
        sel.no_match = True
        relations.clear()
        selectors.append(sel)
        has_selector = True

    if not has_selector:
        # `:has()` leads with a combinator, so a selector must always follow it.
        # May apply to others as well.
        raise SelectorSyntaxError(
            f'Expected a selector at position {index}',
            self.pattern,
            index
        )

    # Some internal patterns need extra matcher logic. The matching flag is placed on
    # the last selector; when several apply, the later entry replaces the earlier one.
    for enabled, marker in (
        (is_default, ct.SEL_DEFAULT),
        (is_indeterminate, ct.SEL_INDETERMINATE),
        (is_in_range, ct.SEL_IN_RANGE),
        (is_out_of_range, ct.SEL_OUT_OF_RANGE),
        (is_placeholder_shown, ct.SEL_PLACEHOLDER_SHOWN),
    ):
        if enabled:
            selectors[-1].flags = marker

    frozen = [item.freeze() for item in selectors]
    consumed = self.count - start_count
    return ct.SelectorList(frozen, is_not, is_html, consumed)
def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
    """Tokenize `pattern`, yielding `(token name, match)` pairs in order.

    At each position the entries of `self.css_tokens` are tried in sequence and
    the first one that matches wins. If nothing matches, a
    `SelectorSyntaxError` is raised that names the selector kind suggested by
    the offending character, or the character itself.
    """

    # Skip whitespace and comments at both ends of the pattern
    leading = RE_WS_BEGIN.search(pattern)
    index = leading.end(0) if leading else 0
    trailing = RE_WS_END.search(pattern)
    stop = trailing.start(0) if trailing else len(pattern)

    if self.debug:  # pragma: no cover
        print(f'## PARSING: {pattern!r}')

    while index < stop:
        m = None
        for token in self.css_tokens:
            m = token.match(pattern, index, self.flags)
            if m:
                name = token.get_name()
                if self.debug:  # pragma: no cover
                    print(f"TOKEN: '{name}' --> {m.group(0)!r} at position {m.start(0)}")
                # Resume scanning right after the token just recognized
                index = m.end(0)
                yield name, m
                break

        if m is None:
            # No token matched here. When the character opens a known selector type,
            # blame that selector type; otherwise report the character itself.
            c = pattern[index]
            if c == '[':
                msg = f"Malformed attribute selector at position {index}"
            elif c == '.':
                msg = f"Malformed class selector at position {index}"
            elif c == '#':
                msg = f"Malformed id selector at position {index}"
            elif c == ':':
                msg = f"Malformed pseudo-class selector at position {index}"
            else:
                msg = f"Invalid character {c!r} position {index}"
            raise SelectorSyntaxError(msg, self.pattern, index)

    if self.debug:  # pragma: no cover
        print('## END PARSING')
def process_selectors(self, index: int = 0, flags: int = 0) -> ct.SelectorList:
    """Tokenize `self.pattern` and parse the token stream into a selector list.

    `index` and `flags` are forwarded unchanged to `parse_selectors`.
    """

    tokens = self.selector_iter(self.pattern)
    return self.parse_selectors(tokens, index, flags)
1204# Precompile CSS selector lists for pseudo-classes (additional logic may be required beyond the pattern)
# A few patterns are order dependent as they use previously compiled patterns.
# CSS pattern for `:link` and `:any-link`: HTML `a` or `area` elements that have an `href` attribute.
# Parsed once at import time. FLG_PSEUDO suppresses the implied `*` tag during parsing and
# FLG_HTML sets the `is_html` marker handed to the resulting selector list.
CSS_LINK = CSSParser(
    'html|*:is(a, area)[href]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:checked`: checkbox/radio inputs with a `checked` attribute,
# or `option` elements with a `selected` attribute.
CSS_CHECKED = CSSParser(
    '''
    html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:default` (must compile CSS_CHECKED first).
# FLG_DEFAULT makes the parser set `ct.SEL_DEFAULT` on the LAST selector of the list,
# so the submit-button selector has to stay at the end of the pattern.
CSS_DEFAULT = CSSParser(
    '''
    :checked,

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|form html|*:is(button, input)[type="submit"]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_DEFAULT)
# CSS pattern for `:indeterminate`.
# FLG_INDETERMINATE makes the parser set `ct.SEL_INDETERMINATE` on the LAST selector of the
# list, so the named-radio selector has to stay at the end of the pattern.
CSS_INDETERMINATE = CSSParser(
    '''
    html|input[type="checkbox"][indeterminate],
    html|input[type="radio"]:is(:not([name]), [name=""]):not([checked]),
    html|progress:not([value]),

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|input[type="radio"][name]:not([name='']):not([checked])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
# CSS pattern for `:disabled`. Four alternatives are covered:
#   1. a form control, `fieldset`, `optgroup` or `option` that carries `disabled` itself;
#   2. an `option` that is a child of a disabled `optgroup`;
#   3. a form control that is a child of a disabled `fieldset`;
#   4. a form control nested deeper inside a disabled `fieldset`, unless the fieldset child
#      it descends through is the fieldset's first `legend`.
# `fieldset` used to be listed twice in the first `:is()` list; the redundant entry was dropped,
# which does not change what the selector matches.
CSS_DISABLED = CSSParser(
    '''
    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option)[disabled],
    html|optgroup[disabled] > html|option,
    html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
    html|fieldset[disabled] >
        html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:enabled`: any element that can be disabled but does not match `:disabled`.
# `fieldset` used to be listed twice in the `:is()` list; the redundant entry was dropped,
# which does not change what the selector matches.
CSS_ENABLED = CSSParser(
    '''
    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option):not(:disabled)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:required`: `input`, `textarea` or `select` with a `required` attribute
CSS_REQUIRED = CSSParser(
    'html|*:is(input, textarea, select)[required]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:optional`: the same elements as `:required`, but without the `required` attribute
CSS_OPTIONAL = CSSParser(
    'html|*:is(input, textarea, select):not([required])'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:placeholder-shown`: text-like inputs with a non-empty `placeholder` and a
# missing or empty `value`, or a `textarea` with a non-empty `placeholder`.
# FLG_PLACEHOLDER_SHOWN makes the parser set `ct.SEL_PLACEHOLDER_SHOWN` on the LAST selector
# of the list, which here is the `textarea` selector.
CSS_PLACEHOLDER_SHOWN = CSSParser(
    '''
    html|input:is(
        :not([type]),
        [type=""],
        [type=text],
        [type=search],
        [type=url],
        [type=tel],
        [type=email],
        [type=password],
        [type=number]
    )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
    html|textarea[placeholder]:not([placeholder=''])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
# CSS pattern default for `:nth-child` "of S" feature: any element in any namespace.
# Compiled with FLG_PSEUDO only (no FLG_HTML).
CSS_NTH_OF_S_DEFAULT = CSSParser(
    '*|*'
).process_selectors(flags=FLG_PSEUDO)
# CSS pattern for `:read-write` (CSS_DISABLED must be compiled first, as the pattern uses `:disabled`).
# Matches a `textarea` or text-like/date-like `input` that is neither `readonly` nor disabled,
# or any element whose `contenteditable` attribute is empty or `true` (case-insensitive).
CSS_READ_WRITE = CSSParser(
    '''
    html|*:is(
        textarea,
        input:is(
            :not([type]),
            [type=""],
            [type=text],
            [type=search],
            [type=url],
            [type=tel],
            [type=email],
            [type=number],
            [type=password],
            [type=date],
            [type=datetime-local],
            [type=month],
            [type=time],
            [type=week]
        )
    ):not([readonly], :disabled),
    html|*:is([contenteditable=""], [contenteditable="true" i])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:read-only`: every HTML element that does not match `:read-write`
CSS_READ_ONLY = CSSParser(
    '''
    html|*:not(:read-write)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:in-range`: date/time/number/range inputs that declare `min` or `max`.
# The pattern only selects candidate elements; FLG_IN_RANGE makes the parser set
# `ct.SEL_IN_RANGE` on the last selector so additional logic can be applied when matching.
CSS_IN_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_IN_RANGE | FLG_HTML)
# CSS pattern for `:out-of-range`: same candidate elements as `:in-range`.
# FLG_OUT_OF_RANGE makes the parser set `ct.SEL_OUT_OF_RANGE` on the last selector instead.
CSS_OUT_OF_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_OUT_OF_RANGE | FLG_HTML)
# CSS pattern for `:open`: `details` or `dialog` elements with an `open` attribute
CSS_OPEN = CSSParser(
    '''
    html|*:is(details, dialog)[open]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)


# CSS pattern for `:muted`: `video` or `audio` elements with a `muted` attribute
CSS_MUTED = CSSParser(
    '''
    html|*:is(video, audio)[muted]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)