Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/soupsieve/css_parser.py: 87%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

610 statements  

1"""CSS selector parser.""" 

2from __future__ import annotations 

3import re 

4from functools import lru_cache 

5from . import util 

6from . import css_match as cm 

7from . import css_types as ct 

8from .util import SelectorSyntaxError 

9import warnings 

10from typing import Match, Any, Iterator, cast 

11from dataclasses import dataclass 

12from collections import UserDict 

13import threading 

14 

# Serializes the lazy regex compilation done in `SelectorPattern.re_pattern`.
RE_LOCK = threading.Lock()
# Serializes the lazy selector compilation done in `PseudoSelectorMap.__getitem__`.
# Re-entrant because compiling one built-in pseudo-class can look up another
# (for example the `:default` pattern references `:checked`).
SEL_LOCK = threading.RLock()

# Code point substituted for null escapes.
UNICODE_REPLACEMENT_CHAR = 0xFFFD

# Upper bound enforced by `CSSParser.check_count`.
SELECTOR_LIMIT = 8192

# Parameterless pseudo-classes that are supported.
PSEUDO_SIMPLE = {
    ':any-link', ':checked', ':default', ':defined', ':disabled',
    ':empty', ':enabled', ':first-child', ':first-of-type', ':in-range',
    ':indeterminate', ':last-child', ':last-of-type', ':link', ':muted',
    ':only-child', ':only-of-type', ':open', ':optional', ':out-of-range',
    ':placeholder-shown', ':read-only', ':read-write', ':required',
    ':root', ':scope',
}

# Parameterless pseudo-classes that parse fine but can never match in the Soup Sieve environment.
PSEUDO_SIMPLE_NO_MATCH = {
    ':active', ':autofill', ':buffering', ':current', ':focus',
    ':focus-visible', ':focus-within', ':fullscreen', ':future', ':host',
    ':hover', ':local-link', ':past', ':paused', ':picture-in-picture',
    ':playing', ':popover-open', ':seeking', ':stalled', ':target',
    ':target-within', ':user-invalid', ':visited', ':volume-locked',
}

# Functional pseudo-classes whose argument is a selector list.
PSEUDO_COMPLEX = {
    ':-soup-contains', ':-soup-contains-own', ':contains', ':has',
    ':is', ':matches', ':not', ':where',
}

# Functional pseudo-classes that are parsed but never match.
PSEUDO_COMPLEX_NO_MATCH = {':current', ':host', ':host-context'}

# Functional pseudo-classes with a dedicated argument grammar, handled by special patterns.
PSEUDO_SPECIAL = {
    ':dir', ':lang', ':nth-child', ':nth-last-child',
    ':nth-last-of-type', ':nth-of-type',
}

# Every pseudo-class name the parser recognizes in any form.
PSEUDO_SUPPORTED = set().union(
    PSEUDO_SIMPLE,
    PSEUDO_SIMPLE_NO_MATCH,
    PSEUDO_COMPLEX,
    PSEUDO_COMPLEX_NO_MATCH,
    PSEUDO_SPECIAL,
)

109 

110# Sub-patterns parts 

111# Whitespace 

112NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])' 

113WS = fr'(?:[ \t]|{NEWLINE})' 

114# Comments 

115COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)' 

116# Whitespace with comments included 

117WSC = fr'(?:{WS}|{COMMENTS})' 

118# CSS escapes 

119CSS_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$))' 

120CSS_STRING_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$|{NEWLINE}))' 

121# CSS Identifier 

122IDENTIFIER = fr''' 

123(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})+|--) 

124(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})*) 

125''' 

126# `nth` content 

127NTH = fr'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){WSC}*(?:[-+]){WSC}*(?:[0-9]+))?' 

128# Value: quoted string or identifier 

129VALUE = fr'''(?:"(?:\\(?:.|{NEWLINE})|[^\\"\r\n\f])*?"|'(?:\\(?:.|{NEWLINE})|[^\\'\r\n\f])*?'|{IDENTIFIER})''' 

130# Attribute value comparison. `!=` is handled special as it is non-standard. 

131ATTR = fr'(?:{WSC}*(?P<cmp>[!~^|*$]?=){WSC}*(?P<value>{VALUE})(?:{WSC}*(?P<case>[is]))?)?{WSC}*' 

132 

133# Selector patterns 

134# IDs (`#id`) 

135PAT_ID = fr'\#{IDENTIFIER}' 

136# Classes (`.class`) 

137PAT_CLASS = fr'\.{IDENTIFIER}' 

138# Prefix:Tag (`prefix|tag`) 

139PAT_TAG = fr'(?P<tag_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<tag_name>{IDENTIFIER}|\*)' 

140# Attributes (`[attr]`, `[attr=value]`, etc.) 

141PAT_ATTR = fr'\[{WSC}*(?P<attr_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<attr_name>{IDENTIFIER}){ATTR}\]' 

142# Pseudo class (`:pseudo-class`, `:pseudo-class(`) 

143PAT_PSEUDO_CLASS = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)?' 

144# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes. 

145PAT_PSEUDO_CLASS_SPECIAL = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)' 

146# Custom pseudo class (`:--custom-pseudo`) 

147PAT_PSEUDO_CLASS_CUSTOM = fr'(?P<name>:(?=--){IDENTIFIER})' 

148# Nesting ampersand selector. Matches `&` 

149PAT_AMP = r'&' 

150# Closing pseudo group (`)`) 

151PAT_PSEUDO_CLOSE = fr'{WSC}*\)' 

152# Pseudo element (`::pseudo-element`) 

153PAT_PSEUDO_ELEMENT = fr':{PAT_PSEUDO_CLASS}' 

154# At rule (`@page`, etc.) (not supported) 

155PAT_AT_RULE = fr'@P{IDENTIFIER}' 

156# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.) 

157PAT_PSEUDO_NTH_CHILD = fr''' 

158(?P<pseudo_nth_child>{PAT_PSEUDO_CLASS_SPECIAL} 

159(?P<nth_child>{NTH}|even|odd))(?:{WSC}*\)|(?P<of>{COMMENTS}*{WS}{WSC}*of{COMMENTS}*{WS}{WSC}*)) 

160''' 

161# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.) 

162PAT_PSEUDO_NTH_TYPE = fr''' 

163(?P<pseudo_nth_type>{PAT_PSEUDO_CLASS_SPECIAL} 

164(?P<nth_type>{NTH}|even|odd)){WSC}*\) 

165''' 

166# Pseudo class language (`:lang("*-de", en)`) 

167PAT_PSEUDO_LANG = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)' 

168# Pseudo class direction (`:dir(ltr)`) 

169PAT_PSEUDO_DIR = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<dir>ltr|rtl){WSC}*\)' 

170# Combining characters (`>`, `~`, ` `, `+`, `,`) 

171PAT_COMBINE = fr'{WSC}*?(?P<relation>[,+>~]|{WS}(?![,+>~])){WSC}*' 

172# Extra: Contains (`:contains(text)`) 

173PAT_PSEUDO_CONTAINS = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)' 

174 

175# Regular expressions 

176# CSS escape pattern 

177RE_CSS_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WSC}?)|(\\[^\r\n\f])|(\\$))', re.I) 

178RE_CSS_STR_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$)|(\\{NEWLINE}))', re.I) 

179# Pattern to break up `nth` specifiers 

180RE_NTH = re.compile(fr'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){WSC}*(?P<s2>[-+]){WSC}*(?P<b>[0-9]+))?', re.I) 

181# Pattern to iterate multiple values. 

182RE_VALUES = re.compile(fr'(?:(?P<value>{VALUE})|(?P<split>{WSC}*,{WSC}*))', re.X) 

183# Whitespace checks 

184RE_WS = re.compile(WS) 

185RE_WS_BEGIN = re.compile(fr'^{WSC}*') 

186RE_WS_END = re.compile(fr'{WSC}*$') 

187RE_CUSTOM = re.compile(fr'^{PAT_PSEUDO_CLASS_CUSTOM}$', re.X) 

188RE_PSEUDO_CLASS_SPECIAL = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U) 

189 

# Combinator tokens
# Separator between the selectors of a selector list
COMMA_COMBINATOR = ','
# Descendant relation
WS_COMBINATOR = ' '

# Parse flags (one bit each, combined with `|`)
FLG_PSEUDO = 1 << 0
FLG_NOT = 1 << 1
FLG_RELATIVE = 1 << 2
FLG_DEFAULT = 1 << 3
FLG_HTML = 1 << 4
FLG_INDETERMINATE = 1 << 5
FLG_OPEN = 1 << 6
FLG_IN_RANGE = 1 << 7
FLG_OUT_OF_RANGE = 1 << 8
FLG_PLACEHOLDER_SHOWN = 1 << 9
FLG_FORGIVE = 1 << 10

# Size of the compiled selector cache
_MAXCACHE = 500

211 

212 

@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(
    pattern: str,
    namespaces: ct.Namespaces | None,
    custom: ct.CustomSelectors | None,
    flags: int
) -> cm.SoupSieve:
    """
    Compile a selector string into a `SoupSieve` object, memoizing the result.

    The custom selectors are validated and normalized first, then the pattern is
    parsed with them. The returned object keeps the original `custom` mapping.
    """

    parser = CSSParser(pattern, custom=process_custom(custom), flags=flags)
    selectors = parser.process_selectors()
    return cm.SoupSieve(pattern, selectors, namespaces, custom, flags)

234 

235 

def _purge_cache() -> None:
    """Drop every selector memoized by `_cached_css_compile`."""

    _cached_css_compile.cache_clear()

240 

241 

def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
    """
    Validate and normalize a mapping of custom pseudo-class selectors.

    Each key is lowercased, checked against the custom pseudo-class name syntax
    (`:--name`), and stored under its unescaped form.

    Returns a new dictionary (empty when `custom` is `None`).

    Raises `SelectorSyntaxError` for an invalid name and `KeyError` when two keys
    resolve to the same custom selector.
    """

    custom_selectors: dict[str, str | ct.SelectorList] = {}
    if custom is None:
        return custom_selectors

    for key, value in custom.items():
        name = util.lower(key)
        if RE_CUSTOM.match(name) is None:
            raise SelectorSyntaxError(f"The name '{name}' is not a valid custom pseudo-class name")
        # Check for duplicates with the same key the entry is stored under. Comparing the
        # raw (still escaped) name would miss two spellings of one name and could reject
        # a name that merely looks like a different, already unescaped, key.
        resolved = css_unescape(name)
        if resolved in custom_selectors:
            raise KeyError(f"The custom selector '{name}' has already been registered")
        custom_selectors[resolved] = value
    return custom_selectors

255 

256 

def css_unescape(content: str, string: bool = False) -> str:
    """
    Unescape CSS value.

    Strings allow for spanning the value on multiple lines by escaping a new line.

    `content` is the escaped text, and `string` selects the string escape rules
    (escaped newlines are removed) instead of the identifier rules.
    """

    def replace(m: Match[str]) -> str:
        """Replace with the appropriate substitute."""

        if m.group(1):
            # Hex escape: drop the backslash. `int` tolerates the optional trailing whitespace.
            codepoint = int(m.group(1)[1:], 16)
            # Null, surrogates, and values beyond the Unicode range have no valid character.
            # CSS maps them to the replacement character, and `chr` would raise for
            # anything above 0x10FFFF.
            if codepoint == 0 or codepoint > 0x10FFFF or 0xD800 <= codepoint <= 0xDFFF:
                codepoint = UNICODE_REPLACEMENT_CHAR
            value = chr(codepoint)
        elif m.group(2):
            # Single character escape: keep the character itself.
            value = m.group(2)[1:]
        elif m.group(3):
            # Backslash at the end of the input.
            value = '\ufffd'
        else:
            # Escaped newline inside a string: line continuation, contributes nothing.
            value = ''

        return value

    return (RE_CSS_ESC if not string else RE_CSS_STR_ESC).sub(replace, content)

282 

283 

def escape(ident: str) -> str:
    """
    Escape a string so it can be used as a CSS identifier.

    Null becomes the replacement character, control characters and a digit in the
    leading position (optionally after one dash) become hex escapes, and any other
    ASCII character that is not a letter, digit, `-` or `_` is backslash escaped.
    """

    # An identifier that is a single `-` with no other characters must be escaped.
    if ident == '-':
        return '\\-'

    leading_dash = ident.startswith('-')
    pieces = []
    for pos, char in enumerate(ident):
        cp = ord(char)
        in_lead = pos == 0 or (leading_dash and pos == 1)
        is_digit = '0' <= char <= '9'
        if cp == 0x00:
            pieces.append('\ufffd')
        elif cp <= 0x1F or cp == 0x7F or (in_lead and is_digit):
            # Hex escapes are terminated with a space.
            pieces.append(f'\\{cp:x} ')
        elif cp >= 0x80 or is_digit or char in '-_' or 'A' <= char <= 'Z' or 'a' <= char <= 'z':
            pieces.append(char)
        else:
            pieces.append(f'\\{char}')
    return ''.join(pieces)

310 

311 

class SelectorPattern:
    """A named token pattern whose regular expression is compiled on first use."""

    def __init__(self, name: str, pattern: str) -> None:
        """Store the token name and the regular expression source."""

        self.name = name
        self.pattern = pattern
        # Filled in by `re_pattern` the first time it is needed.
        self._re_pattern: re.Pattern[str] | None = None

    @property
    def re_pattern(self) -> re.Pattern[str]:
        """Return the compiled pattern, compiling it under `RE_LOCK` if needed."""

        with RE_LOCK:
            compiled = self._re_pattern
            if compiled is None:
                compiled = self._re_pattern = re.compile(self.pattern, re.I | re.X | re.U)
            return compiled

    def get_name(self) -> str:
        """Return the token name."""

        return self.name

    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """Try to match the pattern in `selector` at `index` (`flags` is not used here)."""

        return self.re_pattern.match(selector, index)

340 

341 

class SpecialPseudoPattern(SelectorPattern):
    """Dispatches a `:name(` pseudo-class to the pattern registered for that name."""

    def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
        """
        Build the name lookup.

        Each entry is `(token name, pseudo-class names, regex source, pattern class)`.
        One pattern object is created per entry and shared by all of its names.
        """

        self.patterns = {}
        for token_name, pseudo_names, regex, factory in patterns:
            shared = factory(token_name, regex)
            for pseudo_name in pseudo_names:
                self.patterns[pseudo_name] = shared

        # Pattern behind the most recent successful match.
        self.matched_name: SelectorPattern | None = None

    def get_name(self) -> str:
        """Return the token name of the last matched pattern, or an empty string."""

        if self.matched_name is None:
            return ''
        return self.matched_name.get_name()

    def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
        """Match the pattern registered for the pseudo-class found at `index`, if any."""

        head = RE_PSEUDO_CLASS_SPECIAL.match(selector, index)
        if not head:
            return None

        handler = self.patterns.get(util.lower(css_unescape(head.group('name'))))
        if not handler:
            return None

        found = handler.match(selector, index, flags)
        if found:
            # Remember which pattern matched so `get_name` can report it.
            self.matched_name = handler
        return found

376 

377 

class _Selector:
    """
    Mutable scratch object for a compound selector.

    The parser collects the pieces of a compound selector here. When collection
    is finished, `freeze` converts the data to an immutable selector object
    that can be pickled and hashed.
    """

    def __init__(self, **kwargs: Any) -> None:
        """Initialize each field from `kwargs`, falling back to an empty default."""

        self.tag: ct.SelectorTag | None = kwargs.get('tag')
        self.ids: list[str] = kwargs['ids'] if 'ids' in kwargs else []
        self.classes: list[str] = kwargs['classes'] if 'classes' in kwargs else []
        self.attributes: list[ct.SelectorAttribute] = kwargs['attributes'] if 'attributes' in kwargs else []
        self.nth: list[ct.SelectorNth] = kwargs['nth'] if 'nth' in kwargs else []
        self.selectors: list[ct.SelectorList] = kwargs['selectors'] if 'selectors' in kwargs else []
        self.relations: list[_Selector] = kwargs['relations'] if 'relations' in kwargs else []
        self.rel_type: str | None = kwargs.get('rel_type')
        self.contains: list[ct.SelectorContains] = kwargs['contains'] if 'contains' in kwargs else []
        self.lang: list[ct.SelectorLang] = kwargs['lang'] if 'lang' in kwargs else []
        self.flags: int = kwargs.get('flags', 0)
        self.no_match: bool = kwargs.get('no_match', False)

    def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList:
        """
        Freeze a chain of relations.

        The first relation absorbs the remaining ones into its own relation list
        and is frozen, so the result holds at most one selector.
        """

        if not relations:
            return ct.SelectorList()

        head, *rest = relations
        head.relations.extend(rest)
        return ct.SelectorList([head.freeze()])

    def freeze(self) -> ct.Selector | ct.SelectorNull:
        """Return the immutable form, or a null selector when nothing can match."""

        if self.no_match:
            return ct.SelectorNull()

        return ct.Selector(
            self.tag,
            tuple(self.ids),
            tuple(self.classes),
            tuple(self.attributes),
            tuple(self.nth),
            tuple(self.selectors),
            self._freeze_relations(self.relations),
            self.rel_type,
            tuple(self.contains),
            tuple(self.lang),
            self.flags
        )

    def __str__(self) -> str:  # pragma: no cover
        """Return a debug representation listing every field."""

        names = (
            'tag', 'ids', 'classes', 'attributes', 'nth', 'selectors', 'relations',
            'rel_type', 'contains', 'lang', 'flags', 'no_match'
        )
        body = ', '.join(f'{name}={getattr(self, name)!r}' for name in names)
        return f'_Selector({body})'

    __repr__ = __str__

444 

445 

@dataclass
class CSSPattern:
    """Selector source text plus parse flags, waiting to be compiled by `CSSParser`."""

    # Raw selector text
    selector: str
    # Flags handed to the parser when the text is compiled
    flags: int

452 

453 

class PseudoSelectorMap(UserDict[str, CSSPattern | ct.SelectorList]):
    """Mapping of pseudo-class names to selectors that are compiled on first access."""

    def __setitem__(self, key: str, value: CSSPattern | ct.SelectorList) -> None:
        """Store either an uncompiled pattern or a compiled selector list."""

        self.data[key] = value

    def __getitem__(self, key: str) -> ct.SelectorList:
        """
        Return the compiled selector list for `key`.

        An entry that is still a `CSSPattern` is compiled, written back, and
        returned. The whole lookup runs under `SEL_LOCK`.
        """

        with SEL_LOCK:
            entry = self.data[key]
            if not isinstance(entry, CSSPattern):
                return entry

            compiled = CSSParser(entry.selector).process_selectors(flags=entry.flags)
            self.data[key] = compiled
            return compiled

472 

473 

# Built-in pseudo-classes expressed as ordinary selectors.
# They are stored uncompiled; the extra flag on some of them selects special
# handling for that pseudo-class.

# `:link` and `:any-link`
CSS_LINK = CSSPattern(
    selector='html|*:is(a, area)[href]',
    flags=FLG_PSEUDO | FLG_HTML
)
# `:checked`
CSS_CHECKED = CSSPattern(
    selector='''
    html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
    ''',
    flags=FLG_PSEUDO | FLG_HTML
)
# `:default` (must compile CSS_CHECKED first)
CSS_DEFAULT = CSSPattern(
    selector='''
    :checked,

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|form html|*:is(button, input)[type="submit"]
    ''',
    flags=FLG_PSEUDO | FLG_HTML | FLG_DEFAULT
)
# `:indeterminate`
CSS_INDETERMINATE = CSSPattern(
    selector='''
    html|input[type="checkbox"][indeterminate],
    html|input[type="radio"]:is(:not([name]), [name=""]):not([checked]),
    html|progress:not([value]),

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|input[type="radio"][name]:not([name='']):not([checked])
    ''',
    flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE
)
# `:disabled`
CSS_DISABLED = CSSPattern(
    selector='''
    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
    html|optgroup[disabled] > html|option,
    html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
    html|fieldset[disabled] >
        html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
    ''',
    flags=FLG_PSEUDO | FLG_HTML
)
# `:enabled`
CSS_ENABLED = CSSPattern(
    selector='''
    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
    ''',
    flags=FLG_PSEUDO | FLG_HTML
)
# `:required`
CSS_REQUIRED = CSSPattern(
    selector='html|*:is(input, textarea, select)[required]',
    flags=FLG_PSEUDO | FLG_HTML
)
# `:optional`
CSS_OPTIONAL = CSSPattern(
    selector='html|*:is(input, textarea, select):not([required])',
    flags=FLG_PSEUDO | FLG_HTML
)
# `:placeholder-shown`
CSS_PLACEHOLDER_SHOWN = CSSPattern(
    selector='''
    html|input:is(
        :not([type]),
        [type=""],
        [type=text],
        [type=search],
        [type=url],
        [type=tel],
        [type=email],
        [type=password],
        [type=number]
    )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
    html|textarea[placeholder]:not([placeholder=''])
    ''',
    flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN
)
# `:read-write` (CSS_DISABLED must be compiled first)
CSS_READ_WRITE = CSSPattern(
    selector='''
    html|*:is(
        textarea,
        input:is(
            :not([type]),
            [type=""],
            [type=text],
            [type=search],
            [type=url],
            [type=tel],
            [type=email],
            [type=number],
            [type=password],
            [type=date],
            [type=datetime-local],
            [type=month],
            [type=time],
            [type=week]
        )
    ):not([readonly], :disabled),
    html|*:is([contenteditable=""], [contenteditable="true" i])
    ''',
    flags=FLG_PSEUDO | FLG_HTML
)
# `:read-only`
CSS_READ_ONLY = CSSPattern(
    selector='html|*:not(:read-write)',
    flags=FLG_PSEUDO | FLG_HTML
)
# `:in-range` (same candidates as `:out-of-range`; only the flag differs)
CSS_IN_RANGE = CSSPattern(
    selector='''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    ''',
    flags=FLG_PSEUDO | FLG_HTML | FLG_IN_RANGE
)
# `:out-of-range` (same candidates as `:in-range`; only the flag differs)
CSS_OUT_OF_RANGE = CSSPattern(
    selector='''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    ''',
    flags=FLG_PSEUDO | FLG_HTML | FLG_OUT_OF_RANGE
)
# `:open`
CSS_OPEN = CSSPattern(
    selector='html|*:is(details, dialog)[open]',
    flags=FLG_PSEUDO | FLG_HTML
)
# `:muted`
CSS_MUTED = CSSPattern(
    selector='html|*:is(video, audio)[muted]',
    flags=FLG_PSEUDO | FLG_HTML
)
# Default for the `of S` part of `:nth-child` when none is given
CSS_NTH_OF_S_DEFAULT = CSSPattern(
    selector="*|*",
    flags=FLG_PSEUDO
)

621 

622 

623class CSSParser: 

624 """Parse CSS selectors.""" 

625 

# Token patterns, kept in their original order.
# NOTE(review): presumably tried in this order by the tokenizer - confirm before reordering.
CSS_TOKENS = (
    SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
    # Pseudo-classes with their own argument grammar share one dispatching pattern.
    SpecialPseudoPattern(
        (
            (
                "pseudo_contains",
                (':contains', ':-soup-contains', ':-soup-contains-own'),
                PAT_PSEUDO_CONTAINS,
                SelectorPattern
            ),
            (
                "pseudo_nth_child",
                (':nth-child', ':nth-last-child'),
                PAT_PSEUDO_NTH_CHILD,
                SelectorPattern
            ),
            (
                "pseudo_nth_type",
                (':nth-of-type', ':nth-last-of-type'),
                PAT_PSEUDO_NTH_TYPE,
                SelectorPattern
            ),
            (
                "pseudo_lang",
                (':lang',),
                PAT_PSEUDO_LANG,
                SelectorPattern
            ),
            (
                "pseudo_dir",
                (':dir',),
                PAT_PSEUDO_DIR,
                SelectorPattern
            )
        )
    ),
    SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
    SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS),
    SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT),
    SelectorPattern("amp", PAT_AMP),
    SelectorPattern("at_rule", PAT_AT_RULE),
    SelectorPattern("id", PAT_ID),
    SelectorPattern("class", PAT_CLASS),
    SelectorPattern("tag", PAT_TAG),
    SelectorPattern("attribute", PAT_ATTR),
    SelectorPattern("combine", PAT_COMBINE)
)

# Pseudo-classes that expand to ordinary selectors (compiled on first lookup).
# `<nth-of-s>` is an internal key for the default `of S` selector of `:nth-child`.
PSEUDO_SELECTORS = PseudoSelectorMap(
    {
        ':link': CSS_LINK,
        ':any-link': CSS_LINK,
        ':checked': CSS_CHECKED,
        ':default': CSS_DEFAULT,
        ':indeterminate': CSS_INDETERMINATE,
        ':disabled': CSS_DISABLED,
        ':enabled': CSS_ENABLED,
        ':required': CSS_REQUIRED,
        ':muted': CSS_MUTED,
        ':open': CSS_OPEN,
        ':optional': CSS_OPTIONAL,
        ':read-only': CSS_READ_ONLY,
        ':read-write': CSS_READ_WRITE,
        ':in-range': CSS_IN_RANGE,
        ':out-of-range': CSS_OUT_OF_RANGE,
        ':placeholder-shown': CSS_PLACEHOLDER_SHOWN,
        '<nth-of-s>': CSS_NTH_OF_S_DEFAULT
    }
)

676 

def __init__(
    self,
    selector: str,
    custom: dict[str, str | ct.SelectorList] | None = None,
    flags: int = 0
) -> None:
    """
    Prepare a parser for `selector`.

    Null characters in the selector are replaced with U+FFFD. `custom` is kept
    by reference (not copied); a fresh dictionary is used when it is omitted.
    """

    self.flags = flags
    self.debug = flags & util.DEBUG
    self.custom = custom if custom is not None else {}
    self.pattern = selector.replace('\x00', '\ufffd')
    # Running total compared against `SELECTOR_LIMIT` by `check_count`.
    self.count = 0

690 

def check_count(self) -> None:
    """Raise `ValueError` once the running selector count exceeds `SELECTOR_LIMIT`."""

    if self.count <= SELECTOR_LIMIT:
        return

    raise ValueError(f'Selector exceeds pseudo-class nesting limit of {SELECTOR_LIMIT}')

696 

def parse_attribute_selector(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Turn an attribute token match into an attribute selector on `sel`.

    The comparison operator is translated to a regular expression over the
    attribute value. The non-standard `!=` is stored as a negated sub-selector.
    Always returns `True`.
    """

    op = m.group('cmp')
    raw_case = m.group('case')
    raw_ns = m.group('attr_ns')
    case = util.lower(raw_case) if raw_case else None
    # The namespace group includes the trailing `|`.
    ns = css_unescape(raw_ns[:-1]) if raw_ns else ''
    attr = css_unescape(m.group('attr_name'))

    # An explicit case flag wins; otherwise `type` is compared case-insensitively.
    is_type = False
    if case:
        flags = re.DOTALL | (re.I if case == 'i' else 0)
    elif util.lower(attr) == 'type':
        is_type = True
        flags = re.DOTALL | re.I
    else:
        flags = re.DOTALL

    inverse = False
    if not op:
        # Attribute name only
        pattern = None
    else:
        raw_value = m.group('value')
        if raw_value.startswith(('"', "'")):
            # Quoted: strip the quotes and apply the string escape rules.
            value = css_unescape(raw_value[1:-1], True)
        else:
            value = css_unescape(raw_value)

        lead = op[0]
        if lead == '~':
            # Word within a space separated list. An empty value or one containing
            # whitespace must match nothing, so use `[^\s\S]` which cannot be matched.
            word = r'[^\s\S]' if not value or RE_WS.search(value) else re.escape(value)
            source = rf'.*?(?:(?<=^)|(?<=[ \t\r\n\f])){word}(?=(?:[ \t\r\n\f]|$)).*'
        else:
            literal = re.escape(value)
            if lead == '^':
                # Value starts with
                source = rf'^{literal}.*'
            elif lead == '$':
                # Value ends with
                source = rf'.*?{literal}$'
            elif lead == '*':
                # Value contains
                source = rf'.*?{literal}.*'
            elif lead == '|':
                # Value is, or starts with, the word in a dash separated list
                source = rf'^{literal}(?:-.*)?$'
            else:
                # Exact value; `!=` is equivalent to `:not([attr=value])`
                source = rf'^{literal}$'
                inverse = lead == '!'
        pattern = re.compile(source, flags)

    # For `type` without an explicit case flag, also keep a case-sensitive variant.
    pattern2 = re.compile(pattern.pattern) if is_type and pattern else None

    sel_attr = ct.SelectorAttribute(attr, ns, pattern, pattern2)
    if inverse:
        # Nest the attribute under a `:not()` selector list.
        negated = _Selector()
        negated.attributes.append(sel_attr)
        sel.selectors.append(ct.SelectorList([negated.freeze()], True, False))
    else:
        sel.attributes.append(sel_attr)

    return True

766 

def parse_tag_pattern(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """Set the tag (and optional namespace prefix) of `sel` from a tag token match."""

    raw_prefix = m.group('tag_ns')
    # The prefix group includes the trailing `|`; no prefix at all is recorded as `None`.
    prefix = css_unescape(raw_prefix[:-1]) if raw_prefix else None
    sel.tag = ct.SelectorTag(css_unescape(m.group('tag_name')), prefix)
    return True

775 

def parse_pseudo_class_custom(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Resolve a custom pseudo-class (`:--name`) and attach its selector list to `sel`.

    Custom selectors are compiled lazily. While one is being compiled its entry is
    removed from the shared dictionary, so a selector that refers to itself is
    reported as undefined instead of recursing forever. The compiled list then
    replaces the source text.
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    selector = self.custom.get(pseudo)
    if selector is None:
        position = m.end(0)
        raise SelectorSyntaxError(
            f"Undefined custom selector '{pseudo}' found at position {position}",
            self.pattern,
            position
        )

    if not isinstance(selector, ct.SelectorList):
        # Still source text: compile it with the entry temporarily removed.
        source = self.custom.pop(pseudo)
        nested = CSSParser(source, custom=self.custom, flags=self.flags)
        selector = nested.process_selectors(flags=FLG_PSEUDO)
        self.custom[pseudo] = selector

    # The expanded selectors count toward this parser's limit.
    self.count += selector.count
    self.check_count()

    sel.selectors.append(selector)
    return True

806 

def parse_pseudo_class(
    self,
    sel: _Selector,
    m: Match[str],
    has_selector: bool,
    iselector: Iterator[tuple[str, Match[str]]],
    is_html: bool
) -> tuple[bool, bool]:
    """
    Handle a pseudo-class token.

    Returns the updated `(has_selector, is_html)` pair. Raises `SelectorSyntaxError`
    when the name is unknown, or known but used with the wrong form (with or
    without parentheses).
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    complex_pseudo = bool(m.group('open'))

    if complex_pseudo:
        if pseudo in PSEUDO_COMPLEX:
            return self.parse_pseudo_open(sel, pseudo, has_selector, iselector, m.end(0)), is_html
        if pseudo in PSEUDO_COMPLEX_NO_MATCH:
            # Consume the argument, then mark the selector as never matching.
            self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
            sel.no_match = True
            return True, is_html
    else:
        if pseudo in PSEUDO_SIMPLE:
            # Pseudo-classes that only set a selector flag.
            flag_for = {
                ':root': ct.SEL_ROOT,
                ':defined': ct.SEL_DEFINED,
                ':scope': ct.SEL_SCOPE,
                ':empty': ct.SEL_EMPTY
            }
            # Pseudo-classes that are fixed `nth` rules: (of_type, last) per rule.
            nth_for = {
                ':first-child': ((False, False),),
                ':last-child': ((False, True),),
                ':first-of-type': ((True, False),),
                ':last-of-type': ((True, True),),
                ':only-child': ((False, False), (False, True)),
                ':only-of-type': ((True, False), (True, True))
            }
            if pseudo in flag_for:
                sel.flags |= flag_for[pseudo]
                if pseudo == ':defined':
                    is_html = True
            elif pseudo in self.PSEUDO_SELECTORS:
                # Expands to a built-in selector list, which counts toward the limit.
                expanded = self.PSEUDO_SELECTORS[pseudo]
                self.count += expanded.count
                self.check_count()
                sel.selectors.append(expanded)
            else:
                for of_type, last in nth_for.get(pseudo, ()):
                    sel.nth.append(ct.SelectorNth(1, False, 0, of_type, last, ct.SelectorList()))
            return True, is_html
        if pseudo in PSEUDO_SIMPLE_NO_MATCH:
            sel.no_match = True
            return True, is_html

    if pseudo in PSEUDO_SUPPORTED:
        # Known name, wrong form.
        raise SelectorSyntaxError(
            f"Invalid syntax for pseudo class '{pseudo}'",
            self.pattern,
            m.start(0)
        )

    raise SelectorSyntaxError(
        f"'{pseudo}' was detected as a pseudo-class and is either unsupported or invalid. "
        "If the syntax was not intended to be recognized as a pseudo-class, please escape the colon.",
        self.pattern,
        m.start(0)
    )

883 

def parse_pseudo_nth(
    self,
    sel: _Selector,
    m: Match[str],
    has_selector: bool,
    iselector: Iterator[tuple[str, Match[str]]]
) -> bool:
    """
    Handle `:nth-child`, `:nth-last-child`, `:nth-of-type` and `:nth-last-of-type`.

    The `an+b` argument (or `even`/`odd`) is reduced to the integers `a` and `b`
    plus a flag telling whether `n` was present. Always returns `True`.
    """

    groups = m.groupdict()
    postfix = '_child' if groups.get('pseudo_nth_child') else '_type'
    pseudo_sel = groups['name'] = util.lower(css_unescape(groups['name']))
    content = util.lower(groups.get('nth' + postfix))

    if content == 'even':
        # 2n
        s1, s2, var = 2, 0, True
    elif content == 'odd':
        # 2n+1
        s1, s2, var = 2, 1, True
    else:
        parts = cast(Match[str], RE_NTH.match(content))
        term = parts.group('a')
        var = term.endswith('n')
        # A bare `n` has an implied coefficient of 1.
        if term.startswith('n'):
            digits = '1'
        elif var:
            digits = term[:-1]
        else:
            digits = term
        a_text = ('-' if parts.group('s1') == '-' else '') + digits

        offset = parts.group('b')
        if offset:
            b_text = ('-' if parts.group('s2') == '-' else '') + offset
        else:
            b_text = '0'

        s1 = int(a_text, 10)
        s2 = int(b_text, 10)

    if postfix == '_child':
        if m.group('of'):
            # Parse the rest of `of S`.
            nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
        else:
            # Use default `*|*` for `of S`; it counts toward the limit.
            nth_sel = self.PSEUDO_SELECTORS['<nth-of-s>']
            self.count += nth_sel.count
            self.check_count()
        from_end = {':nth-child': False, ':nth-last-child': True}.get(pseudo_sel)
        if from_end is not None:
            sel.nth.append(ct.SelectorNth(s1, var, s2, False, from_end, nth_sel))
    else:
        from_end = {':nth-of-type': False, ':nth-last-of-type': True}.get(pseudo_sel)
        if from_end is not None:
            sel.nth.append(ct.SelectorNth(s1, var, s2, True, from_end, ct.SelectorList()))

    return True

950 

def parse_pseudo_open(
    self,
    sel: _Selector,
    name: str,
    has_selector: bool,
    iselector: Iterator[tuple[str, Match[str]]],
    index: int
) -> bool:
    """
    Parse the selector list inside a functional pseudo-class and attach it to `sel`.

    `:not`, `:has`, `:is` and `:where` each add their own parse flag. Always
    returns `True`.
    """

    extra_flag = {
        ':not': FLG_NOT,
        ':has': FLG_RELATIVE,
        ':where': FLG_FORGIVE,
        ':is': FLG_FORGIVE
    }.get(name, 0)

    nested = self.parse_selectors(iselector, index, FLG_PSEUDO | FLG_OPEN | extra_flag)
    sel.selectors.append(nested)
    return True

973 

def parse_has_combinator(
    self,
    sel: _Selector,
    m: Match[str],
    has_selector: bool,
    selectors: list[_Selector],
    rel_type: str,
    index: int
) -> tuple[bool, _Selector, str]:
    """
    Parse a combinator token for a relative selector list.

    `parse_selectors` routes `combine` tokens here when it was called with `FLG_RELATIVE`.
    The finished `sel` is tagged with the pending leading combinator (`rel_type`) and attached
    to the relations of the last entry in `selectors`. A comma additionally starts a new entry
    in `selectors` and resets the leading combinator to the whitespace combinator.

    Returns `(has_selector, sel, rel_type)`: always `False`, a fresh `_Selector`, and the
    leading combinator to apply to the next selector.

    Raises `SelectorSyntaxError` when a non-comma combinator arrives with no selector before
    it while a non-whitespace combinator is already pending.
    """

    # An empty relation (only whitespace) is the whitespace combinator
    combinator = m.group('relation').strip() or WS_COMBINATOR
    is_comma = combinator == COMMA_COMBINATOR

    if not is_comma and not has_selector and rel_type[1:] != WS_COMBINATOR:
        # It's impossible to have two whitespace combinators after each other as the patterns
        # will gobble up trailing whitespace. It is also impossible to have a whitespace
        # combinator after any other kind for the same reason. But we could have
        # multiple non-whitespace combinators. So if the current combinator is not a whitespace,
        # then we've hit the multiple combinator case, so we should fail.
        raise SelectorSyntaxError(
            f'The multiple combinators at position {index}',
            self.pattern,
            index
        )

    if is_comma or has_selector:
        # End the current selector and associate the leading combinator with this selector.
        sel.rel_type = rel_type
        selectors[-1].relations.append(sel)

    if is_comma:
        # A comma begins a new relative selector, which again leads with an implied whitespace combinator.
        rel_type = ":" + WS_COMBINATOR
        selectors.append(_Selector())
    else:
        # Set the leading combinator for the next selector.
        rel_type = ':' + combinator

    return False, _Selector(), rel_type

1016 

def parse_combinator(
    self,
    sel: _Selector,
    m: Match[str],
    has_selector: bool,
    selectors: list[_Selector],
    relations: list[_Selector],
    is_pseudo: bool,
    is_forgive: bool,
    index: int
) -> tuple[bool, _Selector]:
    """
    Parse a combinator token for a regular (non-relative) selector list.

    A comma finishes the current selector: pending `relations` are moved onto `sel` and `sel`
    is appended to `selectors`. Any other combinator records itself as `sel.rel_type` and makes
    `sel` the sole pending relation for the selector that follows.

    Returns `(has_selector, sel)`: always `False` and a fresh `_Selector`.

    Raises `SelectorSyntaxError` when no selector precedes the combinator, unless the
    combinator is a comma and `is_forgive` is set, in which case the empty slot is stored as a
    selector with `no_match` set.
    """

    # An empty relation (only whitespace) is the whitespace combinator
    combinator = m.group('relation').strip() or WS_COMBINATOR
    is_comma = combinator == COMMA_COMBINATOR

    if not has_selector:
        if not (is_forgive and is_comma):
            raise SelectorSyntaxError(
                f"The combinator '{combinator}' at position {index}, must have a selector before it",
                self.pattern,
                index
            )

        # If we are in a forgiving pseudo class, just make the selector a "no match"
        sel.no_match = True
        relations.clear()
        selectors.append(sel)
    elif is_comma:
        if not sel.tag and not is_pseudo:
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        sel.relations.extend(relations)
        selectors.append(sel)
        relations.clear()
    else:
        # Chain: this selector becomes the single pending relation of whatever comes next
        sel.relations.extend(relations)
        sel.rel_type = combinator
        relations.clear()
        relations.append(sel)

    return False, _Selector()

1064 

def parse_class_id(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Parse an HTML class or id token.

    The leading character of the matched text decides the destination: `.` stores the name in
    `sel.classes`, anything else stores it in `sel.ids`. The name (the text after the leading
    character) is passed through `css_unescape` first. Always returns `True`.
    """

    token = m.group(0)
    target = sel.classes if token.startswith('.') else sel.ids
    target.append(css_unescape(token[1:]))
    return True

1075 

def parse_pseudo_contains(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Parse a contains pseudo-class and its comma separated list of text values.

    The pseudo-class name is unescaped and lowercased. The deprecated `:contains` spelling
    emits a `FutureWarning`, and `:-soup-contains-own` sets the "own" flag on the resulting
    `ct.SelectorContains`. Each value in the `values` group is unescaped exactly once: quoted
    values have their quotes stripped and are unescaped in string mode, bare values are
    unescaped as-is.

    The result is appended to `sel.contains` and `True` is always returned.
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    if pseudo == ":contains":
        warnings.warn(  # noqa: B028
            "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
            FutureWarning
        )
    contains_own = pseudo == ":-soup-contains-own"

    # Tokenize the raw, still-escaped argument text. Unescaping the whole group before
    # tokenizing (as was done previously) and then unescaping every token again decodes escapes
    # twice: an escaped quote or escaped whitespace can change where `RE_VALUES` splits the
    # values, and an escaped backslash followed by hex digits is decoded a second time.
    # `parse_pseudo_lang` tokenizes the raw group in the same way.
    patterns = []
    for token in RE_VALUES.finditer(m.group('values')):
        if token.group('split'):
            # Separator between values, not a value itself
            continue
        value = token.group('value')
        if value.startswith(("'", '"')):
            # Quoted string: drop the surrounding quotes and unescape in string mode
            value = css_unescape(value[1:-1], True)
        else:
            value = css_unescape(value)
        patterns.append(value)

    sel.contains.append(ct.SelectorContains(patterns, contains_own))
    return True

1100 

def parse_pseudo_lang(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Parse the language pseudo-class and its comma separated list of values.

    The `values` group is split with `RE_VALUES`; separator tokens are skipped. Quoted values
    have their quotes stripped and are unescaped in string mode, bare values are unescaped
    as-is. The collected values are stored as a `ct.SelectorLang` in `sel.lang`.
    Always returns `True`.
    """

    raw_values = (
        token.group('value')
        for token in RE_VALUES.finditer(m.group('values'))
        if not token.group('split')
    )
    patterns = [
        css_unescape(raw[1:-1], True) if raw.startswith(('"', "'")) else css_unescape(raw)
        for raw in raw_values
    ]

    sel.lang.append(ct.SelectorLang(patterns))
    return True

1121 

def parse_pseudo_dir(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
    """
    Parse the direction pseudo-class.

    Adds `ct.SEL_DIR_LTR` to `sel.flags` when the lowercased `dir` group is `ltr`; any other
    value adds `ct.SEL_DIR_RTL`. Always returns `True`.
    """

    direction = util.lower(m.group('dir'))
    sel.flags |= ct.SEL_DIR_LTR if direction == 'ltr' else ct.SEL_DIR_RTL
    return True

1129 

def parse_selectors(
    self,
    iselector: Iterator[tuple[str, Match[str]]],
    index: int = 0,
    flags: int = 0
) -> ct.SelectorList:
    """
    Consume tokens from `iselector` and assemble them into a selector list.

    `iselector` yields `(token name, match)` pairs. Tokens are dispatched to the matching
    `parse_*` method until the iterator is exhausted or, when `FLG_OPEN` is set, until the
    closing token of the enclosing pseudo-class is seen. `index` is the position reported in
    errors before any token has been consumed, and `flags` is a bitwise OR of `FLG_*` values
    that select the parsing mode.

    Returns a `ct.SelectorList` of the frozen selectors, together with the "not" and "HTML"
    state and the number of tokens counted during this call.

    Raises `NotImplementedError` for at-rules and pseudo-elements, and `SelectorSyntaxError`
    for a misplaced tag name, an unmatched or missing pseudo-class close, or a missing selector.
    """

    # Working state for the selector currently being assembled
    sel = _Selector()
    selectors = []  # type: list[_Selector]
    relations = []  # type: list[_Selector]
    has_selector = False
    closed = False
    rel_type = ":" + WS_COMBINATOR
    start_count = self.count

    # Decode the parsing mode from the flag bits (names and bits are listed in the same order)
    (
        is_open, is_pseudo, is_relative, is_not, is_html, is_default,
        is_indeterminate, is_in_range, is_out_of_range, is_placeholder_shown, is_forgive
    ) = (
        bool(flags & bit)
        for bit in (
            FLG_OPEN, FLG_PSEUDO, FLG_RELATIVE, FLG_NOT, FLG_HTML, FLG_DEFAULT,
            FLG_INDETERMINATE, FLG_IN_RANGE, FLG_OUT_OF_RANGE, FLG_PLACEHOLDER_SHOWN, FLG_FORGIVE
        )
    )

    # Print out useful debug stuff
    if self.debug:  # pragma: no cover
        for enabled, text in (
            (is_pseudo, ' is_pseudo: True'),
            (is_open, ' is_open: True'),
            (is_relative, ' is_relative: True'),
            (is_not, ' is_not: True'),
            (is_html, ' is_html: True'),
            (is_default, ' is_default: True'),
            (is_indeterminate, ' is_indeterminate: True'),
            (is_in_range, ' is_in_range: True'),
            (is_out_of_range, ' is_out_of_range: True'),
            (is_placeholder_shown, ' is_placeholder_shown: True'),
            (is_forgive, ' is_forgive: True')
        ):
            if enabled:
                print(text)

    # The algorithm for relative selectors require an initial selector in the selector list
    if is_relative:
        selectors.append(_Selector())

    # Token kinds whose handlers share the `(sel, m, has_selector) -> has_selector` shape
    simple_parsers = {
        'pseudo_class_custom': self.parse_pseudo_class_custom,
        'pseudo_contains': self.parse_pseudo_contains,
        'pseudo_lang': self.parse_pseudo_lang,
        'attribute': self.parse_attribute_selector,
        'class': self.parse_class_id,
        'id': self.parse_class_id
    }

    try:
        while True:
            key, m = next(iselector)

            # Combinators and closing tokens are not counted; everything else is counted and checked
            if key not in ('combine', 'pseudo_close'):
                self.count += 1
                self.check_count()

            # Handle parts
            if key in simple_parsers:
                has_selector = simple_parsers[key](sel, m, has_selector)
            elif key == 'tag':
                if has_selector:
                    raise SelectorSyntaxError(
                        f"Tag name found at position {m.start(0)} instead of at the start",
                        self.pattern,
                        m.start(0)
                    )
                has_selector = self.parse_tag_pattern(sel, m, has_selector)
            elif key == 'combine':
                if is_relative:
                    has_selector, sel, rel_type = self.parse_has_combinator(
                        sel, m, has_selector, selectors, rel_type, index
                    )
                else:
                    has_selector, sel = self.parse_combinator(
                        sel, m, has_selector, selectors, relations, is_pseudo, is_forgive, index
                    )
            elif key == 'pseudo_class':
                has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
            elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
                has_selector = self.parse_pseudo_nth(sel, m, has_selector, iselector)
            elif key == 'pseudo_dir':
                has_selector = self.parse_pseudo_dir(sel, m, has_selector)
                # Currently only supports HTML
                is_html = True
            elif key == "amp":
                sel.flags |= ct.SEL_SCOPE
                has_selector = True
            elif key == 'pseudo_close':
                if not has_selector:
                    if not is_forgive:
                        raise SelectorSyntaxError(
                            f"Expected a selector at position {m.start(0)}",
                            self.pattern,
                            m.start(0)
                        )
                    # Forgiving lists tolerate an empty final slot
                    sel.no_match = True
                if not is_open:
                    raise SelectorSyntaxError(
                        f"Unmatched pseudo-class close at position {m.start(0)}",
                        self.pattern,
                        m.start(0)
                    )
                # Leave without advancing `index`; the enclosing call continues from here
                closed = True
                break
            elif key == "at_rule":
                raise NotImplementedError(f"At-rules found at position {m.start(0)}")
            elif key == 'pseudo_element':
                raise NotImplementedError(f"Pseudo-element found at position {m.start(0)}")

            index = m.end(0)
    except StopIteration:
        pass

    # Handle selectors that are not closed
    if is_open and not closed:
        raise SelectorSyntaxError(
            f"Unclosed pseudo-class at position {index}",
            self.pattern,
            index
        )

    # Cleanup completed selector piece
    if has_selector:
        if not sel.tag and not is_pseudo:
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        if is_relative:
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
        else:
            sel.relations.extend(relations)
            relations.clear()
            selectors.append(sel)

    # Forgive empty slots in pseudo-classes that have lists (and are forgiving)
    elif is_forgive and (not selectors or not relations):
        # Handle normal pseudo-classes with empty slots like `:is()` etc.
        sel.no_match = True
        relations.clear()
        selectors.append(sel)
        has_selector = True

    if not has_selector:
        # We will always need to finish a selector when `:has()` is used as it leads with combining.
        # May apply to others as well.
        raise SelectorSyntaxError(
            f'Expected a selector at position {index}',
            self.pattern,
            index
        )

    # Some patterns require additional logic, such as default. We try to make these the
    # last pattern, and append the appropriate flag to that selector which communicates
    # to the matcher what additional logic is required. Each assignment replaces the flags
    # of the last selector, so when several apply the last one listed wins.
    for enabled, special_flag in (
        (is_default, ct.SEL_DEFAULT),
        (is_indeterminate, ct.SEL_INDETERMINATE),
        (is_in_range, ct.SEL_IN_RANGE),
        (is_out_of_range, ct.SEL_OUT_OF_RANGE),
        (is_placeholder_shown, ct.SEL_PLACEHOLDER_SHOWN)
    ):
        if enabled:
            selectors[-1].flags = special_flag

    # Return selector list
    return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html, self.count - start_count)

1317 

def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
    """
    Iterate selector tokens.

    Starting after any leading whitespace/comments and stopping before any trailing ones, each
    entry of `self.CSS_TOKENS` is tried in order at the current position. The first one that
    matches yields `(token name, match)` and scanning resumes at the end of that match.

    Raises `SelectorSyntaxError` when nothing matches at the current position. The message
    names the selector type suggested by the offending character where it can.
    """

    # Ignore whitespace and comments at start and end of pattern
    leading = RE_WS_BEGIN.search(pattern)
    trailing = RE_WS_END.search(pattern)
    index = leading.end(0) if leading else 0
    end = (trailing.start(0) if trailing else len(pattern)) - 1

    if self.debug:  # pragma: no cover
        print(f'## PARSING: {pattern!r}')

    while index <= end:
        for v in self.CSS_TOKENS:
            m = v.match(pattern, index, self.flags)
            if not m:
                continue
            name = v.get_name()
            if self.debug:  # pragma: no cover
                print(f"TOKEN: '{name}' --> {m.group(0)!r} at position {m.start(0)}")
            index = m.end(0)
            yield name, m
            break
        else:
            # No token pattern matched at this position.
            c = pattern[index]
            # If the character represents the start of one of the known selector types,
            # throw an exception mentioning that the known selector type is in error;
            # otherwise, report the invalid character.
            malformed = {
                '[': f"Malformed attribute selector at position {index}",
                '.': f"Malformed class selector at position {index}",
                '#': f"Malformed id selector at position {index}",
                ':': f"Malformed pseudo-class selector at position {index}"
            }
            msg = malformed.get(c, f"Invalid character {c!r} position {index}")
            raise SelectorSyntaxError(msg, self.pattern, index)

    if self.debug:  # pragma: no cover
        print('## END PARSING')

1358 

def process_selectors(self, index: int = 0, flags: int = 0) -> ct.SelectorList:
    """
    Process selectors.

    Tokenizes `self.pattern` with `selector_iter` and hands the token stream, together with
    `index` and `flags`, to `parse_selectors`, returning its result.
    """

    tokens = self.selector_iter(self.pattern)
    return self.parse_selectors(tokens, index, flags)

1362 return self.parse_selectors(self.selector_iter(self.pattern), index, flags)