Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/soupsieve/css

1"""CSS matcher."""

2from __future__ import annotations

3from datetime import datetime

4from . import util

5import re

6from . import css_types as ct

7import unicodedata

8import bs4

9from typing import Iterator, Iterable, Any, Callable, Sequence, Any, cast # noqa: F401, F811

# Empty tag pattern (whitespace okay)
# Matches a single character that is not a space, tab, carriage return, line feed, or form feed.
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')

# Matches a run of one or more characters that are not a space, tab, carriage return,
# line feed, or form feed.
RE_NOT_WS = re.compile('[^ \t\r\n\f]+')

# Relationships
REL_PARENT = ' '
REL_CLOSE_PARENT = '>'
REL_SIBLING = '~'
REL_CLOSE_SIBLING = '+'

# Relationships for :has() (forward looking)
# These are the same symbols as above, each prefixed with `:`.
REL_HAS_PARENT = ': '
REL_HAS_CLOSE_PARENT = ':>'
REL_HAS_SIBLING = ':~'
REL_HAS_CLOSE_SIBLING = ':+'

# Namespace URIs for XHTML and XML.
NS_XHTML = 'http://www.w3.org/1999/xhtml'
NS_XML = 'http://www.w3.org/XML/1998/namespace'

# Combined selector flag masks.
DIR_FLAGS = ct.SEL_DIR_LTR | ct.SEL_DIR_RTL
RANGES = ct.SEL_IN_RANGE | ct.SEL_OUT_OF_RANGE

# Maps a lowercase `dir` attribute value to its direction flag (`auto` maps to 0).
DIR_MAP = {
    'ltr': ct.SEL_DIR_LTR,
    'rtl': ct.SEL_DIR_RTL,
    'auto': 0
}

# Patterns for the input value formats: number, time, month, week, date, and local date-time.
RE_NUM = re.compile(r"^(?P<value>-?(?:[0-9]{1,}(\.[0-9]+)?|\.[0-9]+))$")
RE_TIME = re.compile(r'^(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$')
RE_MONTH = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})$')
RE_WEEK = re.compile(r'^(?P<year>[0-9]{4,})-W(?P<week>[0-9]{2})$')
RE_DATE = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})$')
RE_DATETIME = re.compile(
    r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})T(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$'
)
# Matches wildcard subtag runs (`-*-`, `-*-*-`, a trailing `-*`) in a language range.
RE_WILD_STRIP = re.compile(r'(?:(?:-\*-)(?:\*(?:-|$))*|-\*$)')

# Calendar limits used when validating dates.
MONTHS_30 = (4, 6, 9, 11)  # April, June, September, and November
FEB = 2
SHORT_MONTH = 30
LONG_MONTH = 31
FEB_MONTH = 28
FEB_LEAP_MONTH = 29
DAYS_IN_WEEK = 7

class _FakeParent:
    """
    Stand-in parent for an element that has none.

    A fragment without a `BeautifulSoup` document object gives the root
    element no parent, which prevents `nth` selectors from being evaluated.
    This wrapper exposes the element as its single child so the root can be
    traversed like any other child.
    """

    def __init__(self, element: bs4.Tag) -> None:
        """Store `element` as the only entry in `contents`."""

        children = [element]
        self.contents = children

    def __len__(self) -> int:
        """Return how many children are held."""

        children = self.contents
        return len(children)

class _DocumentNav:
    """Helpers for inspecting and walking a Beautiful Soup tree."""

    @classmethod
    def assert_valid_input(cls, tag: Any) -> None:
        """Raise `TypeError` unless `tag` is a valid input tag or document."""

        if cls.is_tag(tag):
            return
        # Fail on unexpected types.
        raise TypeError(f"Expected a BeautifulSoup 'Tag', but instead received type {type(tag)}")

    @staticmethod
    def is_doc(obj: bs4.element.PageElement | None) -> bool:
        """Return whether `obj` is a `BeautifulSoup` object."""
        return isinstance(obj, bs4.BeautifulSoup)

    @staticmethod
    def is_tag(obj: bs4.element.PageElement | None) -> bool:
        """Return whether `obj` is a `Tag`."""
        return isinstance(obj, bs4.Tag)

    @staticmethod
    def is_declaration(obj: bs4.element.PageElement | None) -> bool:  # pragma: no cover
        """Return whether `obj` is a `Declaration`."""
        return isinstance(obj, bs4.Declaration)

    @staticmethod
    def is_cdata(obj: bs4.element.PageElement | None) -> bool:
        """Return whether `obj` is a `CData` node."""
        return isinstance(obj, bs4.CData)

    @staticmethod
    def is_processing_instruction(obj: bs4.element.PageElement | None) -> bool:  # pragma: no cover
        """Return whether `obj` is a `ProcessingInstruction`."""
        return isinstance(obj, bs4.ProcessingInstruction)

    @staticmethod
    def is_navigable_string(obj: bs4.element.PageElement | None) -> bool:
        """Return whether `obj` is a `NavigableString`."""
        return isinstance(obj, bs4.element.NavigableString)

    @staticmethod
    def is_special_string(obj: bs4.element.PageElement | None) -> bool:
        """Return whether `obj` is a comment, declaration, CDATA, processing instruction, or doctype."""
        special_types = (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype)
        return isinstance(obj, special_types)

    @classmethod
    def is_content_string(cls, obj: bs4.element.PageElement | None) -> bool:
        """Return whether `obj` is a navigable string that is not one of the special strings."""

        if not cls.is_navigable_string(obj):
            return False
        return not cls.is_special_string(obj)

    @staticmethod
    def create_fake_parent(el: bs4.Tag) -> _FakeParent:
        """Wrap `el` in a `_FakeParent` so it can be treated as a child."""

        return _FakeParent(el)

    @staticmethod
    def is_xml_tree(el: bs4.Tag | None) -> bool:
        """Return whether the element (or document) is flagged as coming from an XML tree."""

        if el is None:
            return False
        return bool(el._is_xml)

    def is_iframe(self, el: bs4.Tag | None) -> bool:
        """Return whether `el` is an `iframe` in the HTML namespace."""

        if el is None:  # pragma: no cover
            return False

        # Names are compared case-sensitively in XML trees and lowercased otherwise.
        name = el.name if self.is_xml_tree(el) else util.lower(el.name)
        if name != 'iframe':
            return False
        return bool(self.is_html_tag(el))  # type: ignore[attr-defined]

    def is_root(self, el: bs4.Tag) -> bool:
        """
        Report whether `el` is a root element.

        An element is a root when it is the pre-calculated root of the tree, or when
        the document is HTML and its parent is an `iframe`.
        """

        is_tree_root = self.root and self.root is el  # type: ignore[attr-defined]
        if is_tree_root:
            return is_tree_root
        parent = self.get_parent(el)
        return parent is not None and self.is_html and self.is_iframe(parent)  # type: ignore[attr-defined]

    def get_contents(self, el: bs4.Tag | None, no_iframe: bool = False) -> Iterator[bs4.element.PageElement]:
        """Yield the direct contents of `el`, or nothing for `None` or a restricted `iframe`."""

        if el is None:
            return
        if no_iframe and self.is_iframe(el):
            return
        yield from el.contents

    def get_tag_children(
        self,
        el: bs4.Tag | None,
        start: int | None = None,
        reverse: bool = False,
        no_iframe: bool = False
    ) -> Iterator[bs4.Tag]:
        """Return an iterator over the direct children of `el` that are tags."""

        return self.get_children(  # type: ignore[return-value]
            el, start=start, reverse=reverse, tags=True, no_iframe=no_iframe
        )

    def get_children(
        self,
        el: bs4.Tag | None,
        start: int | None = None,
        reverse: bool = False,
        tags: bool = False,
        no_iframe: bool = False
    ) -> Iterator[bs4.element.PageElement]:
        """
        Yield direct children of `el`.

        Iteration begins at `start` (or the first/last child when `start` is `None`)
        and moves backwards when `reverse` is set. With `tags` set only tag children
        are yielded. Nothing is yielded when `start` is outside the child list.
        """

        if el is None or (no_iframe and self.is_iframe(el)):
            return

        last = len(el.contents) - 1
        if start is not None:
            first = start
        elif reverse:
            first = last
        else:
            first = 0

        if reverse:
            stop, step = -1, -1
        else:
            stop, step = last + 1, 1

        if 0 <= first <= last:
            for position in range(first, stop, step):
                node = el.contents[position]
                if not tags or self.is_tag(node):
                    yield node

    def get_tag_descendants(
        self,
        el: bs4.Tag | None,
        no_iframe: bool = False
    ) -> Iterator[bs4.Tag]:
        """Yield only the descendants of `el` that are tags."""

        yield from self.get_descendants(el, tags=True, no_iframe=no_iframe)  # type: ignore[misc]

    def get_descendants(
        self,
        el: bs4.Tag | None,
        tags: bool = False,
        no_iframe: bool = False
    ) -> Iterator[bs4.element.PageElement]:
        """
        Yield the descendants of `el`.

        With `tags` set, non-tag nodes are omitted. With `no_iframe` set, an
        `iframe` descendant is yielded itself but its content is skipped.
        """

        if el is None or (no_iframe and self.is_iframe(el)):
            return

        # While set, every node is skipped until this exact node is reached.
        resume_at = None
        for node in el.descendants:

            if resume_at is not None:
                if node is not resume_at:
                    continue
                resume_at = None

            if isinstance(node, bs4.Tag):
                if no_iframe and self.is_iframe(node):
                    if node.next_sibling is not None:
                        resume_at = node.next_sibling
                    else:
                        # No sibling: resume at whatever follows the deepest last child.
                        deepest = node  # type: bs4.element.PageElement
                        while isinstance(deepest, bs4.Tag) and deepest.contents:
                            deepest = deepest.contents[-1]
                        resume_at = deepest.next_element
                    yield node
                    if resume_at is None:
                        break
                    # Coverage isn't seeing this even though it's executed
                    continue  # pragma: no cover
                yield node

            elif not tags:
                yield node

    def get_parent(self, el: bs4.Tag | None, no_iframe: bool = False) -> bs4.Tag | None:
        """Return the parent of `el`; `None` when absent or when it is a restricted `iframe`."""

        if el is None:
            return None
        parent = el.parent
        if no_iframe and parent is not None and self.is_iframe(parent):  # pragma: no cover
            return None
        return parent

    @staticmethod
    def get_tag_name(el: bs4.Tag | None) -> str | None:
        """Return the name of `el`, or `None` when there is no element."""

        if el is None:
            return None
        return el.name

    @staticmethod
    def get_prefix_name(el: bs4.Tag) -> str | None:
        """Return the prefix of `el`."""

        prefix = el.prefix
        return prefix

    @staticmethod
    def get_uri(el: bs4.Tag | None) -> str | None:
        """Return the namespace `URI` of `el`, or `None` when there is no element."""

        if el is None:
            return None
        return el.namespace

    @classmethod
    def get_next_tag(cls, el: bs4.Tag) -> bs4.Tag | None:
        """Return the next sibling that is a tag, if any."""

        return cls.get_next(el, tags=True)  # type: ignore[return-value]

    @classmethod
    def get_next(cls, el: bs4.Tag, tags: bool = False) -> bs4.element.PageElement | None:
        """Return the next sibling; with `tags` set, the next sibling that is a tag."""

        sibling = el.next_sibling
        if not tags:
            return sibling

        # Walk forward past anything that is not a tag.
        while sibling is not None and not isinstance(sibling, bs4.Tag):
            sibling = sibling.next_sibling
        return sibling

    @classmethod
    def get_previous_tag(cls, el: bs4.Tag, tags: bool = True) -> bs4.Tag | None:
        """Return the previous sibling that is a tag, if any (`tags` is not consulted)."""

        return cls.get_previous(el, True)  # type: ignore[return-value]

    @classmethod
    def get_previous(cls, el: bs4.Tag, tags: bool = False) -> bs4.element.PageElement | None:
        """Return the previous sibling; with `tags` set, the previous sibling that is a tag."""

        sibling = el.previous_sibling
        if not tags:
            return sibling

        # Walk backward past anything that is not a tag.
        while sibling is not None and not isinstance(sibling, bs4.Tag):
            sibling = sibling.previous_sibling
        return sibling

    @staticmethod
    def has_html_ns(el: bs4.Tag | None) -> bool:
        """
        Report whether the element's own namespace is the XHTML namespace.

        This differs from `is_html_tag`, which reports whether an element is
        *treated* as being in the HTML namespace.
        """

        if el is None:
            return False
        ns = el.namespace
        return bool(ns and ns == NS_XHTML)

    @staticmethod
    def split_namespace(el: bs4.Tag | None, attr_name: str) -> tuple[str | None, str | None]:
        """Return the `namespace` and `name` attributes of `attr_name` (each `None` when missing)."""

        if el is None:  # pragma: no cover
            return None, None

        namespace = getattr(attr_name, 'namespace', None)
        local_name = getattr(attr_name, 'name', None)
        return namespace, local_name

    @classmethod
    def normalize_value(cls, value: Any) -> str | Sequence[str]:
        """
        Convert an attribute value into a string or a list of strings.

        `None` becomes an empty string, strings pass through, bytes are decoded
        as UTF-8, sequences are normalized item by item, and anything else is
        converted with `str`.
        """

        if value is None:
            return ''

        if isinstance(value, str):
            return value

        if isinstance(value, bytes):
            return value.decode("utf8")

        if isinstance(value, Sequence):
            # A nested (non-string) sequence is most certainly a user error, so it is
            # simply stringified; every other item is normalized recursively.
            return [
                str(item)
                if not isinstance(item, (str, bytes)) and isinstance(item, Sequence)
                else cast(str, cls.normalize_value(item))
                for item in value
            ]

        return str(value)

    @classmethod
    def get_attribute_by_name(
        cls,
        el: bs4.Tag,
        name: str,
        default: str | Sequence[str] | None = None
    ) -> str | Sequence[str] | None:
        """
        Return the normalized value of attribute `name`, or `default` when absent.

        XML elements use an exact key lookup; otherwise attribute keys are
        lowercased before being compared against `name`.
        """

        if el._is_xml:
            try:
                return cls.normalize_value(el.attrs[name])
            except KeyError:
                return default

        for key, raw in el.attrs.items():
            if util.lower(key) == name:
                return cls.normalize_value(raw)
        return default

    @classmethod
    def iter_attributes(cls, el: bs4.Tag | None) -> Iterator[tuple[str, str | Sequence[str] | None]]:
        """Yield `(name, normalized value)` for every attribute of `el`."""

        if el is None:
            return
        for key, raw in el.attrs.items():
            yield key, cls.normalize_value(raw)

    @classmethod
    def get_classes(cls, el: bs4.Tag) -> Sequence[str]:
        """Return the element's classes, splitting a plain string value on whitespace."""

        value = cls.get_attribute_by_name(el, 'class', [])
        if isinstance(value, str):
            value = RE_NOT_WS.findall(value)
        return cast(Sequence[str], value)

    def get_text(self, el: bs4.Tag, no_iframe: bool = False) -> str:
        """Return the concatenated content strings of all descendants of `el`."""

        pieces = []
        for node in self.get_descendants(el, no_iframe=no_iframe):
            if self.is_content_string(node):
                pieces.append(node)
        return ''.join(pieces)  # type: ignore[misc]

    def get_own_text(self, el: bs4.Tag, no_iframe: bool = False) -> list[str]:
        """Return the content strings that are direct contents of `el`."""

        own = []
        for node in self.get_contents(el, no_iframe=no_iframe):
            if self.is_content_string(node):
                own.append(node)
        return own  # type: ignore[return-value]

430

431

class Inputs:
    """Class for parsing and validating input items."""

    @staticmethod
    def validate_day(year: int, month: int, day: int) -> bool:
        """Return whether `day` exists in the given `month` of `year` (leap years included)."""

        max_days = LONG_MONTH
        if month == FEB:
            is_leap = ((year % 4 == 0) and (year % 100 != 0)) or (year % 400 == 0)
            max_days = FEB_LEAP_MONTH if is_leap else FEB_MONTH
        elif month in MONTHS_30:
            max_days = SHORT_MONTH
        return 1 <= day <= max_days

    @staticmethod
    def validate_week(year: int, week: int) -> bool:
        """
        Return whether `week` is a valid ISO week number for `year`.

        Per ISO 8601 rules, the last ISO week of a year is the week containing
        Dec 28, so the week number of that date is the year's maximum (52 or 53).
        """

        # `datetime` only supports years 1-9999, while week strings may carry more
        # than four year digits. The Gregorian calendar repeats exactly every 400
        # years (146097 days, a whole number of weeks), so an equivalent year inside
        # the supported range yields the same week count and never raises.
        equivalent_year = 2000 + year % 400
        max_week = datetime(equivalent_year, 12, 28).isocalendar()[1]
        return 1 <= week <= max_week

    @staticmethod
    def validate_month(month: int) -> bool:
        """Return whether `month` is in the range 1-12."""

        return 1 <= month <= 12

    @staticmethod
    def validate_year(year: int) -> bool:
        """Return whether `year` is at least 1."""

        return 1 <= year

    @staticmethod
    def validate_hour(hour: int) -> bool:
        """Return whether `hour` is in the range 0-23."""

        return 0 <= hour <= 23

    @staticmethod
    def validate_minutes(minutes: int) -> bool:
        """Return whether `minutes` is in the range 0-59."""

        return 0 <= minutes <= 59

    @classmethod
    def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None:
        """
        Parse an input value according to its input type.

        Returns a tuple of numeric components (for example `(year, month, day)`
        for `date`), or `None` when `value` is `None`, does not match the format
        for `itype`, fails validation, or `itype` is not a handled type.
        """

        parsed = None  # type: tuple[float, ...] | None
        if value is None:
            return None
        if itype == "date":
            m = RE_DATE.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                day = int(m.group('day'), 10)
                if cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day):
                    parsed = (year, month, day)
        elif itype == "month":
            m = RE_MONTH.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                if cls.validate_year(year) and cls.validate_month(month):
                    parsed = (year, month)
        elif itype == "week":
            m = RE_WEEK.match(value)
            if m:
                year = int(m.group('year'), 10)
                week = int(m.group('week'), 10)
                if cls.validate_year(year) and cls.validate_week(year, week):
                    parsed = (year, week)
        elif itype == "time":
            m = RE_TIME.match(value)
            if m:
                hour = int(m.group('hour'), 10)
                minutes = int(m.group('minutes'), 10)
                if cls.validate_hour(hour) and cls.validate_minutes(minutes):
                    parsed = (hour, minutes)
        elif itype == "datetime-local":
            m = RE_DATETIME.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                day = int(m.group('day'), 10)
                hour = int(m.group('hour'), 10)
                minutes = int(m.group('minutes'), 10)
                if (
                    cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day) and
                    cls.validate_hour(hour) and cls.validate_minutes(minutes)
                ):
                    parsed = (year, month, day, hour, minutes)
        elif itype in ("number", "range"):
            m = RE_NUM.match(value)
            if m:
                parsed = (float(m.group('value')),)
        return parsed

542

543

544class CSSMatch(_DocumentNav):

545 """Perform CSS matching."""

546

def __init__(
    self,
    selectors: ct.SelectorList,
    scope: bs4.Tag | None,
    namespaces: ct.Namespaces | None,
    flags: int
) -> None:
    """
    Set up matching state for `scope`.

    Validates `scope`, stores the selectors, namespaces and flags, creates the
    empty caches, and works out the document root and XML/HTML mode of the tree
    that `scope` belongs to.
    """

    self.assert_valid_input(scope)
    self.tag = scope
    self.cached_meta_lang = []  # type: list[tuple[str, str]]
    self.cached_default_forms = []  # type: list[tuple[bs4.Tag, bs4.Tag]]
    self.cached_indeterminate_forms = []  # type: list[tuple[bs4.Tag, str, bool]]
    self.selectors = selectors
    self.namespaces = namespaces if namespaces is not None else {}  # type: ct.Namespaces | dict[str, str]
    self.flags = flags
    self.iframe_restrict = False

    # Climb to the top-most node of the whole tree.
    doc = scope
    while True:
        parent = self.get_parent(doc)
        if not parent:
            break
        doc = parent

    # A document object's root is its first tag child; otherwise the top node is the root.
    root = None  # type: bs4.Tag | None
    if self.is_doc(doc):
        root = next(iter(self.get_tag_children(doc)), None)
    else:
        root = doc

    self.root = root
    self.scope = root if scope is doc else scope
    self.has_html_namespace = self.has_html_ns(root)

    # A document can be both XML and HTML (XHTML)
    self.is_xml = self.is_xml_tree(doc)
    self.is_html = not self.is_xml or self.has_html_namespace

587

def supports_namespaces(self) -> bool:
    """Report whether namespaces apply: the tree is XML or the root has the HTML namespace."""

    xml = self.is_xml
    return xml if xml else self.has_html_namespace

592

def get_tag_ns(self, el: bs4.Tag | None) -> str:
    """
    Return the namespace used for `el`.

    Without namespace support every element is given the XHTML namespace.
    With namespace support the element's own URI is returned, or an empty
    string when it has none.
    """

    if el is None:  # pragma: no cover
        return ''

    if not self.supports_namespaces():
        return NS_XHTML

    return self.get_uri(el) or ''

607

def is_html_tag(self, el: bs4.Tag | None) -> bool:
    """Report whether the namespace resolved for `el` is the XHTML namespace."""

    namespace = self.get_tag_ns(el)
    return namespace == NS_XHTML

612

def get_tag(self, el: bs4.Tag | None) -> str | None:
    """Return the tag name of `el`, lowercased unless the tree is XML."""

    name = self.get_tag_name(el)
    if name is None or self.is_xml:
        return name
    return util.lower(name)

618

def get_prefix(self, el: bs4.Tag) -> str | None:
    """Return the prefix of `el`, lowercased unless the tree is XML."""

    prefix = self.get_prefix_name(el)
    if prefix is None or self.is_xml:
        return prefix
    return util.lower(prefix)

624

def find_bidi(self, el: bs4.Tag) -> int | None:
    """
    Determine directionality from the text under `el`.

    Returns the LTR or RTL flag for the first strongly directional character
    found, or `None` when no such character exists.
    """

    skipped_tags = ('bdi', 'script', 'style', 'textarea', 'iframe')

    for node in self.get_children(el):

        if not self.is_tag(node):
            # Skip `doctype` comments, etc.
            if self.is_special_string(node):
                continue

            # Analyze text nodes for directionality.
            for char in node:  # type: ignore[attr-defined]
                category = unicodedata.bidirectional(char)
                if category == 'L':
                    return ct.SEL_DIR_LTR
                if category in ('AL', 'R'):
                    return ct.SEL_DIR_RTL
            continue

        # Avoid analyzing certain elements specified in the specification:
        # listed tag names, non-HTML tags, and tags with a recognized `dir` value.
        dir_value = util.lower(self.get_attribute_by_name(node, 'dir', ''))  # type: ignore[arg-type]
        direction = DIR_MAP.get(dir_value, None)
        name = self.get_tag(node)  # type: ignore[arg-type]
        if (
            (name and name in skipped_tags) or
            not self.is_html_tag(node) or  # type: ignore[arg-type]
            direction is not None
        ):
            continue  # pragma: no cover

        # Check directionality of this node's text
        value = self.find_bidi(node)  # type: ignore[arg-type]
        if value is not None:
            return value

    # Direction could not be determined
    return None

661

def extended_language_filter(self, lang_range: str, lang_tag: str) -> bool:
    """
    Report whether `lang_tag` matches `lang_range`.

    Both values are lowercased and split on `-`. Wildcard subtags matched by
    `RE_WILD_STRIP` are collapsed before the comparison.
    """

    ranges = RE_WILD_STRIP.sub('-', lang_range).lower().split('-')
    subtags = lang_tag.lower().split('-')
    range_count = len(ranges)
    subtag_count = len(subtags)
    primary_range = ranges[0]
    primary_tag = subtags[0]

    # Empty specified language should match unspecified language attributes
    if range_count == 1 and subtag_count == 1 and not primary_range and primary_range == primary_tag:
        return True

    # Primary tag needs to match
    if primary_range == '*':
        if subtag_count == 1 and not primary_tag:
            return False
    elif primary_range != primary_tag:
        return False

    range_pos = 1
    subtag_pos = 1

    # Match until we run out of ranges
    while range_pos < range_count:
        # Ran out of subtags, but we still have ranges
        if subtag_pos >= subtag_count:
            return False

        wanted = ranges[range_pos]
        current = subtags[subtag_pos]

        # Empty range
        if not wanted:
            return False

        if current == wanted:
            # Matched range
            range_pos += 1
        elif len(current) == 1:
            # Implicit wildcard cannot match singletons
            return False

        # Matched, or implicitly matched, so grab next subtag
        subtag_pos += 1

    return True

717

def match_attribute_name(
    self,
    el: bs4.Tag,
    attr: str,
    prefix: str | None
) -> str | Sequence[str] | None:
    """
    Find the attribute named `attr` on `el` and return its value.

    Returns `None` when no attribute matches, or when `prefix` is given, is not
    `*`, and is not a registered namespace prefix.
    """

    if not self.supports_namespaces():
        # No namespaces: a plain case-insensitive name comparison.
        for key, found in self.iter_attributes(el):
            if util.lower(attr) == util.lower(key):
                return found
        return None

    # If we have not defined namespaces, we can't very well find them, so don't bother trying.
    ns = self.namespaces.get(prefix) if prefix else None
    if prefix and ns is None and prefix != '*':
        return None

    for key, found in self.iter_attributes(el):

        # Get attribute parts
        namespace, name = self.split_namespace(el, key)

        # Can't match a prefix attribute as we haven't specified one to match
        # Try to match it normally as a whole `p:a` as selector may be trying `p\:a`.
        if ns is None:
            if self.is_xml:
                same = attr == key
            else:
                same = util.lower(attr) == util.lower(key)
            if same:
                return found
            continue

        # We can't match our desired prefix attribute as the attribute doesn't have a prefix
        if namespace is None or (ns != namespace and prefix != '*'):
            continue

        # The attribute doesn't match.
        if self.is_xml:
            differs = attr != name
        else:
            differs = util.lower(attr) != util.lower(name)
        if differs:
            continue

        return found

    return None

770

def match_namespace(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
    """Report whether the namespace of `el` satisfies the prefix on selector `tag`."""

    namespace = self.get_tag_ns(el)
    prefix = tag.prefix

    if prefix is None:
        # We must match the default namespace if one is not provided
        default_namespace = self.namespaces.get('')
        return default_namespace is None or namespace == default_namespace

    if prefix == '':
        # If we specified `|tag`, we must not have a namespace.
        return not namespace

    if prefix == '*':
        return True

    # Verify prefix matches
    expected = self.namespaces.get(prefix)
    return expected is not None and namespace == expected

791

def match_attributes(self, el: bs4.Tag, attributes: tuple[ct.SelectorAttribute, ...]) -> bool:
    """
    Report whether `el` satisfies every attribute selector.

    Each attribute must exist; when the selector carries a pattern, the
    value (sequence values joined with spaces) must match it.
    """

    if not attributes:
        return True

    for selector in attributes:
        found = self.match_attribute_name(el, selector.attribute, selector.prefix)
        # The XML-specific pattern is preferred for XML trees when one is defined.
        if self.is_xml and selector.xml_type_pattern:
            pattern = selector.xml_type_pattern
        else:
            pattern = selector.pattern
        if found is None:
            return False
        value = found if isinstance(found, str) else ' '.join(found)
        if pattern is not None and pattern.match(value) is None:
            return False
    return True

810

def match_tagname(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
    """Report whether the selector's tag name is absent, `*`, or equal to the name of `el`."""

    name = tag.name
    if name is not None and not self.is_xml:
        # Outside of XML, names are compared in lowercase.
        name = util.lower(name)

    if name is None:
        return True
    return name in (self.get_tag(el), '*')

819

def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool:
    """Report whether `el` satisfies both the namespace and the name of `tag` (or `tag` is `None`)."""

    if tag is None:
        return True

    # Both checks are always evaluated.
    namespace_ok = self.match_namespace(el, tag)
    name_ok = self.match_tagname(el, tag)
    return bool(namespace_ok and name_ok)

831

def match_past_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
    """
    Match a backward-looking relationship (ancestors or previous siblings).

    The relationship type is read from the first selector in `relation`.
    """

    found = False
    head = relation[0]
    # I don't think this can ever happen, but it makes `mypy` happy
    if isinstance(head, ct.SelectorNull):  # pragma: no cover
        return found

    kind = head.rel_type
    if kind == REL_PARENT:
        # Any ancestor may match.
        ancestor = self.get_parent(el, no_iframe=self.iframe_restrict)
        while ancestor and not found:
            found = self.match_selectors(ancestor, relation)
            ancestor = self.get_parent(ancestor, no_iframe=self.iframe_restrict)
    elif kind == REL_CLOSE_PARENT:
        # Only the direct parent may match.
        direct_parent = self.get_parent(el, no_iframe=self.iframe_restrict)
        if direct_parent:
            found = self.match_selectors(direct_parent, relation)
    elif kind == REL_SIBLING:
        # Any previous sibling tag may match.
        previous = self.get_previous_tag(el)
        while previous and not found:
            found = self.match_selectors(previous, relation)
            previous = self.get_previous_tag(previous)
    elif kind == REL_CLOSE_SIBLING:
        # Only the nearest previous sibling tag may match.
        previous = self.get_previous_tag(el)
        if previous and self.is_tag(previous):
            found = self.match_selectors(previous, relation)
    return found

859

def match_future_child(self, parent: bs4.Tag, relation: ct.SelectorList, recursive: bool = False) -> bool:
    """
    Report whether any child tag of `parent` matches `relation`.

    With `recursive` set, all tag descendants are considered instead of only
    the direct tag children.
    """

    iterate = self.get_tag_descendants if recursive else self.get_tag_children  # type: Callable[..., Iterator[bs4.Tag]]
    result = False
    for candidate in iterate(parent, no_iframe=self.iframe_restrict):
        result = self.match_selectors(candidate, relation)
        if result:
            break
    return result

873

def match_future_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
    """
    Match a forward-looking relationship (descendants or following siblings).

    The relationship type is read from the first selector in `relation`.
    """

    found = False
    head = relation[0]
    # I don't think this can ever happen, but it makes `mypy` happy
    if isinstance(head, ct.SelectorNull):  # pragma: no cover
        return found

    kind = head.rel_type
    if kind == REL_HAS_PARENT:
        # Any descendant may match.
        found = self.match_future_child(el, relation, True)
    elif kind == REL_HAS_CLOSE_PARENT:
        # Only direct children may match.
        found = self.match_future_child(el, relation)
    elif kind == REL_HAS_SIBLING:
        # Any following sibling tag may match.
        following = self.get_next_tag(el)
        while following and not found:
            found = self.match_selectors(following, relation)
            following = self.get_next_tag(following)
    elif kind == REL_HAS_CLOSE_SIBLING:
        # Only the nearest following sibling tag may match.
        following = self.get_next_tag(el)
        if following and self.is_tag(following):
            found = self.match_selectors(following, relation)
    return found

896

def match_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
    """
    Match the relationship of `el` to other elements.

    Relationship types starting with `:` are handled as forward-looking; all
    others are handled as backward-looking.
    """

    head = relation[0]
    if isinstance(head, ct.SelectorNull) or head.rel_type is None:
        return False

    if head.rel_type.startswith(':'):
        return self.match_future_relations(el, relation)
    return self.match_past_relations(el, relation)

911

def match_id(self, el: bs4.Tag, ids: tuple[str, ...]) -> bool:
    """Report whether every requested ID equals the element's `id` attribute."""

    for wanted in ids:
        if wanted != self.get_attribute_by_name(el, 'id', ''):
            return False
    return True

921

def match_classes(self, el: bs4.Tag, classes: tuple[str, ...]) -> bool:
    """Report whether every requested class is among the element's classes."""

    present = self.get_classes(el)
    return all(wanted in present for wanted in classes)

932

def match_root(self, el: bs4.Tag) -> bool:
    """
    Match `el` as the root element.

    The element must be a root and must have no sibling, before or after it,
    that is a tag, a content string with non-whitespace text, or CDATA.
    """

    is_root = self.is_root(el)
    if not is_root:
        return is_root

    # Scan backwards first, then forwards, stopping at the first disqualifying sibling.
    for step in (self.get_previous, self.get_next):
        sibling = step(el)  # type: Any
        while sibling is not None:
            if (
                self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
                self.is_cdata(sibling)
            ):
                return False
            sibling = step(sibling)
    return is_root

958

def match_scope(self, el: bs4.Tag) -> bool:
    """Report whether `el` is the very object stored as the scope."""

    scope = self.scope
    return scope is el

963

def match_nth_tag_type(self, el: bs4.Tag, child: bs4.Tag) -> bool:
    """Report whether `child` has the same tag name and namespace as `el` (for `nth` matches)."""

    if self.get_tag(child) != self.get_tag(el):
        return False
    return self.get_tag_ns(child) == self.get_tag_ns(el)

971

def match_nth(self, el: bs4.Tag, nth: tuple[ct.SelectorNth, ...]) -> bool:
    """
    Match `nth` elements.

    Every entry in `nth` must match for the result to be `True`; an empty
    `nth` matches. For each entry the target position is `a * count + b` when
    the entry is variable (`n.n`), otherwise just `a`. Siblings are counted
    from the end when `n.last` is set, and only siblings passing the entry's
    `of S` selectors and `of-type` restriction are counted.
    """

    matched = True

    for n in nth:
        matched = False
        # The element itself must satisfy the `of S` selectors, if any.
        if n.selectors and not self.match_selectors(el, n.selectors):
            break
        parent = self.get_parent(el)  # type: bs4.Tag | None
        if parent is None:
            # No real parent: wrap the element so it can be walked as a child.
            parent = cast('bs4.Tag', self.create_fake_parent(el))
        last = n.last
        last_index = len(parent) - 1
        # Start walking from the last child when counting from the end.
        index = last_index if last else 0
        # Number of qualifying siblings seen so far.
        relative_index = 0
        a = n.a
        b = n.b
        var = n.n
        count = 0
        count_incr = 1
        # Direction of the walk through the parent's children.
        factor = -1 if last else 1
        idx = last_idx = a * count + b if var else a

        # We can only adjust bounds within a variable index
        if var:
            # Abort if our nth index is out of bounds and only getting further out of bounds as we increment.
            # Otherwise, increment to try to get in bounds.
            adjust = None
            while idx < 1 or idx > last_index:
                if idx < 0:
                    diff_low = 0 - idx
                    if adjust is not None and adjust == 1:
                        break
                    adjust = -1
                    count += count_incr
                    idx = last_idx = a * count + b if var else a
                    diff = 0 - idx
                    if diff >= diff_low:
                        break
                else:
                    diff_high = idx - last_index
                    if adjust is not None and adjust == -1:
                        break
                    adjust = 1
                    count += count_incr
                    idx = last_idx = a * count + b if var else a
                    diff = idx - last_index
                    if diff >= diff_high:
                        break
                    diff_high = diff

            # If a < 0, our count is working backwards, so floor the index by increasing the count.
            # Find the count that yields the lowest, in bound value and use that.
            # Lastly reverse count increment so that we'll increase our index.
            lowest = count
            if a < 0:
                while idx >= 1:
                    lowest = count
                    count += count_incr
                    idx = last_idx = a * count + b if var else a
                count_incr = -1
            count = lowest
            idx = last_idx = a * count + b if var else a

        # Evaluate elements while our calculated nth index is still in range
        while 1 <= idx <= last_index + 1:
            child = None  # type: bs4.element.PageElement | None
            # Evaluate while our child index is still in range.
            for child in self.get_children(parent, start=index, reverse=factor < 0):
                index += factor
                if not isinstance(child, bs4.Tag):
                    continue
                # Handle `of S` in `nth-child`
                if n.selectors and not self.match_selectors(child, n.selectors):
                    continue
                # Handle `of-type`
                if n.of_type and not self.match_nth_tag_type(el, child):
                    continue
                relative_index += 1
                if relative_index == idx:
                    if child is el:
                        matched = True
                    else:
                        # Target position reached by another element; try the next index.
                        break
                if child is el:
                    break
            # Once the walk has reached `el`, nothing further can change the result.
            if child is el:
                break
            last_idx = idx
            count += count_incr
            if count < 0:
                # Count is counting down and has now ventured into invalid territory.
                break
            idx = a * count + b if var else a
            # A fixed (non-variable) index never changes, so stop instead of looping.
            if last_idx == idx:
                break
        if not matched:
            break
    return matched

1072

def match_empty(self, el: bs4.Tag) -> bool:
    """
    Check if element is empty (if requested).

    An element is empty when none of its children is a tag and none of its
    content strings holds a character matched by `RE_NOT_EMPTY`.
    """

    for node in self.get_children(el):
        # Any child tag disqualifies the element immediately.
        if self.is_tag(node):
            return False
        # So does a content string with something other than whitespace in it.
        if self.is_content_string(node) and RE_NOT_EMPTY.search(node):  # type: ignore[call-overload]
            return False
    return True

1085

def match_subselectors(self, el: bs4.Tag, selectors: tuple[ct.SelectorList, ...]) -> bool:
    """
    Match selectors.

    `el` must match every selector list in `selectors`; an empty tuple matches.
    Evaluation stops at the first selector list that fails, so later lists are
    not evaluated once the outcome is already known.
    """

    return all(self.match_selectors(el, sel) for sel in selectors)

1094

def match_contains(self, el: bs4.Tag, contains: tuple[ct.SelectorContains, ...]) -> bool:
    """
    Match element if it contains text.

    Every entry in `contains` must be satisfied, and an entry is satisfied when
    at least one of its `text` values is found:

    - entries with `own` set search each string returned by `get_own_text`,
    - all other entries search the single string returned by `get_text`.

    The two kinds of text are fetched lazily and cached separately, so a selector
    mixing both kinds searches the right text for each entry. Evaluation stops at
    the first entry that is not satisfied. An empty `contains` tuple matches.
    """

    own_text = None  # type: Sequence[str] | None
    full_text = None  # type: str | None

    for contain_list in contains:
        if contain_list.own:
            if own_text is None:
                own_text = self.get_own_text(el, no_iframe=self.is_html)
            found = any(text in chunk for text in contain_list.text for chunk in own_text)
        else:
            if full_text is None:
                full_text = self.get_text(el, no_iframe=self.is_html)
            found = any(text in full_text for text in contain_list.text)
        if not found:
            return False
    return True

1122

def match_default(self, el: bs4.Tag) -> bool:
    """
    Match default.

    `el` matches when it is the first `input` or `button` with `type="submit"`
    found among the descendants of its closest enclosing HTML `form`. The button
    found for a form is remembered in `self.cached_default_forms`.
    """

    # Climb the ancestors (not crossing iframes) until an HTML `form` is found.
    form = None  # type: bs4.Tag | None
    ancestor = self.get_parent(el, no_iframe=True)
    while ancestor:
        if self.get_tag(ancestor) == 'form' and self.is_html_tag(ancestor):
            form = ancestor
            break
        ancestor = self.get_parent(ancestor, no_iframe=True)

    if form is None:
        return False

    # A cached answer for this form settles the question right away.
    for cached_form, default_button in self.cached_default_forms:
        if cached_form is form:
            return default_button is el

    # Nothing cached yet: locate the form's first submit button and remember it.
    for candidate in self.get_tag_descendants(form, no_iframe=True):
        tag_name = self.get_tag(candidate)
        # Can't do nested forms (haven't figured out why we never hit this)
        if tag_name == 'form':  # pragma: no cover
            break
        if tag_name not in ('input', 'button'):
            continue
        type_attr = self.get_attribute_by_name(candidate, 'type', '')
        if type_attr and util.lower(type_attr) == 'submit':
            self.cached_default_forms.append((form, candidate))
            return el is candidate
    return False

1162

def match_indeterminate(self, el: bs4.Tag) -> bool:
    """
    Match an indeterminate radio button.

    `el` matches when no *other* `input` with `type="radio"`, the same `name` and a
    `checked` attribute exists in the same group. The group is the closest enclosing
    HTML `form`, or the top-most ancestor when no form encloses the element.
    `el` itself is skipped during the scan. The result for each `(form, name)` pair
    is remembered in `self.cached_indeterminate_forms`.
    """

    match = False
    name = cast(str, self.get_attribute_by_name(el, 'name'))

    def get_parent_form(el: bs4.Tag) -> bs4.Tag | None:
        """Find this input's form, falling back to its top-most ancestor."""
        parent = self.get_parent(el, no_iframe=True)
        if parent is None:
            # No ancestor to inspect: the element is its own top-most node.
            # This avoids handing `None` to `get_tag` / `is_html_tag` below.
            return el
        form = None
        while form is None:
            if self.get_tag(parent) == 'form' and self.is_html_tag(parent):
                form = parent
                break
            last_parent = parent
            parent = self.get_parent(parent, no_iframe=True)
            if parent is None:
                # Ran out of ancestors without meeting a form: use the top-most one.
                form = last_parent
                break
        return form

    form = get_parent_form(el)

    # Look in form cache to see if we've already evaluated that its fellow radio buttons are indeterminate
    if form is not None:
        found_form = False
        for f, n, i in self.cached_indeterminate_forms:
            if f is form and n == name:
                found_form = True
                if i is True:
                    match = True
                break

        # We didn't have the form cached, so validate that the radio button is indeterminate
        if not found_form:
            checked = False
            for child in self.get_tag_descendants(form, no_iframe=True):
                if child is el:
                    continue
                tag_name = self.get_tag(child)
                if tag_name == 'input':
                    is_radio = False
                    check = False
                    has_name = False
                    for k, v in self.iter_attributes(child):
                        if util.lower(k) == 'type' and util.lower(v) == 'radio':
                            is_radio = True
                        elif util.lower(k) == 'name' and v == name:
                            has_name = True
                        elif util.lower(k) == 'checked':
                            check = True
                        # A checked radio of the same name only counts if it belongs to the same form.
                        if is_radio and check and has_name and get_parent_form(child) is form:
                            checked = True
                            break
                if checked:
                    break
            if not checked:
                match = True
            self.cached_indeterminate_forms.append((form, name, match))

    return match

1224

def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool:
    """
    Match languages.

    The element's language is resolved in this order:

    1. a `lang` attribute (or a `lang` attribute in the XML namespace) on the element
       or one of its ancestors,
    2. a previously cached meta language for the resolved root,
    3. a `<meta http-equiv="content-language" content="...">` found under `html > head`.

    The resolved language must then satisfy every pattern group in `langs`; a group is
    satisfied when `extended_language_filter` accepts at least one of its patterns.
    Returns `False` when no language could be determined.
    """

    match = False
    has_ns = self.supports_namespaces()
    root = self.root
    has_html_namespace = self.has_html_namespace

    # Walk parents looking for `lang` (HTML) or `xml:lang` XML property.
    parent = el  # type: bs4.Tag | None
    found_lang = None
    last = None
    # NOTE(review): the loop condition is a truthiness test, so an empty attribute value
    # does not stop the walk and may be replaced by an ancestor's value — confirm intended.
    while not found_lang:
        has_html_ns = self.has_html_ns(parent)
        for k, v in self.iter_attributes(parent):
            attr_ns, attr = self.split_namespace(parent, k)
            if (
                ((not has_ns or has_html_ns) and (util.lower(k) if not self.is_xml else k) == 'lang') or
                (
                    has_ns and not has_html_ns and attr_ns == NS_XML and
                    (util.lower(attr) if not self.is_xml and attr is not None else attr) == 'lang'
                )
            ):
                found_lang = v
                break
        last = parent
        parent = self.get_parent(parent, no_iframe=self.is_html)

        if parent is None:
            # Ran out of ancestors: the last node visited becomes the root used for the
            # cache lookup and the meta search below.
            root = last
            has_html_namespace = self.has_html_ns(root)
            parent = last
            break

    # Use cached meta language.
    if found_lang is None and self.cached_meta_lang:
        for cache in self.cached_meta_lang:
            if root is not None and cast(str, root) is cache[0]:
                found_lang = cache[1]

    # If we couldn't find a language, and the document is HTML, look to meta to determine language.
    if found_lang is None and (not self.is_xml or (has_html_namespace and root and root.name == 'html')):
        # Find head
        found = False
        for tag in ('html', 'head'):
            found = False
            for child in self.get_tag_children(parent, no_iframe=self.is_html):
                if self.get_tag(child) == tag and self.is_html_tag(child):
                    found = True
                    # Descend one level; after both passes `parent` is the `head` element.
                    parent = child
                    break
            if not found:  # pragma: no cover
                break

        # Search meta tags
        if found and parent is not None:
            for child2 in parent:
                # NOTE(review): the HTML-tag check is applied to `parent` rather than to
                # `child2` — confirm whether the `meta` element itself was meant.
                if isinstance(child2, bs4.Tag) and self.get_tag(child2) == 'meta' and self.is_html_tag(parent):
                    c_lang = False
                    content = None
                    for k, v in self.iter_attributes(child2):
                        if util.lower(k) == 'http-equiv' and util.lower(v) == 'content-language':
                            c_lang = True
                        if util.lower(k) == 'content':
                            content = v
                        # Both pieces seen (in either attribute order): record and cache the language.
                        if c_lang and content:
                            found_lang = content
                            self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
                            break
                if found_lang is not None:
                    break
            if found_lang is None:
                # Cache the miss as an empty language so this root is not searched again.
                self.cached_meta_lang.append((cast(str, root), ''))

    # If we determined a language, compare.
    if found_lang is not None:
        for patterns in langs:
            match = False
            for pattern in patterns:
                if self.extended_language_filter(pattern, cast(str, found_lang)):
                    match = True
            # Every pattern group must be satisfied.
            if not match:
                break

    return match

1310

def match_dir(self, el: bs4.Tag | None, directionality: int) -> bool:
    """
    Check directionality.

    Resolution order: an explicit `ltr`/`rtl` value of `dir`; the root default
    (left to right); `input[type=tel]` without `dir` (left to right); `dir="auto"`
    on text inputs and `textarea` (first strong bidirectional character of the
    value); `bdi` without `dir` or any other `dir="auto"` element (`find_bidi`);
    otherwise whatever the parent resolves to.
    """

    ltr = ct.SEL_DIR_LTR
    rtl = ct.SEL_DIR_RTL

    # If we have to match both left and right, we can't match either.
    if (directionality & ltr) and (directionality & rtl):
        return False

    if el is None or not self.is_html_tag(el):
        return False

    def inherited() -> bool:
        """Defer to the parent's direction (never crossing an iframe)."""
        return self.match_dir(self.get_parent(el, no_iframe=True), directionality)

    # Element has defined direction of left to right or right to left
    declared = DIR_MAP.get(util.lower(self.get_attribute_by_name(el, 'dir', '')), None)
    if declared not in (None, 0):
        return declared == directionality

    # From here on `dir` is either absent/unrecognized (`None`) or `auto` (`0`).
    unset = declared is None
    auto = declared == 0

    # Element is the document element (the root) and no direction assigned, assume left to right.
    at_root = self.is_root(el)
    if at_root and unset:
        return ltr == directionality

    tag_name = self.get_tag(el)
    is_input = tag_name == 'input'
    is_textarea = tag_name == 'textarea'
    input_type = util.lower(self.get_attribute_by_name(el, 'type', '')) if is_input else ''

    # `input[type=tel]` with no direction assigned is assumed to be left to right.
    if is_input and input_type == 'tel' and unset:
        return ltr == directionality

    # Auto handling for text inputs
    is_text_control = is_textarea or (is_input and input_type in ('text', 'search', 'tel', 'url', 'email'))
    if is_text_control and auto:
        if is_textarea:
            value = ''.join(node for node in self.get_contents(el, no_iframe=True) if self.is_content_string(node))  # type: ignore[misc]
        else:
            value = cast(str, self.get_attribute_by_name(el, 'value', ''))
        if value:
            # The first strong character decides the direction.
            for char in value:
                bidi_class = unicodedata.bidirectional(char)
                if bidi_class in ('AL', 'R', 'L'):
                    resolved = ltr if bidi_class == 'L' else rtl
                    return resolved == directionality
            # Assume left to right
            return ltr == directionality
        if at_root:
            return ltr == directionality
        return inherited()

    # Auto handling for `bdi` and other non text inputs.
    if auto or (tag_name == 'bdi' and unset):
        detected = self.find_bidi(el)
        if detected is not None:
            return detected == directionality
        if at_root:
            return ltr == directionality

    # Match parents direction
    return inherited()

1369

def match_range(self, el: bs4.Tag, condition: int) -> bool:
    """
    Match range.

    Modeled on observed browser behavior: a value is tested for being *out of*
    range, and anything not out of range counts as in range. A missing or
    unparsable value is therefore in range. Without a usable `min` or `max`
    nothing matches, whichever condition was requested.
    """

    itype = util.lower(self.get_attribute_by_name(el, 'type'))
    mn = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'min', None)))
    mx = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'max', None)))

    # There is no valid min or max, so we cannot evaluate a range
    if mn is None and mx is None:
        return False

    value = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'value', None)))

    def beyond_bounds() -> bool:
        """Ordinary range test: below the minimum or above the maximum."""
        return (mn is not None and value < mn) or (mx is not None and value > mx)

    if value is None:
        out_of_range = False
    elif itype in ("date", "datetime-local", "month", "week", "number", "range"):
        out_of_range = beyond_bounds()
    elif itype == "time":
        if mn is not None and mx is not None and mn > mx:
            # Time is periodic, so this is a reversed/discontinuous range
            out_of_range = value < mn and value > mx
        else:
            out_of_range = beyond_bounds()
    else:
        out_of_range = False

    if condition & ct.SEL_IN_RANGE:
        return not out_of_range
    return out_of_range

1409

def match_defined(self, el: bs4.Tag) -> bool:
    """
    Match defined.

    `:defined` is related to custom elements in a browser. Here a tag is
    reported as defined when it has a name and at least one of these holds:

    - the name has no hyphen (it is not a custom element name),
    - the name contains a colon,
    - `get_prefix` reports a prefix for the tag.

    This relies on the parser providing the prefix information; if it does
    not, there is nothing more that can be done here.
    """

    tag_name = self.get_tag(el)
    if tag_name is None:
        return False
    return '-' not in tag_name or ':' in tag_name or self.get_prefix(el) is not None

1432

def match_placeholder_shown(self, el: bs4.Tag) -> bool:
    """
    Match placeholder shown according to HTML spec.

    - The element's text is checked for content. A single newline does not count as content.

    """

    text = self.get_text(el)
    return text == '' or text == '\n'

1447

def match_selectors(self, el: bs4.Tag, selectors: ct.SelectorList) -> bool:
    """
    Check if element matches one of the selectors.

    Each selector in the list is a conjunction of checks; the first selector whose
    checks all pass decides the result. For a negated list (`selectors.is_not`) the
    sense is inverted. An empty list never matches.

    Selector lists flagged `is_html` are evaluated with the `html` namespace and with
    iframe restriction switched on; both settings are put back afterwards, even when
    one of the checks raises.
    """

    match = False
    is_not = selectors.is_not
    is_html = selectors.is_html

    # Internal selector lists that use the HTML flag, will automatically get the `html` namespace.
    if is_html:
        namespaces = self.namespaces
        iframe_restrict = self.iframe_restrict
        self.namespaces = {'html': NS_XHTML}
        self.iframe_restrict = True

    try:
        if not is_html or self.is_html:
            for selector in selectors:
                match = is_not
                # We have a un-matchable situation (like `:focus` as you cannot focus an element in this environment)
                if isinstance(selector, ct.SelectorNull):
                    continue
                # Verify tag matches
                if not self.match_tag(el, selector.tag):
                    continue
                # Verify tag is defined
                if selector.flags & ct.SEL_DEFINED and not self.match_defined(el):
                    continue
                # Verify element is root
                if selector.flags & ct.SEL_ROOT and not self.match_root(el):
                    continue
                # Verify element is scope
                if selector.flags & ct.SEL_SCOPE and not self.match_scope(el):
                    continue
                # Verify element has placeholder shown
                if selector.flags & ct.SEL_PLACEHOLDER_SHOWN and not self.match_placeholder_shown(el):
                    continue
                # Verify `nth` matches
                if not self.match_nth(el, selector.nth):
                    continue
                # Verify element is empty
                if selector.flags & ct.SEL_EMPTY and not self.match_empty(el):
                    continue
                # Verify id matches
                if selector.ids and not self.match_id(el, selector.ids):
                    continue
                # Verify classes match
                if selector.classes and not self.match_classes(el, selector.classes):
                    continue
                # Verify attribute(s) match
                if not self.match_attributes(el, selector.attributes):
                    continue
                # Verify ranges
                if selector.flags & RANGES and not self.match_range(el, selector.flags & RANGES):
                    continue
                # Verify language patterns
                if selector.lang and not self.match_lang(el, selector.lang):
                    continue
                # Verify pseudo selector patterns
                if selector.selectors and not self.match_subselectors(el, selector.selectors):
                    continue
                # Verify relationship selectors
                if selector.relation and not self.match_relations(el, selector.relation):
                    continue
                # Validate that the current default selector match corresponds to the first submit button in the form
                if selector.flags & ct.SEL_DEFAULT and not self.match_default(el):
                    continue
                # Validate that the unset radio button is among radio buttons with the same name in a form that are
                # also not set.
                if selector.flags & ct.SEL_INDETERMINATE and not self.match_indeterminate(el):
                    continue
                # Validate element directionality
                if selector.flags & DIR_FLAGS and not self.match_dir(el, selector.flags & DIR_FLAGS):
                    continue
                # Validate that the tag contains the specified text.
                if selector.contains and not self.match_contains(el, selector.contains):
                    continue
                match = not is_not
                break
    finally:
        # Restore actual namespaces being used for external selector lists
        if is_html:
            self.namespaces = namespaces
            self.iframe_restrict = iframe_restrict

    return match

1531

def select(self, limit: int = 0) -> Iterator[bs4.Tag]:
    """
    Match all tags under the targeted tag.

    Yields each matching descendant of `self.tag` in the order they are
    produced. A `limit` below 1 means no limit; otherwise iteration stops
    after `limit` matches.
    """

    remaining = limit if limit >= 1 else None

    for candidate in self.get_tag_descendants(self.tag):
        if not self.match(candidate):
            continue
        yield candidate
        if remaining is None:
            continue
        remaining -= 1
        if remaining < 1:
            return

1544

def closest(self) -> bs4.Tag | None:
    """
    Match closest ancestor.

    Starts at `self.tag` itself and climbs through `get_parent` until a node
    matches; returns `None` when the chain ends without a match.
    """

    node = self.tag  # type: bs4.Tag | None
    while node is not None:
        if self.match(node):
            return node
        node = self.get_parent(node)
    return None

1556

def filter(self) -> list[bs4.Tag]:  # noqa A001
    """Filter tag's children: keep the entries of `self.tag`'s contents that are tags and match."""

    kept = []
    for node in self.get_contents(self.tag):
        # Anything that is not a tag can never match.
        if isinstance(node, bs4.Tag) and self.match(node):
            kept.append(node)
    return kept

1564

def match(self, el: bs4.Tag) -> bool:
    """Match: `el` must not be the document object, must be a tag, and must satisfy `self.selectors`."""

    # The document object itself is never a match.
    if self.is_doc(el):
        return False
    return self.is_tag(el) and self.match_selectors(el, self.selectors)

1569

1570

class SoupSieve(ct.Immutable):
    """
    Compiled Soup Sieve selector matching object.

    Holds a selector pattern together with its compiled selector list, the
    namespaces, custom selectors and flags it was compiled with, and exposes
    the matching operations by delegating to `CSSMatch`.
    """

    pattern: str
    selectors: ct.SelectorList
    namespaces: ct.Namespaces | None
    custom: dict[str, str]
    flags: int

    __slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")

    def __init__(
        self,
        pattern: str,
        selectors: ct.SelectorList,
        namespaces: ct.Namespaces | None,
        custom: ct.CustomSelectors | None,
        flags: int
    ):
        """Initialize by handing every field to the base class initializer."""

        fields = {
            "pattern": pattern,
            "selectors": selectors,
            "namespaces": namespaces,
            "custom": custom,
            "flags": flags,
        }
        super().__init__(**fields)

    def match(self, tag: bs4.Tag) -> bool:
        """Match: check `tag` itself against the compiled selectors."""

        matcher = CSSMatch(self.selectors, tag, self.namespaces, self.flags)
        return matcher.match(tag)

    def closest(self, tag: bs4.Tag) -> bs4.Tag | None:
        """Match closest ancestor, starting the search at `tag` itself."""

        matcher = CSSMatch(self.selectors, tag, self.namespaces, self.flags)
        return matcher.closest()

    def filter(self, iterable: Iterable[bs4.Tag]) -> list[bs4.Tag]:  # noqa A001
        """
        Filter.

        When handed a single tag, its contents all belong to one document, so one
        `CSSMatch` (and the searches it caches) can serve the whole operation.

        Any other iterable may mix tags from different documents or detached tags,
        so each item is matched with its own fresh `CSSMatch`.
        """

        if isinstance(iterable, bs4.Tag):
            matcher = CSSMatch(self.selectors, iterable, self.namespaces, self.flags)
            return matcher.filter()

        kept = []
        for node in iterable:
            # Navigable strings are skipped before any matching is attempted.
            if not CSSMatch.is_navigable_string(node) and self.match(node):
                kept.append(node)
        return kept

    def select_one(self, tag: bs4.Tag) -> bs4.Tag | None:
        """Select a single tag: the first match under `tag`, or `None` when there is none."""

        found = self.select(tag, limit=1)
        if not found:
            return None
        return found[0]

    def select(self, tag: bs4.Tag, limit: int = 0) -> list[bs4.Tag]:
        """Select the specified tags and return them as a list."""

        return [*self.iselect(tag, limit)]

    def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]:
        """Iterate the specified tags lazily."""

        matcher = CSSMatch(self.selectors, tag, self.namespaces, self.flags)
        yield from matcher.select(limit)

    def __repr__(self) -> str:  # pragma: no cover
        """Representation."""

        return (
            f"SoupSieve(pattern={self.pattern!r}, namespaces={self.namespaces!r}, "
            f"custom={self.custom!r}, flags={self.flags!r})"
        )

    __str__ = __repr__

1652

1653

# Hand `SoupSieve` to `css_types.pickle_register` at import time.
# NOTE(review): presumably this enables pickling of compiled selectors — confirm in `css_types`.
ct.pickle_register(SoupSieve)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/soupsieve/css_match.py: 58%

970 statements