Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/soupsieve/css

1"""CSS matcher."""

2from __future__ import annotations

3from datetime import datetime

4from . import util

5import re

6from . import css_types as ct

7import unicodedata

8import bs4 # type: ignore[import-untyped]

9from typing import Iterator, Iterable, Any, Callable, Sequence, cast # noqa: F401

# Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')

# A run of one or more non-whitespace characters (one token of a space separated list)
RE_NOT_WS = re.compile('[^ \t\r\n\f]+')

# Relationships
REL_PARENT = ' '
REL_CLOSE_PARENT = '>'
REL_SIBLING = '~'
REL_CLOSE_SIBLING = '+'

# Relationships for :has() (forward looking)
REL_HAS_PARENT = ': '
REL_HAS_CLOSE_PARENT = ':>'
REL_HAS_SIBLING = ':~'
REL_HAS_CLOSE_SIBLING = ':+'

# Namespace URIs compared against element namespaces
NS_XHTML = 'http://www.w3.org/1999/xhtml'
NS_XML = 'http://www.w3.org/XML/1998/namespace'

# Combined flag masks built from the selector flags in `css_types`
DIR_FLAGS = ct.SEL_DIR_LTR | ct.SEL_DIR_RTL
RANGES = ct.SEL_IN_RANGE | ct.SEL_OUT_OF_RANGE

# Maps a lower-cased `dir` attribute value to its direction flag (`auto` maps to no flag)
DIR_MAP = {
    'ltr': ct.SEL_DIR_LTR,
    'rtl': ct.SEL_DIR_RTL,
    'auto': 0
}

# Patterns used to parse the values of typed `input` elements
RE_NUM = re.compile(r"^(?P<value>-?(?:[0-9]{1,}(\.[0-9]+)?|\.[0-9]+))$")
RE_TIME = re.compile(r'^(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$')
RE_MONTH = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})$')
RE_WEEK = re.compile(r'^(?P<year>[0-9]{4,})-W(?P<week>[0-9]{2})$')
RE_DATE = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})$')
RE_DATETIME = re.compile(
    r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})T(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$'
)
# Collapses wildcard subtags (`-*-`, trailing `-*`) of a language range
RE_WILD_STRIP = re.compile(r'(?:(?:-\*-)(?:\*(?:-|$))*|-\*$)')

MONTHS_30 = (4, 6, 9, 11)  # April, June, September, and November
FEB = 2
SHORT_MONTH = 30
LONG_MONTH = 31
FEB_MONTH = 28
FEB_LEAP_MONTH = 29
DAYS_IN_WEEK = 7

class _FakeParent:
    """
    Fake parent class.

    When we have a fragment with no `BeautifulSoup` document object,
    we can't evaluate `nth` selectors properly. Create a temporary
    fake parent so we can traverse the root element as a child.
    """

    def __init__(self, element: bs4.Tag) -> None:
        """Store `element` as the only entry of `contents`."""

        self.contents = [element]

    def __len__(self) -> int:
        """Return the number of entries in `contents`."""

        return len(self.contents)

class _DocumentNav:
    """
    Navigate a Beautiful Soup document.

    Collection of helpers for classifying nodes and walking the tree.
    `is_iframe` and `is_root` read `self.is_html_tag`, `self.root` and
    `self.is_html`, none of which are defined in this class; they must be
    provided by whatever class derives from it.
    """

    @classmethod
    def assert_valid_input(cls, tag: Any) -> None:
        """Check if valid input tag or document; raise `TypeError` otherwise."""

        # Fail on unexpected types.
        if not cls.is_tag(tag):
            raise TypeError(f"Expected a BeautifulSoup 'Tag', but instead received type {type(tag)}")

    @staticmethod
    def is_doc(obj: bs4.Tag) -> bool:
        """Is `BeautifulSoup` object."""
        return isinstance(obj, bs4.BeautifulSoup)

    @staticmethod
    def is_tag(obj: bs4.PageElement) -> bool:
        """Is tag."""
        return isinstance(obj, bs4.Tag)

    @staticmethod
    def is_declaration(obj: bs4.PageElement) -> bool:  # pragma: no cover
        """Is declaration."""
        return isinstance(obj, bs4.Declaration)

    @staticmethod
    def is_cdata(obj: bs4.PageElement) -> bool:
        """Is CDATA."""
        return isinstance(obj, bs4.CData)

    @staticmethod
    def is_processing_instruction(obj: bs4.PageElement) -> bool:  # pragma: no cover
        """Is processing instruction."""
        return isinstance(obj, bs4.ProcessingInstruction)

    @staticmethod
    def is_navigable_string(obj: bs4.PageElement) -> bool:
        """Is navigable string."""
        return isinstance(obj, bs4.NavigableString)

    @staticmethod
    def is_special_string(obj: bs4.PageElement) -> bool:
        """Is special string (comment, declaration, CDATA, processing instruction or doctype)."""
        return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))

    @classmethod
    def is_content_string(cls, obj: bs4.PageElement) -> bool:
        """Check if node is content string (a navigable string that is not a special string)."""

        return cls.is_navigable_string(obj) and not cls.is_special_string(obj)

    @staticmethod
    def create_fake_parent(el: bs4.Tag) -> _FakeParent:
        """Create fake parent for a given element."""

        return _FakeParent(el)

    @staticmethod
    def is_xml_tree(el: bs4.Tag) -> bool:
        """Check if element (or document) is from a XML tree."""

        return bool(el._is_xml)

    def is_iframe(self, el: bs4.Tag) -> bool:
        """Check if element is an `iframe`."""

        # The name is compared case sensitively only for XML trees.
        return bool(
            ((el.name if self.is_xml_tree(el) else util.lower(el.name)) == 'iframe') and
            self.is_html_tag(el)  # type: ignore[attr-defined]
        )

    def is_root(self, el: bs4.Tag) -> bool:
        """
        Return whether element is a root element.

        We check that the element is the root of the tree (which we have already pre-calculated),
        and we check if it is the root element under an `iframe`.
        """

        root = self.root and self.root is el  # type: ignore[attr-defined]
        if not root:
            parent = self.get_parent(el)
            root = parent is not None and self.is_html and self.is_iframe(parent)  # type: ignore[attr-defined]
        return root

    def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]:
        """Yield the direct contents of `el`; nothing if `no_iframe` is set and `el` is an `iframe`."""
        if not no_iframe or not self.is_iframe(el):
            yield from el.contents

    def get_children(
        self,
        el: bs4.Tag,
        start: int | None = None,
        reverse: bool = False,
        tags: bool = True,
        no_iframe: bool = False
    ) -> Iterator[bs4.PageElement]:
        """
        Get children.

        Walks `el.contents` from `start` (default: first entry, or last when
        `reverse`) towards the end (or the beginning when `reverse`). With
        `tags` set only tag nodes are yielded. A `start` outside the contents
        yields nothing, as does an `iframe` when `no_iframe` is set.
        """

        if not no_iframe or not self.is_iframe(el):
            last = len(el.contents) - 1
            if start is None:
                index = last if reverse else 0
            else:
                index = start
            end = -1 if reverse else last + 1
            incr = -1 if reverse else 1

            if 0 <= index <= last:
                while index != end:
                    node = el.contents[index]
                    index += incr
                    if not tags or self.is_tag(node):
                        yield node

    def get_descendants(
        self,
        el: bs4.Tag,
        tags: bool = True,
        no_iframe: bool = False
    ) -> Iterator[bs4.PageElement]:
        """
        Get descendants.

        With `no_iframe` set, an `iframe` descendant is itself yielded but
        everything nested inside it is skipped.
        """

        if not no_iframe or not self.is_iframe(el):
            # When set, nodes are skipped until this node is reached again.
            next_good = None
            for child in el.descendants:

                if next_good is not None:
                    if child is not next_good:
                        continue
                    next_good = None

                is_tag = self.is_tag(child)

                if no_iframe and is_tag and self.is_iframe(child):
                    if child.next_sibling is not None:
                        next_good = child.next_sibling
                    else:
                        # No sibling: resume after the deepest last node inside the `iframe`.
                        last_child = child
                        while self.is_tag(last_child) and last_child.contents:
                            last_child = last_child.contents[-1]
                        next_good = last_child.next_element
                    yield child
                    if next_good is None:
                        break
                    # Coverage isn't seeing this even though it's executed
                    continue  # pragma: no cover

                if not tags or is_tag:
                    yield child

    def get_parent(self, el: bs4.Tag, no_iframe: bool = False) -> bs4.Tag | None:
        """Get parent; `None` if there is none, or if `no_iframe` is set and the parent is an `iframe`."""

        parent = el.parent
        if no_iframe and parent is not None and self.is_iframe(parent):
            parent = None
        return parent

    @staticmethod
    def get_tag_name(el: bs4.Tag) -> str | None:
        """Get tag."""

        return cast('str | None', el.name)

    @staticmethod
    def get_prefix_name(el: bs4.Tag) -> str | None:
        """Get prefix."""

        return cast('str | None', el.prefix)

    @staticmethod
    def get_uri(el: bs4.Tag) -> str | None:
        """Get namespace `URI`."""

        return cast('str | None', el.namespace)

    @classmethod
    def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement | None:
        """Get next sibling (next sibling tag when `tags` is set); `None` when exhausted."""

        sibling = el.next_sibling
        while tags and not cls.is_tag(sibling) and sibling is not None:
            sibling = sibling.next_sibling
        return sibling

    @classmethod
    def get_previous(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement | None:
        """Get previous sibling (previous sibling tag when `tags` is set); `None` when exhausted."""

        sibling = el.previous_sibling
        while tags and not cls.is_tag(sibling) and sibling is not None:
            sibling = sibling.previous_sibling
        return sibling

    @staticmethod
    def has_html_ns(el: bs4.Tag) -> bool:
        """
        Check if element has an HTML namespace.

        This is a bit different than whether a element is treated as having an HTML namespace,
        like we do in the case of `is_html_tag`.
        """

        ns = getattr(el, 'namespace') if el else None  # noqa: B009
        return bool(ns and ns == NS_XHTML)

    @staticmethod
    def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[str | None, str | None]:
        """
        Return namespace and attribute name without the prefix.

        Both are read from attributes of the `attr_name` object itself and
        default to `None` when it does not carry them; `el` is not used.
        """

        return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)

    @classmethod
    def normalize_value(cls, value: Any) -> str | Sequence[str]:
        """Normalize the value to be a string or list of strings."""

        # Treat `None` as empty string.
        if value is None:
            return ''

        # Pass through strings
        if (isinstance(value, str)):
            return value

        # If it's a byte string, convert it to Unicode, treating it as UTF-8.
        if isinstance(value, bytes):
            return value.decode("utf8")

        # BeautifulSoup supports sequences of attribute values, so make sure the children are strings.
        if isinstance(value, Sequence):
            new_value = []
            for v in value:
                if not isinstance(v, (str, bytes)) and isinstance(v, Sequence):
                    # This is most certainly a user error and will crash and burn later.
                    # To keep things working, we'll do what we do with all objects,
                    # And convert them to strings.
                    new_value.append(str(v))
                else:
                    # Convert the child to a string
                    new_value.append(cast(str, cls.normalize_value(v)))
            return new_value

        # Try and make anything else a string
        return str(value)

    @classmethod
    def get_attribute_by_name(
        cls,
        el: bs4.Tag,
        name: str,
        default: str | Sequence[str] | None = None
    ) -> str | Sequence[str] | None:
        """
        Get attribute by name.

        XML elements are looked up with the exact `name`; otherwise the
        lower-cased attribute keys are compared with `name`. Returns
        `default` when nothing matches.
        """

        value = default
        if el._is_xml:
            try:
                value = cls.normalize_value(el.attrs[name])
            except KeyError:
                pass
        else:
            for k, v in el.attrs.items():
                if util.lower(k) == name:
                    value = cls.normalize_value(v)
                    break
        return value

    @classmethod
    def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, str | Sequence[str] | None]]:
        """Iterate attributes as `(key, normalized value)` pairs."""

        for k, v in el.attrs.items():
            yield k, cls.normalize_value(v)

    @classmethod
    def get_classes(cls, el: bs4.Tag) -> Sequence[str]:
        """Get classes; a string value is split on whitespace."""

        classes = cls.get_attribute_by_name(el, 'class', [])
        if isinstance(classes, str):
            classes = RE_NOT_WS.findall(classes)
        return cast(Sequence[str], classes)

    def get_text(self, el: bs4.Tag, no_iframe: bool = False) -> str:
        """Get text: all descendant content strings joined together."""

        return ''.join(
            [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
        )

    def get_own_text(self, el: bs4.Tag, no_iframe: bool = False) -> list[str]:
        """Get Own Text: the content strings that are direct contents of `el`."""

        return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]

376

377

class Inputs:
    """Class for parsing and validating input items."""

    @staticmethod
    def validate_day(year: int, month: int, day: int) -> bool:
        """Validate day against the length of `month` in `year` (leap years included)."""

        max_days = LONG_MONTH
        if month == FEB:
            max_days = FEB_LEAP_MONTH if ((year % 4 == 0) and (year % 100 != 0)) or (year % 400 == 0) else FEB_MONTH
        elif month in MONTHS_30:
            max_days = SHORT_MONTH
        return 1 <= day <= max_days

    @staticmethod
    def validate_week(year: int, week: int) -> bool:
        """
        Validate week.

        A week is valid when it lies between 1 and the number of ISO 8601
        weeks of `year` (52 or 53). The count is computed arithmetically so
        that any positive year works; `datetime` only covers years 1-9999
        and `strptime('%Y')` additionally needs exactly four digits.
        """

        def dec_31_weekday(y: int) -> int:
            """Return the weekday of December 31 of year `y` (0 is Sunday, 4 is Thursday)."""
            return (y + y // 4 - y // 100 + y // 400) % 7

        # A year has 53 ISO weeks when it ends on a Thursday, or when the
        # previous year ended on a Wednesday (so this one starts on a Thursday).
        long_year = dec_31_weekday(year) == 4 or dec_31_weekday(year - 1) == 3
        max_week = 53 if long_year else 52
        return 1 <= week <= max_week

    @staticmethod
    def validate_month(month: int) -> bool:
        """Validate month."""

        return 1 <= month <= 12

    @staticmethod
    def validate_year(year: int) -> bool:
        """Validate year."""

        return 1 <= year

    @staticmethod
    def validate_hour(hour: int) -> bool:
        """Validate hour."""

        return 0 <= hour <= 23

    @staticmethod
    def validate_minutes(minutes: int) -> bool:
        """Validate minutes."""

        return 0 <= minutes <= 59

    @classmethod
    def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None:
        """
        Parse the input value.

        Returns a tuple of numbers for a well formed, valid `value` of the
        given input type (`date`, `month`, `week`, `time`, `datetime-local`,
        `number` or `range`), and `None` for anything else.
        """

        parsed = None  # type: tuple[float, ...] | None
        if value is None:
            return value
        if itype == "date":
            m = RE_DATE.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                day = int(m.group('day'), 10)
                if cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day):
                    parsed = (year, month, day)
        elif itype == "month":
            m = RE_MONTH.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                if cls.validate_year(year) and cls.validate_month(month):
                    parsed = (year, month)
        elif itype == "week":
            m = RE_WEEK.match(value)
            if m:
                year = int(m.group('year'), 10)
                week = int(m.group('week'), 10)
                if cls.validate_year(year) and cls.validate_week(year, week):
                    parsed = (year, week)
        elif itype == "time":
            m = RE_TIME.match(value)
            if m:
                hour = int(m.group('hour'), 10)
                minutes = int(m.group('minutes'), 10)
                if cls.validate_hour(hour) and cls.validate_minutes(minutes):
                    parsed = (hour, minutes)
        elif itype == "datetime-local":
            m = RE_DATETIME.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                day = int(m.group('day'), 10)
                hour = int(m.group('hour'), 10)
                minutes = int(m.group('minutes'), 10)
                if (
                    cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day) and
                    cls.validate_hour(hour) and cls.validate_minutes(minutes)
                ):
                    parsed = (year, month, day, hour, minutes)
        elif itype in ("number", "range"):
            m = RE_NUM.match(value)
            if m:
                parsed = (float(m.group('value')),)
        return parsed

479

480

481class CSSMatch(_DocumentNav):

482 """Perform CSS matching."""

483

def __init__(
    self,
    selectors: ct.SelectorList,
    scope: bs4.Tag,
    namespaces: ct.Namespaces | None,
    flags: int
) -> None:
    """
    Initialize.

    Validates `scope`, stores the selectors, namespaces (an empty mapping
    when `None`) and flags, creates the empty caches, and pre-calculates the
    tree's root element together with the XML / HTML state of the document.
    """

    self.assert_valid_input(scope)
    self.tag = scope
    self.cached_meta_lang = []  # type: list[tuple[str, str]]
    self.cached_default_forms = []  # type: list[tuple[bs4.Tag, bs4.Tag]]
    self.cached_indeterminate_forms = []  # type: list[tuple[bs4.Tag, str, bool]]
    self.selectors = selectors
    self.namespaces = {} if namespaces is None else namespaces  # type: ct.Namespaces | dict[str, str]
    self.flags = flags
    self.iframe_restrict = False

    # Find the root element for the whole tree
    doc = scope
    parent = self.get_parent(doc)
    while parent:
        doc = parent
        parent = self.get_parent(doc)
    root = None
    if not self.is_doc(doc):
        # No document object on top: the topmost element is the root.
        root = doc
    else:
        # The root is the first child tag of the document object (if any).
        for child in self.get_children(doc):
            root = child
            break

    self.root = root
    # A scope that is the document itself is replaced by the root element.
    self.scope = scope if scope is not doc else root
    self.has_html_namespace = self.has_html_ns(root)

    # A document can be both XML and HTML (XHTML)
    self.is_xml = self.is_xml_tree(doc)
    self.is_html = not self.is_xml or self.has_html_namespace

524

def supports_namespaces(self) -> bool:
    """Check if namespaces are supported: the tree is XML or its root has the HTML namespace."""

    return self.is_xml or self.has_html_namespace

529

def get_tag_ns(self, el: bs4.Tag) -> str:
    """
    Get tag namespace.

    With namespace support this is the element's namespace `URI` (an empty
    string when it has none); without it every tag is given the XHTML
    namespace.
    """

    if self.supports_namespaces():
        namespace = ''
        ns = self.get_uri(el)
        if ns:
            namespace = ns
    else:
        namespace = NS_XHTML
    return namespace

541

def is_html_tag(self, el: bs4.Tag) -> bool:
    """Check if tag is in HTML namespace, i.e. `get_tag_ns` gives the XHTML namespace."""

    return self.get_tag_ns(el) == NS_XHTML

546

def get_tag(self, el: bs4.Tag) -> str | None:
    """Get tag name; lower-cased unless the document is XML, `None` when the element has no name."""

    name = self.get_tag_name(el)
    return util.lower(name) if name is not None and not self.is_xml else name

552

def get_prefix(self, el: bs4.Tag) -> str | None:
    """Get prefix; lower-cased unless the document is XML, `None` when the element has no prefix."""

    prefix = self.get_prefix_name(el)
    return util.lower(prefix) if prefix is not None and not self.is_xml else prefix

558

def find_bidi(self, el: bs4.Tag) -> int | None:
    """
    Get directionality from element text.

    Returns the direction flag of the first strongly directional character
    found in the element's text (searching child elements depth first), or
    `None` when no such character exists.
    """

    for node in self.get_children(el, tags=False):

        # Analyze child text nodes
        if self.is_tag(node):

            # Avoid analyzing certain elements specified in the specification.
            direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(node, 'dir', '')), None)
            if (
                self.get_tag(node) in ('bdi', 'script', 'style', 'textarea', 'iframe') or
                not self.is_html_tag(node) or
                direction is not None
            ):
                continue  # pragma: no cover

            # Check directionality of this node's text
            value = self.find_bidi(node)
            if value is not None:
                return value

            # Direction could not be determined
            continue  # pragma: no cover

        # Skip `doctype` comments, etc.
        if self.is_special_string(node):
            continue

        # Analyze text nodes for directionality.
        for c in node:
            bidi = unicodedata.bidirectional(c)
            if bidi in ('AL', 'R', 'L'):
                return ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
    return None

594

def extended_language_filter(self, lang_range: str, lang_tag: str) -> bool:
    """
    Filter the language tags.

    Compares the `-` separated subtags of `lang_tag` with those of
    `lang_range`, case insensitively. Wildcard subtags after the first one
    are collapsed beforehand, and range subtags may skip over tag subtags
    that are longer than one character.
    """

    match = True
    lang_range = RE_WILD_STRIP.sub('-', lang_range).lower()
    ranges = lang_range.split('-')
    subtags = lang_tag.lower().split('-')
    length = len(ranges)
    slength = len(subtags)
    rindex = 0
    sindex = 0
    r = ranges[rindex]
    s = subtags[sindex]

    # Empty specified language should match unspecified language attributes
    if length == 1 and slength == 1 and not r and r == s:
        return True

    # Primary tag needs to match
    if (r != '*' and r != s) or (r == '*' and slength == 1 and not s):
        match = False

    rindex += 1
    sindex += 1

    # Match until we run out of ranges
    while match and rindex < length:
        r = ranges[rindex]
        try:
            s = subtags[sindex]
        except IndexError:
            # Ran out of subtags,
            # but we still have ranges
            match = False
            continue

        # Empty range
        if not r:
            match = False
            continue

        # Matched range
        elif s == r:
            rindex += 1

        # Implicit wildcard cannot match
        # singletons
        elif len(s) == 1:
            match = False
            continue

        # Implicitly matched, so grab next subtag
        sindex += 1

    return match

650

def match_attribute_name(
    self,
    el: bs4.Tag,
    attr: str,
    prefix: str | None
) -> str | Sequence[str] | None:
    """
    Match attribute name and return value if it exists.

    Returns the value of the first attribute of `el` matching `attr` (and
    the namespace registered for `prefix`, if one is given), or `None` when
    there is no match or `prefix` is not a registered namespace. Names are
    compared case sensitively only for XML documents.
    """

    value = None
    if self.supports_namespaces():
        # If we have not defined namespaces, we can't very well find them, so don't bother trying.
        if prefix:
            ns = self.namespaces.get(prefix)
            if ns is None and prefix != '*':
                return None
        else:
            ns = None

        for k, v in self.iter_attributes(el):

            # Get attribute parts
            namespace, name = self.split_namespace(el, k)

            # Can't match a prefix attribute as we haven't specified one to match
            # Try to match it normally as a whole `p:a` as selector may be trying `p\:a`.
            if ns is None:
                if (self.is_xml and attr == k) or (not self.is_xml and util.lower(attr) == util.lower(k)):
                    value = v
                    break
                # Coverage is not finding this even though it is executed.
                # Adding a print statement before this (and erasing coverage) causes coverage to find the line.
                # Ignore the false positive message.
                continue  # pragma: no cover

            # We can't match our desired prefix attribute as the attribute doesn't have a prefix
            if namespace is None or ns != namespace and prefix != '*':
                continue

            # The attribute doesn't match.
            if (util.lower(attr) != util.lower(name)) if not self.is_xml else (attr != name):
                continue

            value = v
            break
    else:
        # No namespace support: plain case insensitive comparison of the whole name.
        for k, v in self.iter_attributes(el):
            if util.lower(attr) != util.lower(k):
                continue
            value = v
            break
    return value

703

def match_namespace(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
    """
    Match the namespace of the element.

    `tag.prefix` selects the rule: `None` requires the default namespace
    (when one is registered under `''`), an empty prefix requires the
    element to have no namespace, `*` accepts anything, and any other
    prefix must be registered and equal to the element's namespace.
    """

    match = True
    namespace = self.get_tag_ns(el)
    default_namespace = self.namespaces.get('')
    tag_ns = '' if tag.prefix is None else self.namespaces.get(tag.prefix)
    # We must match the default namespace if one is not provided
    if tag.prefix is None and (default_namespace is not None and namespace != default_namespace):
        match = False
    # If we specified `|tag`, we must not have a namespace.
    elif (tag.prefix is not None and tag.prefix == '' and namespace):
        match = False
    # Verify prefix matches
    elif (
        tag.prefix and
        tag.prefix != '*' and (tag_ns is None or namespace != tag_ns)
    ):
        match = False
    return match

724

def match_attributes(self, el: bs4.Tag, attributes: tuple[ct.SelectorAttribute, ...]) -> bool:
    """
    Match attributes.

    Every entry must name an attribute present on `el`; when the entry has
    a pattern, the attribute value (list values joined with single spaces)
    must match it. For XML documents `xml_type_pattern` is preferred over
    `pattern` when it is set.
    """

    match = True
    if attributes:
        for a in attributes:
            temp = self.match_attribute_name(el, a.attribute, a.prefix)
            pattern = a.xml_type_pattern if self.is_xml and a.xml_type_pattern else a.pattern
            if temp is None:
                match = False
                break
            value = temp if isinstance(temp, str) else ' '.join(temp)
            if pattern is None:
                continue
            elif pattern.match(value) is None:
                match = False
                break
    return match

743

def match_tagname(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
    """
    Match tag name.

    A selector without a name, or with the name `*`, matches any element.
    The selector's name is lower-cased first unless the document is XML.
    """

    name = (util.lower(tag.name) if not self.is_xml and tag.name is not None else tag.name)
    return not (
        name is not None and
        name not in (self.get_tag(el), '*')
    )

752

def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool:
    """
    Match the tag.

    No tag selector (`None`) matches everything; otherwise both the
    namespace and the tag name have to match.
    """

    if tag is None:
        return True
    # Verify namespace first; the name does not matter once it has failed.
    if not self.match_namespace(el, tag):
        return False
    return bool(self.match_tagname(el, tag))

764

def match_past_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
    """
    Match past relationship.

    Depending on the relation type of the first selector, `relation` is
    matched against any ancestor, the parent, any previous sibling tag, or
    the closest previous sibling tag of `el`.
    """

    found = False
    # I don't think this can ever happen, but it makes `mypy` happy
    if isinstance(relation[0], ct.SelectorNull):  # pragma: no cover
        return found

    if relation[0].rel_type == REL_PARENT:
        parent = self.get_parent(el, no_iframe=self.iframe_restrict)
        while not found and parent:
            found = self.match_selectors(parent, relation)
            parent = self.get_parent(parent, no_iframe=self.iframe_restrict)
    elif relation[0].rel_type == REL_CLOSE_PARENT:
        parent = self.get_parent(el, no_iframe=self.iframe_restrict)
        if parent:
            found = self.match_selectors(parent, relation)
    elif relation[0].rel_type == REL_SIBLING:
        sibling = self.get_previous(el)
        while not found and sibling:
            found = self.match_selectors(sibling, relation)
            sibling = self.get_previous(sibling)
    elif relation[0].rel_type == REL_CLOSE_SIBLING:
        sibling = self.get_previous(el)
        if sibling and self.is_tag(sibling):
            found = self.match_selectors(sibling, relation)
    return found

792

def match_future_child(self, parent: bs4.Tag, relation: ct.SelectorList, recursive: bool = False) -> bool:
    """
    Match future child.

    Returns whether `relation` matches a child of `parent`, or any of its
    descendants when `recursive` is set. Stops at the first match.
    """

    match = False
    if recursive:
        children = self.get_descendants  # type: Callable[..., Iterator[bs4.Tag]]
    else:
        children = self.get_children
    for child in children(parent, no_iframe=self.iframe_restrict):
        match = self.match_selectors(child, relation)
        if match:
            break
    return match

806

def match_future_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
    """
    Match future relationship.

    Depending on the relation type of the first selector, `relation` is
    matched against any descendant, any child, any following sibling tag, or
    the closest following sibling tag of `el`.
    """

    found = False
    # I don't think this can ever happen, but it makes `mypy` happy
    if isinstance(relation[0], ct.SelectorNull):  # pragma: no cover
        return found

    if relation[0].rel_type == REL_HAS_PARENT:
        found = self.match_future_child(el, relation, True)
    elif relation[0].rel_type == REL_HAS_CLOSE_PARENT:
        found = self.match_future_child(el, relation)
    elif relation[0].rel_type == REL_HAS_SIBLING:
        sibling = self.get_next(el)
        while not found and sibling:
            found = self.match_selectors(sibling, relation)
            sibling = self.get_next(sibling)
    elif relation[0].rel_type == REL_HAS_CLOSE_SIBLING:
        sibling = self.get_next(el)
        if sibling and self.is_tag(sibling):
            found = self.match_selectors(sibling, relation)
    return found

829

def match_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
    """
    Match relationship to other elements.

    Relation types starting with `:` are forward looking and go to
    `match_future_relations`; all others go to `match_past_relations`. A
    null selector or a missing relation type never matches.
    """

    found = False

    if isinstance(relation[0], ct.SelectorNull) or relation[0].rel_type is None:
        return found

    if relation[0].rel_type.startswith(':'):
        found = self.match_future_relations(el, relation)
    else:
        found = self.match_past_relations(el, relation)

    return found

844

def match_id(self, el: bs4.Tag, ids: tuple[str, ...]) -> bool:
    """Match element's ID: every entry of `ids` must equal the element's `id` attribute."""

    # The attribute does not change between comparisons, so look it up once.
    current_id = self.get_attribute_by_name(el, 'id', '')
    return all(i == current_id for i in ids)

854

def match_classes(self, el: bs4.Tag, classes: tuple[str, ...]) -> bool:
    """Match element's classes: every entry of `classes` must be among the element's classes."""

    current_classes = self.get_classes(el)
    return all(c in current_classes for c in classes)

865

def match_root(self, el: bs4.Tag) -> bool:
    """
    Match element as root.

    Besides being the root element, `el` must not have any sibling that is
    a tag, a content string with non-whitespace text, or a CDATA node.
    """

    is_root = self.is_root(el)
    if is_root:
        # Look backwards for disqualifying siblings.
        sibling = self.get_previous(el, tags=False)
        while is_root and sibling is not None:
            if (
                self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
                self.is_cdata(sibling)
            ):
                is_root = False
            else:
                sibling = self.get_previous(sibling, tags=False)
    if is_root:
        # Look forwards for disqualifying siblings.
        sibling = self.get_next(el, tags=False)
        while is_root and sibling is not None:
            if (
                self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
                self.is_cdata(sibling)
            ):
                is_root = False
            else:
                sibling = self.get_next(sibling, tags=False)
    return is_root

891

def match_scope(self, el: bs4.Tag) -> bool:
    """Match element as scope: `el` must be the very object stored as `self.scope`."""

    return self.scope is el

896

def match_nth_tag_type(self, el: bs4.Tag, child: bs4.Tag) -> bool:
    """Match tag type for `nth` matches: same tag name and same tag namespace."""

    return (
        (self.get_tag(child) == self.get_tag(el)) and
        (self.get_tag_ns(child) == self.get_tag_ns(el))
    )

904

def match_nth(self, el: bs4.Tag, nth: Iterable[Any]) -> bool:
    """
    Match `nth` elements.

    `nth` is iterated; each entry supplies `a`, `b` and `n` (the index is
    `a * count + b` when `n` is set, otherwise just `a`), `last` (count from
    the end), `of_type` (count only siblings of the same tag type) and
    `selectors` (count only siblings matching them, which `el` must match
    as well). All entries have to match. An element without a parent is
    evaluated as the only child of a temporary fake parent.
    """

    matched = True

    for n in nth:
        matched = False
        if n.selectors and not self.match_selectors(el, n.selectors):
            break
        parent = self.get_parent(el)
        if parent is None:
            parent = self.create_fake_parent(el)
        last = n.last
        last_index = len(parent) - 1
        index = last_index if last else 0
        relative_index = 0
        a = n.a
        b = n.b
        var = n.n
        count = 0
        count_incr = 1
        factor = -1 if last else 1
        idx = last_idx = a * count + b if var else a

        # We can only adjust bounds within a variable index
        if var:
            # Abort if our nth index is out of bounds and only getting further out of bounds as we increment.
            # Otherwise, increment to try to get in bounds.
            adjust = None
            while idx < 1 or idx > last_index:
                if idx < 0:
                    diff_low = 0 - idx
                    if adjust is not None and adjust == 1:
                        break
                    adjust = -1
                    count += count_incr
                    idx = last_idx = a * count + b if var else a
                    diff = 0 - idx
                    if diff >= diff_low:
                        break
                else:
                    diff_high = idx - last_index
                    if adjust is not None and adjust == -1:
                        break
                    adjust = 1
                    count += count_incr
                    idx = last_idx = a * count + b if var else a
                    diff = idx - last_index
                    if diff >= diff_high:
                        break
                    diff_high = diff

            # If a < 0, our count is working backwards, so floor the index by increasing the count.
            # Find the count that yields the lowest, in bound value and use that.
            # Lastly reverse count increment so that we'll increase our index.
            lowest = count
            if a < 0:
                while idx >= 1:
                    lowest = count
                    count += count_incr
                    idx = last_idx = a * count + b if var else a
                count_incr = -1
            count = lowest
            idx = last_idx = a * count + b if var else a

        # Evaluate elements while our calculated nth index is still in range
        while 1 <= idx <= last_index + 1:
            child = None
            # Evaluate while our child index is still in range.
            for child in self.get_children(parent, start=index, reverse=factor < 0, tags=False):
                index += factor
                if not self.is_tag(child):
                    continue
                # Handle `of S` in `nth-child`
                if n.selectors and not self.match_selectors(child, n.selectors):
                    continue
                # Handle `of-type`
                if n.of_type and not self.match_nth_tag_type(el, child):
                    continue
                relative_index += 1
                if relative_index == idx:
                    if child is el:
                        matched = True
                    else:
                        break
                if child is el:
                    break
            if child is el:
                break
            last_idx = idx
            count += count_incr
            if count < 0:
                # Count is counting down and has now ventured into invalid territory.
                break
            idx = a * count + b if var else a
            if last_idx == idx:
                break
        if not matched:
            break
    return matched

1005

def match_empty(self, el: bs4.Tag) -> bool:
    """
    Check if element is empty (if requested).

    An element is empty when it has no child tags and none of its content
    strings contains a non-whitespace character.
    """

    is_empty = True
    for child in self.get_children(el, tags=False):
        if self.is_tag(child):
            is_empty = False
            break
        elif self.is_content_string(child) and RE_NOT_EMPTY.search(child):
            is_empty = False
            break
    return is_empty

1018

def match_subselectors(self, el: bs4.Tag, selectors: tuple[ct.SelectorList, ...]) -> bool:
    """Match selectors: `el` has to match every selector list in `selectors`."""

    match = True
    for sel in selectors:
        if not self.match_selectors(el, sel):
            match = False
    return match

1027

def match_contains(self, el: bs4.Tag, contains: tuple[ct.SelectorContains, ...]) -> bool:
    """
    Match element if it contains text.

    Every entry of `contains` must be satisfied by at least one of its
    `text` alternatives. Entries flagged `own` are tested against each of
    the element's own text nodes, all others against the element's whole
    text. `iframe` content is excluded for HTML documents.
    """

    # The two kinds of text have different shapes (list of strings vs. one string),
    # so each is fetched lazily and cached separately.
    own_text = None  # type: Sequence[str] | None
    full_text = None  # type: str | None
    for contain_list in contains:
        if contain_list.own:
            if own_text is None:
                own_text = self.get_own_text(el, no_iframe=self.is_html)
            found = any(text in c for text in contain_list.text for c in own_text)
        else:
            if full_text is None:
                full_text = self.get_text(el, no_iframe=self.is_html)
            found = any(text in full_text for text in contain_list.text)
        if not found:
            return False
    return True

1055

def match_default(self, el: bs4.Tag) -> bool:
    """
    Match default.

    The element matches when it is the first `input` or `button` descendant of
    its enclosing HTML `form` whose `type` attribute is `submit`. The result of
    the search is remembered per form in `cached_default_forms`.
    """

    # Climb the ancestors until an HTML `form` is found (or we run out of parents).
    form = None
    ancestor = self.get_parent(el, no_iframe=True)
    while ancestor and form is None:
        if self.get_tag(ancestor) == 'form' and self.is_html_tag(ancestor):
            form = ancestor
        else:
            ancestor = self.get_parent(ancestor, no_iframe=True)

    # If the form's default button is already known, just compare against it.
    for cached_form, cached_button in self.cached_default_forms:
        if cached_form is form:
            return cached_button is el

    # Otherwise look for the form's first submit control and cache it.
    for node in self.get_descendants(form, no_iframe=True):
        tag_name = self.get_tag(node)
        # Can't do nested forms (haven't figured out why we never hit this)
        if tag_name == 'form':  # pragma: no cover
            break
        if tag_name not in ('input', 'button'):
            continue
        kind = self.get_attribute_by_name(node, 'type', '')
        if kind and util.lower(kind) == 'submit':
            self.cached_default_forms.append((form, node))
            return el is node

    return False

1094

def match_indeterminate(self, el: bs4.Tag) -> bool:
    """
    Match indeterminate.

    The element matches when no other `input` with `type="radio"`, the same
    `name` and a `checked` attribute exists under the same form (or, when
    there is no form, under the topmost ancestor). The outcome is cached per
    `(form, name)` pair in `cached_indeterminate_forms`.
    """

    name = cast(str, self.get_attribute_by_name(el, 'name'))

    def get_parent_form(el: bs4.Tag) -> bs4.Tag | None:
        """Find this input's form, falling back to the last ancestor reached."""
        ancestor = self.get_parent(el, no_iframe=True)
        while True:
            if self.get_tag(ancestor) == 'form' and self.is_html_tag(ancestor):
                return ancestor
            previous = ancestor
            ancestor = self.get_parent(ancestor, no_iframe=True)
            if ancestor is None:
                return previous

    form = get_parent_form(el)

    # Reuse an earlier verdict for this form and radio group name if we have one.
    for cached_form, cached_name, indeterminate in self.cached_indeterminate_forms:
        if cached_form is form and cached_name == name:
            return indeterminate is True

    # No cached verdict: look for a checked radio button of the same group.
    checked = False
    for node in self.get_descendants(form, no_iframe=True):
        if node is el or self.get_tag(node) != 'input':
            continue

        is_radio = False
        is_checked = False
        same_name = False
        for key, value in self.iter_attributes(node):
            attr = util.lower(key)
            if attr == 'type' and util.lower(value) == 'radio':
                is_radio = True
            elif attr == 'name' and value == name:
                same_name = True
            elif attr == 'checked':
                is_checked = True
            # The candidate must also belong to the very same form.
            if is_radio and is_checked and same_name and get_parent_form(node) is form:
                checked = True
                break
        if checked:
            break

    match = not checked
    self.cached_indeterminate_forms.append((form, name, match))
    return match

1155

def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool:
    """
    Match languages.

    The language is taken from the nearest `lang` / `xml:lang` attribute found
    while walking from `el` up through its ancestors. If none is found, a
    cached or freshly searched `<meta http-equiv="content-language">` value is
    used. Every group in `langs` must then have at least one pattern accepted
    by `extended_language_filter`.
    """

    ns_aware = self.supports_namespaces()
    root = self.root
    root_in_html_ns = self.has_html_namespace

    # Walk parents looking for `lang` (HTML) or `xml:lang` XML property.
    node = el
    found_lang = None
    previous = None
    while not found_lang:
        node_in_html_ns = self.has_html_ns(node)
        for key, value in self.iter_attributes(node):
            attr_ns, attr = self.split_namespace(node, key)
            if not ns_aware or node_in_html_ns:
                # Plain `lang` attribute (case-insensitive unless the document is XML).
                is_lang = (key if self.is_xml else util.lower(key)) == 'lang'
            else:
                # Namespaced `xml:lang` attribute.
                is_lang = (
                    attr_ns == NS_XML and
                    (util.lower(attr) if not self.is_xml and attr is not None else attr) == 'lang'
                )
            if is_lang:
                found_lang = value
                break
        previous = node
        node = self.get_parent(node, no_iframe=self.is_html)

        if node is None:
            # Ran out of ancestors: treat the last node seen as the root.
            root = previous
            root_in_html_ns = self.has_html_ns(root)
            node = previous
            break

    # Use cached meta language.
    if found_lang is None and self.cached_meta_lang:
        for cache in self.cached_meta_lang:
            if root is cache[0]:
                found_lang = cache[1]

    # If we couldn't find a language, and the document is HTML, look to meta to determine language.
    if found_lang is None and (not self.is_xml or (root_in_html_ns and root.name == 'html')):
        # Find head
        located = False
        for wanted in ('html', 'head'):
            located = False
            for child in self.get_children(node, no_iframe=self.is_html):
                if self.get_tag(child) == wanted and self.is_html_tag(child):
                    located = True
                    node = child
                    break
            if not located:  # pragma: no cover
                break

        # Search meta tags
        if located:
            for child in node:
                if self.is_tag(child) and self.get_tag(child) == 'meta' and self.is_html_tag(node):
                    is_content_language = False
                    content = None
                    for key, value in self.iter_attributes(child):
                        if util.lower(key) == 'http-equiv' and util.lower(value) == 'content-language':
                            is_content_language = True
                        if util.lower(key) == 'content':
                            content = value
                        if is_content_language and content:
                            found_lang = content
                            self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
                            break
                if found_lang is not None:
                    break
            if found_lang is None:
                # Remember that this root has no meta language.
                self.cached_meta_lang.append((cast(str, root), ''))

    # Without a language there is nothing to compare.
    if found_lang is None:
        return False

    # If we determined a language, compare.
    matched = False
    for patterns in langs:
        matched = False
        for pattern in patterns:
            if self.extended_language_filter(pattern, cast(str, found_lang)):
                matched = True
        if not matched:
            break

    return matched

1241

def match_dir(self, el: bs4.Tag, directionality: int) -> bool:
    """
    Check directionality.

    Resolves the element's direction from its `dir` attribute, from the text
    of text-like form controls, from `find_bidi`, or finally from its parent
    (recursively), and compares it with the requested `directionality`.
    """

    # If we have to match both left and right, we can't match either.
    if directionality & ct.SEL_DIR_LTR and directionality & ct.SEL_DIR_RTL:
        return False

    if el is None or not self.is_html_tag(el):
        return False

    # Element has defined direction of left to right or right to left
    direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(el, 'dir', '')), None)
    if direction not in (None, 0):
        return direction == directionality

    wants_ltr = ct.SEL_DIR_LTR == directionality
    unset = direction is None
    auto = direction == 0

    # Element is the document element (the root) and no direction assigned, assume left to right.
    is_root = self.is_root(el)
    if is_root and unset:
        return wants_ltr

    # If `input[type=tel]` and no direction is assigned, assume left to right.
    tag_name = self.get_tag(el)
    is_input = tag_name == 'input'
    is_textarea = tag_name == 'textarea'
    itype = util.lower(self.get_attribute_by_name(el, 'type', '')) if is_input else ''
    if is_input and itype == 'tel' and unset:
        return wants_ltr

    # Auto handling for text inputs
    is_text_control = (is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea
    if is_text_control and auto:
        if is_textarea:
            value = ''.join(node for node in self.get_contents(el, no_iframe=True) if self.is_content_string(node))
        else:
            value = cast(str, self.get_attribute_by_name(el, 'value', ''))
        if value:
            # The first strongly directional character decides.
            for char in value:
                bidi = unicodedata.bidirectional(char)
                if bidi in ('AL', 'R', 'L'):
                    direction = ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
                    return direction == directionality
            # Assume left to right
            return wants_ltr
        if is_root:
            return wants_ltr
        return self.match_dir(self.get_parent(el, no_iframe=True), directionality)

    # Auto handling for `bdi` and other non text inputs.
    if auto or (tag_name == 'bdi' and unset):
        direction = self.find_bidi(el)
        if direction is not None:
            return direction == directionality
        if is_root:
            return wants_ltr

    # Match parents direction
    return self.match_dir(self.get_parent(el, no_iframe=True), directionality)

1300

def match_range(self, el: bs4.Tag, condition: int) -> bool:
    """
    Match range.

    Behavior is modeled after what we see in browsers. Browsers seem to evaluate
    if the value is out of range, and if not, it is in range. So a missing value
    will not evaluate out of range; therefore, value is in range.

    Returns `False` when neither `min` nor `max` parses to a value. Otherwise
    returns the in-range result when `condition` has `ct.SEL_IN_RANGE` set,
    and the out-of-range result when it does not.
    """

    itype = util.lower(self.get_attribute_by_name(el, 'type'))
    lower = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'min', None)))
    upper = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'max', None)))

    # There is no valid min or max, so we cannot evaluate a range
    if lower is None and upper is None:
        return False

    value = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'value', None)))

    out_of_range = False
    if value is not None:
        if itype == "time" and lower is not None and upper is not None and lower > upper:
            # Time is periodic, so this is a reversed/discontinuous range
            out_of_range = value < lower and value > upper
        elif itype in ("date", "datetime-local", "month", "week", "number", "range", "time"):
            out_of_range = (lower is not None and value < lower) or (upper is not None and value > upper)

    return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range

1340

def match_defined(self, el: bs4.Tag) -> bool:
    """
    Match defined.

    `:defined` is related to custom elements in a browser.

    - A tag without a name never matches.
    - Tags that are not custom (don't have a hyphen in the name) match.
    - Hyphenated tags still match when the name contains a colon or when
      the tag has a prefix.

    This of course requires the parser to provide us with the proper prefix info;
    if it doesn't, there is nothing we can do.
    """

    tag_name = self.get_tag(el)
    if tag_name is None:
        return False
    if '-' not in tag_name or ':' in tag_name:
        return True
    return self.get_prefix(el) is not None

1363

def match_placeholder_shown(self, el: bs4.Tag) -> bool:
    """
    Match placeholder shown according to HTML spec.

    The element matches when its text (as returned by `get_text`) is empty
    or is exactly one newline; a single newline does not count as content.
    """

    return self.get_text(el) in ('', '\n')

1378

def match_selectors(self, el: bs4.Tag, selectors: ct.SelectorList) -> bool:
    """
    Check if element matches one of the selectors.

    Each selector in the list is tried in order; the first one whose every
    condition holds decides the result. For a list flagged `is_not` the
    result is inverted. Lists flagged `is_html` are only evaluated when the
    document is HTML, and run with a temporary `html` namespace mapping and
    iframe restriction. The previous values are restored on exit, including
    when one of the sub-matchers raises.
    """

    match = False
    is_not = selectors.is_not
    is_html = selectors.is_html

    # Internal selector lists that use the HTML flag, will automatically get the `html` namespace.
    if is_html:
        namespaces = self.namespaces
        iframe_restrict = self.iframe_restrict
        self.namespaces = {'html': NS_XHTML}
        self.iframe_restrict = True

    try:
        if not is_html or self.is_html:
            for selector in selectors:
                match = is_not
                # We have an un-matchable situation (like `:focus`, as you cannot focus an element
                # in this environment)
                if isinstance(selector, ct.SelectorNull):
                    continue
                # Verify tag matches
                if not self.match_tag(el, selector.tag):
                    continue
                # Verify tag is defined
                if selector.flags & ct.SEL_DEFINED and not self.match_defined(el):
                    continue
                # Verify element is root
                if selector.flags & ct.SEL_ROOT and not self.match_root(el):
                    continue
                # Verify element is scope
                if selector.flags & ct.SEL_SCOPE and not self.match_scope(el):
                    continue
                # Verify element has placeholder shown
                if selector.flags & ct.SEL_PLACEHOLDER_SHOWN and not self.match_placeholder_shown(el):
                    continue
                # Verify `nth` matches
                if not self.match_nth(el, selector.nth):
                    continue
                # Verify element is empty
                if selector.flags & ct.SEL_EMPTY and not self.match_empty(el):
                    continue
                # Verify id matches
                if selector.ids and not self.match_id(el, selector.ids):
                    continue
                # Verify classes match
                if selector.classes and not self.match_classes(el, selector.classes):
                    continue
                # Verify attribute(s) match
                if not self.match_attributes(el, selector.attributes):
                    continue
                # Verify ranges
                if selector.flags & RANGES and not self.match_range(el, selector.flags & RANGES):
                    continue
                # Verify language patterns
                if selector.lang and not self.match_lang(el, selector.lang):
                    continue
                # Verify pseudo selector patterns
                if selector.selectors and not self.match_subselectors(el, selector.selectors):
                    continue
                # Verify relationship selectors
                if selector.relation and not self.match_relations(el, selector.relation):
                    continue
                # Validate that the current default selector match corresponds to the first submit button in the form
                if selector.flags & ct.SEL_DEFAULT and not self.match_default(el):
                    continue
                # Validate that the unset radio button is among radio buttons with the same name in a form that are
                # also not set.
                if selector.flags & ct.SEL_INDETERMINATE and not self.match_indeterminate(el):
                    continue
                # Validate element directionality
                if selector.flags & DIR_FLAGS and not self.match_dir(el, selector.flags & DIR_FLAGS):
                    continue
                # Validate that the tag contains the specified text.
                if selector.contains and not self.match_contains(el, selector.contains):
                    continue
                match = not is_not
                break
    finally:
        # Restore actual namespaces being used for external selector lists,
        # even if a matcher raised, so the matcher is never left in the
        # temporary HTML-only configuration.
        if is_html:
            self.namespaces = namespaces
            self.iframe_restrict = iframe_restrict

    return match

1462

def select(self, limit: int = 0) -> Iterator[bs4.Tag]:
    """
    Match all tags under the targeted tag.

    Yields matching descendants of `self.tag` in the order `get_descendants`
    produces them. A `limit` below 1 means no limit; otherwise iteration
    stops once `limit` matches have been yielded.
    """

    remaining = limit if limit >= 1 else None

    for node in self.get_descendants(self.tag):
        if not self.match(node):
            continue
        yield node
        if remaining is None:
            continue
        remaining -= 1
        if remaining < 1:
            return

1475

def closest(self) -> bs4.Tag | None:
    """
    Match closest ancestor.

    Starts at `self.tag` itself and climbs through `get_parent` until a node
    matches; returns `None` when the chain of parents is exhausted.
    """

    node = self.tag
    while node is not None:
        if self.match(node):
            return node
        node = self.get_parent(node)
    return None

1487

def filter(self) -> list[bs4.Tag]:  # noqa A001
    """
    Filter tag's children.

    Returns the entries of `get_contents(self.tag)` that are not navigable
    strings and that match, in their original order.
    """

    kept = []
    for node in self.get_contents(self.tag):
        if self.is_navigable_string(node):
            continue
        if self.match(node):
            kept.append(node)
    return kept

1492

def match(self, el: bs4.Tag) -> bool:
    """
    Match.

    Document objects and non-tag nodes never match; any other node is tested
    against `self.selectors`.
    """

    if self.is_doc(el):
        return False
    return self.is_tag(el) and self.match_selectors(el, self.selectors)

1497

1498

class SoupSieve(ct.Immutable):
    """
    Compiled Soup Sieve selector matching object.

    Holds the original pattern together with its compiled selector list,
    namespaces, custom selectors and flags, and exposes the matching API.
    Each operation is delegated to a `CSSMatch` built for the given tag.
    """

    pattern: str
    selectors: ct.SelectorList
    namespaces: ct.Namespaces | None
    custom: dict[str, str]
    flags: int

    __slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")

    def __init__(
        self,
        pattern: str,
        selectors: ct.SelectorList,
        namespaces: ct.Namespaces | None,
        custom: ct.CustomSelectors | None,
        flags: int
    ):
        """Initialize by handing every field to the base class constructor."""

        fields = {
            "pattern": pattern,
            "selectors": selectors,
            "namespaces": namespaces,
            "custom": custom,
            "flags": flags,
        }
        super().__init__(**fields)

    def _matcher(self, scope: bs4.Tag) -> CSSMatch:
        """Build a `CSSMatch` for this compiled selector with `scope` as its target tag."""

        return CSSMatch(self.selectors, scope, self.namespaces, self.flags)

    def match(self, tag: bs4.Tag) -> bool:
        """Check whether `tag` matches the compiled selector."""

        return self._matcher(tag).match(tag)

    def closest(self, tag: bs4.Tag) -> bs4.Tag:
        """Return the closest matching ancestor of `tag` (may be `tag` itself), or `None`."""

        return self._matcher(tag).closest()

    def filter(self, iterable: Iterable[bs4.Tag]) -> list[bs4.Tag]:  # noqa A001
        """
        Filter.

        `CSSMatch` can cache certain searches for tags of the same document,
        so if we are given a tag, all tags are from the same document,
        and we can take advantage of the optimization.

        Any other kind of iterable could have tags from different documents or detached tags,
        so for those, we use a new `CSSMatch` for each item in the iterable.
        """

        if not CSSMatch.is_tag(iterable):
            return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
        return self._matcher(iterable).filter()

    def select_one(self, tag: bs4.Tag) -> bs4.Tag:
        """Select a single tag: the first match under `tag`, or `None` when nothing matches."""

        for found in self.select(tag, limit=1):
            return found
        return None

    def select(self, tag: bs4.Tag, limit: int = 0) -> list[bs4.Tag]:
        """Select the specified tags and return them as a list."""

        return [*self.iselect(tag, limit)]

    def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]:
        """Iterate the specified tags lazily."""

        matcher = self._matcher(tag)
        yield from matcher.select(limit)

    def __repr__(self) -> str:  # pragma: no cover
        """Representation."""

        return (
            f"SoupSieve(pattern={self.pattern!r}, namespaces={self.namespaces!r}, "
            f"custom={self.custom!r}, flags={self.flags!r})"
        )

    __str__ = __repr__


ct.pickle_register(SoupSieve)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/soupsieve/css_match.py: 22%

953 statements