Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/soupsieve/css

1"""CSS matcher."""

2from __future__ import annotations

3from datetime import datetime

4from . import util

5import re

6from . import css_types as ct

7import unicodedata

8import bs4 # type: ignore[import]

9from typing import Iterator, Iterable, Any, Callable, Sequence, cast # noqa: F401

# Empty tag pattern (whitespace okay)
# Matches one character that is NOT a space, tab, carriage return, line feed or form feed.
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')

# Matches a run of one or more characters outside that same whitespace set.
RE_NOT_WS = re.compile('[^ \t\r\n\f]+')

# Relationships
REL_PARENT = ' '
REL_CLOSE_PARENT = '>'
REL_SIBLING = '~'
REL_CLOSE_SIBLING = '+'

# Relationships for :has() (forward looking)
# Same symbols as above, prefixed with ':' so the two families can be told apart.
REL_HAS_PARENT = ': '
REL_HAS_CLOSE_PARENT = ':>'
REL_HAS_SIBLING = ':~'
REL_HAS_CLOSE_SIBLING = ':+'

# Namespace URIs compared against element namespaces.
NS_XHTML = 'http://www.w3.org/1999/xhtml'
NS_XML = 'http://www.w3.org/XML/1998/namespace'

# Combined bit masks built from the selector flags in `css_types`.
DIR_FLAGS = ct.SEL_DIR_LTR | ct.SEL_DIR_RTL
RANGES = ct.SEL_IN_RANGE | ct.SEL_OUT_OF_RANGE

# Maps a lowercase `dir` attribute value to a direction flag (`auto` maps to 0).
DIR_MAP = {
    'ltr': ct.SEL_DIR_LTR,
    'rtl': ct.SEL_DIR_RTL,
    'auto': 0
}

# Patterns for the textual value of form inputs, keyed by input type.
# Every pattern is anchored at both ends; years take four or more digits.
RE_NUM = re.compile(r"^(?P<value>-?(?:[0-9]{1,}(\.[0-9]+)?|\.[0-9]+))$")
RE_TIME = re.compile(r'^(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$')
RE_MONTH = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})$')
RE_WEEK = re.compile(r'^(?P<year>[0-9]{4,})-W(?P<week>[0-9]{2})$')
RE_DATE = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})$')
RE_DATETIME = re.compile(
    r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})T(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$'
)
# Used by the language-range filter to collapse wildcard subtags after the first one into '-'.
RE_WILD_STRIP = re.compile(r'(?:(?:-\*-)(?:\*(?:-|$))*|-\*$)')

# Calendar constants used when validating day-of-month values.
MONTHS_30 = (4, 6, 9, 11)  # April, June, September, and November
FEB = 2
SHORT_MONTH = 30
LONG_MONTH = 31
FEB_MONTH = 28
FEB_LEAP_MONTH = 29
DAYS_IN_WEEK = 7

class _FakeParent:
    """
    Fake parent class.

    When we have a fragment with no `BeautifulSoup` document object,
    we can't evaluate `nth` selectors properly. Create a temporary
    fake parent so we can traverse the root element as a child.
    """

    def __init__(self, element: bs4.Tag) -> None:
        """Store `element` as the only entry of `contents`."""

        self.contents = [element]

    def __len__(self) -> int:
        """Return the number of entries in `contents`."""

        return len(self.contents)

class _DocumentNav:
    """Navigate a Beautiful Soup document."""

    @classmethod
    def assert_valid_input(cls, tag: Any) -> None:
        """
        Check if valid input tag or document.

        Raises `TypeError` when `tag` is not a `bs4.Tag` instance.
        """

        # Fail on unexpected types.
        if not cls.is_tag(tag):
            raise TypeError("Expected a BeautifulSoup 'Tag', but instead received type {}".format(type(tag)))

    @staticmethod
    def is_doc(obj: bs4.Tag) -> bool:
        """Is `BeautifulSoup` object."""
        return isinstance(obj, bs4.BeautifulSoup)

    @staticmethod
    def is_tag(obj: bs4.PageElement) -> bool:
        """Is tag."""
        return isinstance(obj, bs4.Tag)

    @staticmethod
    def is_declaration(obj: bs4.PageElement) -> bool:  # pragma: no cover
        """Is declaration."""
        return isinstance(obj, bs4.Declaration)

    @staticmethod
    def is_cdata(obj: bs4.PageElement) -> bool:
        """Is CDATA."""
        return isinstance(obj, bs4.CData)

    @staticmethod
    def is_processing_instruction(obj: bs4.PageElement) -> bool:  # pragma: no cover
        """Is processing instruction."""
        return isinstance(obj, bs4.ProcessingInstruction)

    @staticmethod
    def is_navigable_string(obj: bs4.PageElement) -> bool:
        """Is navigable string."""
        return isinstance(obj, bs4.NavigableString)

    @staticmethod
    def is_special_string(obj: bs4.PageElement) -> bool:
        """Is special string (comment, declaration, CDATA, processing instruction or doctype)."""
        return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))

    @classmethod
    def is_content_string(cls, obj: bs4.PageElement) -> bool:
        """Check if node is content string (a navigable string that is not a special string)."""

        return cls.is_navigable_string(obj) and not cls.is_special_string(obj)

    @staticmethod
    def create_fake_parent(el: bs4.Tag) -> _FakeParent:
        """Create fake parent for a given element."""

        return _FakeParent(el)

    @staticmethod
    def is_xml_tree(el: bs4.Tag) -> bool:
        """Check if element (or document) is from a XML tree."""

        return bool(el._is_xml)

    def is_iframe(self, el: bs4.Tag) -> bool:
        """
        Check if element is an `iframe`.

        The name is compared as-is for XML trees and lowercased otherwise;
        the element must also be reported as an HTML tag by `is_html_tag`,
        which this class expects a subclass to provide.
        """

        return bool(
            ((el.name if self.is_xml_tree(el) else util.lower(el.name)) == 'iframe') and
            self.is_html_tag(el)  # type: ignore[attr-defined]
        )

    def is_root(self, el: bs4.Tag) -> bool:
        """
        Return whether element is a root element.

        We check that the element is the root of the tree (which we have already pre-calculated),
        and we check if it is the root element under an `iframe`.
        """

        root = self.root and self.root is el  # type: ignore[attr-defined]
        if not root:
            parent = self.get_parent(el)
            root = parent is not None and self.is_html and self.is_iframe(parent)  # type: ignore[attr-defined]
        return root

    def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]:
        """
        Get contents.

        Yields every entry of `el.contents` in order. Nothing is yielded
        when `no_iframe` is set and `el` is an `iframe`.
        """
        if not no_iframe or not self.is_iframe(el):
            yield from el.contents

    def get_children(
        self,
        el: bs4.Tag,
        start: int | None = None,
        reverse: bool = False,
        tags: bool = True,
        no_iframe: bool = False
    ) -> Iterator[bs4.PageElement]:
        """
        Get children.

        Walks `el.contents` from `start` (default: the first entry, or the last
        when `reverse` is set) towards the end, or towards the beginning when
        `reverse` is set. Only tags are yielded unless `tags` is false. Nothing
        is yielded for an `iframe` when `no_iframe` is set, or when `start` is
        outside the contents.
        """

        if not no_iframe or not self.is_iframe(el):
            last = len(el.contents) - 1
            if start is None:
                index = last if reverse else 0
            else:
                index = start
            # `end` is one step past the final index in the direction of travel.
            end = -1 if reverse else last + 1
            incr = -1 if reverse else 1

            if 0 <= index <= last:
                while index != end:
                    node = el.contents[index]
                    index += incr
                    if not tags or self.is_tag(node):
                        yield node

    def get_descendants(
        self,
        el: bs4.Tag,
        tags: bool = True,
        no_iframe: bool = False
    ) -> Iterator[bs4.PageElement]:
        """
        Get descendants.

        Iterates `el.descendants`. With `no_iframe` set, an `iframe` descendant
        is yielded itself but everything inside it is skipped. Only tags are
        yielded unless `tags` is false.
        """

        if not no_iframe or not self.is_iframe(el):
            # When set, nodes are skipped until this node is reached again.
            next_good = None
            for child in el.descendants:

                if next_good is not None:
                    if child is not next_good:
                        continue
                    next_good = None

                is_tag = self.is_tag(child)

                if no_iframe and is_tag and self.is_iframe(child):
                    if child.next_sibling is not None:
                        next_good = child.next_sibling
                    else:
                        # No sibling: resume at the element following the iframe's deepest last child.
                        last_child = child
                        while self.is_tag(last_child) and last_child.contents:
                            last_child = last_child.contents[-1]
                        next_good = last_child.next_element
                    yield child
                    if next_good is None:
                        break
                    # Coverage isn't seeing this even though it's executed
                    continue  # pragma: no cover

                if not tags or is_tag:
                    yield child

    def get_parent(self, el: bs4.Tag, no_iframe: bool = False) -> bs4.Tag | None:
        """
        Get parent.

        Returns `el.parent`, or `None` when `no_iframe` is set and the parent is an `iframe`.
        """

        parent = el.parent
        if no_iframe and parent is not None and self.is_iframe(parent):
            parent = None
        return parent

    @staticmethod
    def get_tag_name(el: bs4.Tag) -> str | None:
        """Get tag."""

        return cast('str | None', el.name)

    @staticmethod
    def get_prefix_name(el: bs4.Tag) -> str | None:
        """Get prefix."""

        return cast('str | None', el.prefix)

    @staticmethod
    def get_uri(el: bs4.Tag) -> str | None:
        """Get namespace `URI`."""

        return cast('str | None', el.namespace)

    @classmethod
    def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement | None:
        """Get next sibling tag (or next sibling of any kind when `tags` is false); `None` if there is none."""

        sibling = el.next_sibling
        while tags and not cls.is_tag(sibling) and sibling is not None:
            sibling = sibling.next_sibling
        return sibling

    @classmethod
    def get_previous(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement | None:
        """Get previous sibling tag (or previous sibling of any kind when `tags` is false); `None` if there is none."""

        sibling = el.previous_sibling
        while tags and not cls.is_tag(sibling) and sibling is not None:
            sibling = sibling.previous_sibling
        return sibling

    @staticmethod
    def has_html_ns(el: bs4.Tag) -> bool:
        """
        Check if element has an HTML namespace.

        This is a bit different than whether a element is treated as having an HTML namespace,
        like we do in the case of `is_html_tag`.
        """

        ns = el.namespace if el else None
        return bool(ns and ns == NS_XHTML)

    @staticmethod
    def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[str | None, str | None]:
        """
        Return namespace and attribute name without the prefix.

        Both values are read from attributes of `attr_name` itself and default
        to `None` when it does not carry them; `el` is not consulted.
        """

        return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)

    @classmethod
    def normalize_value(cls, value: Any) -> str | Sequence[str]:
        """Normalize the value to be a string or list of strings."""

        # Treat `None` as empty string.
        if value is None:
            return ''

        # Pass through strings
        if (isinstance(value, str)):
            return value

        # If it's a byte string, convert it to Unicode, treating it as UTF-8.
        if isinstance(value, bytes):
            return value.decode("utf8")

        # BeautifulSoup supports sequences of attribute values, so make sure the children are strings.
        if isinstance(value, Sequence):
            new_value = []
            for v in value:
                if not isinstance(v, (str, bytes)) and isinstance(v, Sequence):
                    # This is most certainly a user error and will crash and burn later.
                    # To keep things working, we'll do what we do with all objects,
                    # And convert them to strings.
                    new_value.append(str(v))
                else:
                    # Convert the child to a string
                    new_value.append(cast(str, cls.normalize_value(v)))
            return new_value

        # Try and make anything else a string
        return str(value)

    @classmethod
    def get_attribute_by_name(
        cls,
        el: bs4.Tag,
        name: str,
        default: str | Sequence[str] | None = None
    ) -> str | Sequence[str] | None:
        """
        Get attribute by name.

        XML elements are looked up by exact key; otherwise each key is
        lowercased before being compared with `name`. Returns `default`
        when nothing matches.
        """

        value = default
        if el._is_xml:
            try:
                value = cls.normalize_value(el.attrs[name])
            except KeyError:
                pass
        else:
            for k, v in el.attrs.items():
                if util.lower(k) == name:
                    value = cls.normalize_value(v)
                    break
        return value

    @classmethod
    def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, str | Sequence[str] | None]]:
        """Iterate attributes as `(name, normalized value)` pairs."""

        for k, v in el.attrs.items():
            yield k, cls.normalize_value(v)

    @classmethod
    def get_classes(cls, el: bs4.Tag) -> Sequence[str]:
        """Get classes; a plain string value is split on whitespace."""

        classes = cls.get_attribute_by_name(el, 'class', [])
        if isinstance(classes, str):
            classes = RE_NOT_WS.findall(classes)
        return cast(Sequence[str], classes)

    def get_text(self, el: bs4.Tag, no_iframe: bool = False) -> str:
        """Get text: all descendant content strings joined together."""

        return ''.join(
            [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
        )

    def get_own_text(self, el: bs4.Tag, no_iframe: bool = False) -> list[str]:
        """Get Own Text: the content strings that are direct entries of the element's contents."""

        return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]

377

378

class Inputs:
    """Class for parsing and validating input items."""

    @staticmethod
    def validate_day(year: int, month: int, day: int) -> bool:
        """Validate day against the length of the given month (leap years included)."""

        max_days = LONG_MONTH
        if month == FEB:
            max_days = FEB_LEAP_MONTH if ((year % 4 == 0) and (year % 100 != 0)) or (year % 400 == 0) else FEB_MONTH
        elif month in MONTHS_30:
            max_days = SHORT_MONTH
        return 1 <= day <= max_days

    @staticmethod
    def validate_week(year: int, week: int) -> bool:
        """
        Validate week.

        A week is valid when it lies between 1 and the number of ISO weeks
        (52 or 53) in `year`.
        """

        # `datetime` only handles years 1-9999. The Gregorian calendar repeats every
        # 400 years (146097 days, a whole number of weeks), so fold the year into
        # 1-400 to get the same week layout for any year.
        folded_year = (year - 1) % 400 + 1
        # December 28th always falls in the last ISO week of its own year.
        max_week = datetime(folded_year, 12, 28).isocalendar()[1]
        return 1 <= week <= max_week

    @staticmethod
    def validate_month(month: int) -> bool:
        """Validate month."""

        return 1 <= month <= 12

    @staticmethod
    def validate_year(year: int) -> bool:
        """Validate year."""

        return 1 <= year

    @staticmethod
    def validate_hour(hour: int) -> bool:
        """Validate hour."""

        return 0 <= hour <= 23

    @staticmethod
    def validate_minutes(minutes: int) -> bool:
        """Validate minutes."""

        return 0 <= minutes <= 59

    @classmethod
    def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None:
        """
        Parse the input value.

        Returns a tuple of numeric components for the given input type, or
        `None` when `value` is `None`, the type is not handled, the text does
        not match the type's pattern, or a component fails validation.
        """

        parsed = None  # type: tuple[float, ...] | None
        if value is None:
            return value
        if itype == "date":
            m = RE_DATE.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                day = int(m.group('day'), 10)
                if cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day):
                    parsed = (year, month, day)
        elif itype == "month":
            m = RE_MONTH.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                if cls.validate_year(year) and cls.validate_month(month):
                    parsed = (year, month)
        elif itype == "week":
            m = RE_WEEK.match(value)
            if m:
                year = int(m.group('year'), 10)
                week = int(m.group('week'), 10)
                if cls.validate_year(year) and cls.validate_week(year, week):
                    parsed = (year, week)
        elif itype == "time":
            m = RE_TIME.match(value)
            if m:
                hour = int(m.group('hour'), 10)
                minutes = int(m.group('minutes'), 10)
                if cls.validate_hour(hour) and cls.validate_minutes(minutes):
                    parsed = (hour, minutes)
        elif itype == "datetime-local":
            m = RE_DATETIME.match(value)
            if m:
                year = int(m.group('year'), 10)
                month = int(m.group('month'), 10)
                day = int(m.group('day'), 10)
                hour = int(m.group('hour'), 10)
                minutes = int(m.group('minutes'), 10)
                if (
                    cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day) and
                    cls.validate_hour(hour) and cls.validate_minutes(minutes)
                ):
                    parsed = (year, month, day, hour, minutes)
        elif itype in ("number", "range"):
            m = RE_NUM.match(value)
            if m:
                parsed = (float(m.group('value')),)
        return parsed

480

481

482class CSSMatch(_DocumentNav):

483 """Perform CSS matching."""

484

def __init__(
    self,
    selectors: ct.SelectorList,
    scope: bs4.Tag,
    namespaces: ct.Namespaces | None,
    flags: int
) -> None:
    """
    Set up matching state for `scope`.

    Validates `scope`, stores the selectors, namespaces and flags, creates
    the empty caches, then locates the top of the tree to record the root
    element and whether the tree is XML and/or HTML.
    """

    self.assert_valid_input(scope)
    self.tag = scope
    self.cached_meta_lang = []  # type: list[tuple[str, str]]
    self.cached_default_forms = []  # type: list[tuple[bs4.Tag, bs4.Tag]]
    self.cached_indeterminate_forms = []  # type: list[tuple[bs4.Tag, str, bool]]
    self.selectors = selectors
    self.namespaces = {} if namespaces is None else namespaces  # type: ct.Namespaces | dict[str, str]
    self.flags = flags
    self.iframe_restrict = False

    # Climb until there is no parent left: that node is the top of the tree.
    top = scope
    ancestor = self.get_parent(top)
    while ancestor:
        top = ancestor
        ancestor = self.get_parent(top)

    if self.is_doc(top):
        # For a document object the root is its first child tag, if any.
        root = next(iter(self.get_children(top)), None)
    else:
        root = top

    self.root = root
    # Scoping on the document object itself means scoping on the root element.
    self.scope = root if scope is top else scope
    self.has_html_namespace = self.has_html_ns(root)

    # A document can be both XML and HTML (XHTML)
    self.is_xml = self.is_xml_tree(top)
    self.is_html = not self.is_xml or self.has_html_namespace

525

def supports_namespaces(self) -> bool:
    """Report whether namespaces apply: the tree is XML or its root has the XHTML namespace."""

    xml_tree = self.is_xml
    if xml_tree:
        return xml_tree
    return self.has_html_namespace

530

def get_tag_ns(self, el: bs4.Tag) -> str:
    """
    Return the namespace used for `el`.

    Without namespace support every element counts as XHTML. With it, the
    element's own namespace URI is returned, or an empty string if it has none.
    """

    if not self.supports_namespaces():
        return NS_XHTML
    return self.get_uri(el) or ''

542

def is_html_tag(self, el: bs4.Tag) -> bool:
    """Tell whether the namespace reported by `get_tag_ns` for `el` is the XHTML namespace."""

    namespace = self.get_tag_ns(el)
    return namespace == NS_XHTML

547

def get_tag(self, el: bs4.Tag) -> str | None:
    """Return the tag name, lowercased unless the tree is XML; `None` stays `None`."""

    name = self.get_tag_name(el)
    if name is None or self.is_xml:
        return name
    return util.lower(name)

553

def get_prefix(self, el: bs4.Tag) -> str | None:
    """Return the tag prefix, lowercased unless the tree is XML; `None` stays `None`."""

    prefix = self.get_prefix_name(el)
    if prefix is None or self.is_xml:
        return prefix
    return util.lower(prefix)

559

def find_bidi(self, el: bs4.Tag) -> int | None:
    """
    Get directionality from element text.

    Walks the children of `el` in order (tags and strings) and returns
    `ct.SEL_DIR_LTR` or `ct.SEL_DIR_RTL` for the first character whose Unicode
    bidirectional class is `L`, `R` or `AL`. Child tags are searched
    recursively. Returns `None` when no such character is found.
    """

    for node in self.get_children(el, tags=False):

        # Child tags are either skipped or searched recursively
        if self.is_tag(node):

            # Avoid analyzing certain elements specified in the specification.
            # Also skipped: non-HTML tags and tags whose `dir` is a recognized value.
            direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(node, 'dir', '')), None)
            if (
                self.get_tag(node) in ('bdi', 'script', 'style', 'textarea', 'iframe') or
                not self.is_html_tag(node) or
                direction is not None
            ):
                continue  # pragma: no cover

            # Check directionality of this node's text
            value = self.find_bidi(node)
            if value is not None:
                return value

            # Direction could not be determined
            continue  # pragma: no cover

        # Skip `doctype` comments, etc.
        if self.is_special_string(node):
            continue

        # Analyze text nodes for directionality.
        for c in node:
            bidi = unicodedata.bidirectional(c)
            if bidi in ('AL', 'R', 'L'):
                # `L` is left-to-right; `R` and `AL` are treated as right-to-left.
                return ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
    return None

595

def extended_language_filter(self, lang_range: str, lang_tag: str) -> bool:
    """
    Filter the language tags.

    Compares a language range (which may contain `*` wildcards) against a
    language tag. Both are lowercased and split on `-`, then compared subtag
    by subtag. Returns `True` when the tag satisfies the range.
    """

    match = True
    # Collapse wildcard subtags after the first position into a single '-' before splitting.
    lang_range = RE_WILD_STRIP.sub('-', lang_range).lower()
    ranges = lang_range.split('-')
    subtags = lang_tag.lower().split('-')
    length = len(ranges)
    slength = len(subtags)
    rindex = 0
    sindex = 0
    r = ranges[rindex]
    s = subtags[sindex]

    # Empty specified language should match unspecified language attributes
    if length == 1 and slength == 1 and not r and r == s:
        return True

    # Primary tag needs to match
    # (a leading `*` matches any primary subtag except an empty, lone one)
    if (r != '*' and r != s) or (r == '*' and slength == 1 and not s):
        match = False

    rindex += 1
    sindex += 1

    # Match until we run out of ranges
    while match and rindex < length:
        r = ranges[rindex]
        try:
            s = subtags[sindex]
        except IndexError:
            # Ran out of subtags,
            # but we still have ranges
            match = False
            continue

        # Empty range
        if not r:
            match = False
            continue

        # Matched range
        elif s == r:
            rindex += 1

        # Implicit wildcard cannot match
        # singletons
        elif len(s) == 1:
            match = False
            continue

        # Implicitly matched, so grab next subtag
        sindex += 1

    return match

651

def match_attribute_name(
    self,
    el: bs4.Tag,
    attr: str,
    prefix: str | None
) -> str | Sequence[str] | None:
    """
    Find the attribute named `attr` on `el` and return its value.

    Returns `None` when the attribute is absent, or when `prefix` names a
    namespace that was not registered. Names are compared case-insensitively
    unless the tree is XML.
    """

    if not self.supports_namespaces():
        # No namespaces in play: plain case-insensitive name comparison.
        for key, candidate in self.iter_attributes(el):
            if util.lower(attr) == util.lower(key):
                return candidate
        return None

    # If we have not defined namespaces, we can't very well find them, so don't bother trying.
    if prefix:
        wanted_ns = self.namespaces.get(prefix)
        if wanted_ns is None and prefix != '*':
            return None
    else:
        wanted_ns = None

    for key, candidate in self.iter_attributes(el):

        # Get attribute parts
        attr_ns, local_name = self.split_namespace(el, key)

        # Can't match a prefix attribute as we haven't specified one to match
        # Try to match it normally as a whole `p:a` as selector may be trying `p\:a`.
        if wanted_ns is None:
            if self.is_xml:
                same = attr == key
            else:
                same = util.lower(attr) == util.lower(key)
            if same:
                return candidate
            continue

        # We can't match our desired prefix attribute as the attribute doesn't have a prefix
        if attr_ns is None or (wanted_ns != attr_ns and prefix != '*'):
            continue

        # The attribute doesn't match.
        if self.is_xml:
            differs = attr != local_name
        else:
            differs = util.lower(attr) != util.lower(local_name)
        if differs:
            continue

        return candidate

    return None

704

def match_namespace(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
    """
    Check the element's namespace against the selector's prefix.

    - No prefix: the element must be in the default namespace, if one is registered.
    - Empty prefix (`|tag`): the element must have no namespace.
    - `*`: any namespace matches.
    - Any other prefix: it must be registered and equal the element's namespace.
    """

    prefix = tag.prefix
    element_ns = self.get_tag_ns(el)

    # We must match the default namespace if one is not provided
    if prefix is None:
        default_ns = self.namespaces.get('')
        return default_ns is None or element_ns == default_ns

    # If we specified `|tag`, we must not have a namespace.
    if prefix == '':
        return not element_ns

    if prefix == '*':
        return True

    # Verify prefix matches
    expected_ns = self.namespaces.get(prefix)
    return expected_ns is not None and element_ns == expected_ns

725

def match_attributes(self, el: bs4.Tag, attributes: tuple[ct.SelectorAttribute, ...]) -> bool:
    """
    Check that `el` satisfies every attribute selector.

    Each attribute must exist and, when the selector carries a pattern, its
    value (list values are joined with single spaces) must match that pattern.
    For XML trees the selector's `xml_type_pattern` is used if it has one.
    """

    for selector_attr in attributes or ():
        found = self.match_attribute_name(el, selector_attr.attribute, selector_attr.prefix)
        if self.is_xml and selector_attr.xml_type_pattern:
            pattern = selector_attr.xml_type_pattern
        else:
            pattern = selector_attr.pattern
        if found is None:
            return False
        text = found if isinstance(found, str) else ' '.join(found)
        # Presence alone is enough when there is no pattern.
        if pattern is not None and pattern.match(text) is None:
            return False
    return True

744

def match_tagname(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
    """
    Check the element's tag name against the selector's name.

    A selector without a name, or with the name `*`, matches any element.
    The selector name is lowercased first unless the tree is XML.
    """

    wanted = tag.name
    if wanted is not None and not self.is_xml:
        wanted = util.lower(wanted)
    if wanted is None:
        return True
    return wanted in (self.get_tag(el), '*')

753

def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool:
    """Check namespace and tag name; no tag selector at all matches everything."""

    if tag is None:
        return True
    # Both checks are always run, as before.
    namespace_ok = self.match_namespace(el, tag)
    name_ok = self.match_tagname(el, tag)
    return namespace_ok and name_ok

765

def match_past_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
    """
    Match a backward-looking relationship (ancestors and previous siblings).

    The relation type of the first selector in `relation` decides which
    elements are tested: any ancestor, the direct parent, any previous
    sibling tag, or the immediately previous sibling tag.
    """

    head = relation[0]
    # I don't think this can ever happen, but it makes `mypy` happy
    if isinstance(head, ct.SelectorNull):  # pragma: no cover
        return False

    found = False
    kind = head.rel_type

    if kind == REL_PARENT:
        ancestor = self.get_parent(el, no_iframe=self.iframe_restrict)
        while not found and ancestor:
            found = self.match_selectors(ancestor, relation)
            ancestor = self.get_parent(ancestor, no_iframe=self.iframe_restrict)
    elif kind == REL_CLOSE_PARENT:
        direct_parent = self.get_parent(el, no_iframe=self.iframe_restrict)
        if direct_parent:
            found = self.match_selectors(direct_parent, relation)
    elif kind == REL_SIBLING:
        earlier = self.get_previous(el)
        while not found and earlier:
            found = self.match_selectors(earlier, relation)
            earlier = self.get_previous(earlier)
    elif kind == REL_CLOSE_SIBLING:
        neighbour = self.get_previous(el)
        if neighbour and self.is_tag(neighbour):
            found = self.match_selectors(neighbour, relation)
    return found

793

def match_future_child(self, parent: bs4.Tag, relation: ct.SelectorList, recursive: bool = False) -> bool:
    """
    Test whether some child of `parent` matches `relation`.

    Only direct children are tested unless `recursive` is set, in which
    case all descendants are. Stops at the first match.
    """

    walker = self.get_descendants if recursive else self.get_children  # type: Callable[..., Iterator[bs4.Tag]]
    outcome = False
    for candidate in walker(parent, no_iframe=self.iframe_restrict):
        outcome = self.match_selectors(candidate, relation)
        if outcome:
            break
    return outcome

807

def match_future_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
    """
    Match a forward-looking relationship (descendants and following siblings).

    The relation type of the first selector in `relation` decides which
    elements are tested: any descendant, any direct child, any following
    sibling tag, or the immediately following sibling tag.
    """

    head = relation[0]
    # I don't think this can ever happen, but it makes `mypy` happy
    if isinstance(head, ct.SelectorNull):  # pragma: no cover
        return False

    found = False
    kind = head.rel_type

    if kind == REL_HAS_PARENT:
        found = self.match_future_child(el, relation, True)
    elif kind == REL_HAS_CLOSE_PARENT:
        found = self.match_future_child(el, relation)
    elif kind == REL_HAS_SIBLING:
        later = self.get_next(el)
        while not found and later:
            found = self.match_selectors(later, relation)
            later = self.get_next(later)
    elif kind == REL_HAS_CLOSE_SIBLING:
        neighbour = self.get_next(el)
        if neighbour and self.is_tag(neighbour):
            found = self.match_selectors(neighbour, relation)
    return found

830

def match_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
    """
    Match relationship to other elements.

    Relation types starting with `:` are handled as forward-looking
    relations, all others as backward-looking ones. A null selector or a
    missing relation type never matches.
    """

    head = relation[0]
    if isinstance(head, ct.SelectorNull) or head.rel_type is None:
        return False

    if head.rel_type.startswith(':'):
        return self.match_future_relations(el, relation)
    return self.match_past_relations(el, relation)

845

def match_id(self, el: bs4.Tag, ids: tuple[str, ...]) -> bool:
    """Check that every requested ID equals the element's `id` attribute (empty string if absent)."""

    return all(wanted == self.get_attribute_by_name(el, 'id', '') for wanted in ids)

855

def match_classes(self, el: bs4.Tag, classes: tuple[str, ...]) -> bool:
    """Check that every requested class is among the element's classes."""

    present = self.get_classes(el)
    return all(wanted in present for wanted in classes)

866

def match_root(self, el: bs4.Tag) -> bool:
    """
    Match element as root.

    Besides being a root element, it must have no sibling on either side
    that is a tag, a CDATA section, or a content string with
    non-whitespace text.
    """

    rooted = self.is_root(el)
    # Scan backwards first, then forwards; stop as soon as a disqualifying sibling is seen.
    for step in (self.get_previous, self.get_next):
        if not rooted:
            break
        sibling = step(el, tags=False)
        while rooted and sibling is not None:
            if (
                self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
                self.is_cdata(sibling)
            ):
                rooted = False
            else:
                sibling = step(sibling, tags=False)
    return rooted

892

def match_scope(self, el: bs4.Tag) -> bool:
    """Tell whether `el` is the very object stored as the scope (identity, not equality)."""

    return el is self.scope

897

def match_nth_tag_type(self, el: bs4.Tag, child: bs4.Tag) -> bool:
    """Tell whether `child` has the same tag name and the same namespace as `el` (for `of-type`)."""

    if self.get_tag(child) != self.get_tag(el):
        return False
    return self.get_tag_ns(child) == self.get_tag_ns(el)

905

def match_nth(self, el: bs4.Tag, nth: bs4.Tag) -> bool:
    """
    Match `nth` elements.

    Every entry of `nth` must accept `el`. For each entry, positions of the
    form `a * count + b` (or just `a` when the entry is not variable) are
    generated and `el`'s siblings are counted from the front, or from the
    back when the entry's `last` is set, until the element is reached.

    NOTE(review): `nth` is iterated and each item is read for `.selectors`,
    `.last`, `.a`, `.b`, `.n` and `.of_type`, so the `bs4.Tag` annotation
    looks wrong; presumably this is a sequence of nth-selector objects.
    """

    matched = True

    for n in nth:
        matched = False
        # `of S`: the element itself must match the extra selectors.
        if n.selectors and not self.match_selectors(el, n.selectors):
            break
        parent = self.get_parent(el)
        if parent is None:
            # No parent: wrap the element so it can still be walked as a child.
            parent = self.create_fake_parent(el)
        last = n.last
        last_index = len(parent) - 1
        index = last_index if last else 0
        relative_index = 0
        a = n.a
        b = n.b
        var = n.n
        count = 0
        count_incr = 1
        # Direction of travel through the parent's contents.
        factor = -1 if last else 1
        idx = last_idx = a * count + b if var else a

        # We can only adjust bounds within a variable index
        if var:
            # Abort if our nth index is out of bounds and only getting further out of bounds as we increment.
            # Otherwise, increment to try to get in bounds.
            adjust = None
            while idx < 1 or idx > last_index:
                if idx < 0:
                    diff_low = 0 - idx
                    if adjust is not None and adjust == 1:
                        break
                    adjust = -1
                    count += count_incr
                    idx = last_idx = a * count + b if var else a
                    diff = 0 - idx
                    if diff >= diff_low:
                        break
                else:
                    diff_high = idx - last_index
                    if adjust is not None and adjust == -1:
                        break
                    adjust = 1
                    count += count_incr
                    idx = last_idx = a * count + b if var else a
                    diff = idx - last_index
                    if diff >= diff_high:
                        break
                    diff_high = diff

            # If a < 0, our count is working backwards, so floor the index by increasing the count.
            # Find the count that yields the lowest, in bound value and use that.
            # Lastly reverse count increment so that we'll increase our index.
            lowest = count
            if a < 0:
                while idx >= 1:
                    lowest = count
                    count += count_incr
                    idx = last_idx = a * count + b if var else a
                count_incr = -1
            count = lowest
            idx = last_idx = a * count + b if var else a

        # Evaluate elements while our calculated nth index is still in range
        while 1 <= idx <= last_index + 1:
            child = None
            # Evaluate while our child index is still in range.
            for child in self.get_children(parent, start=index, reverse=factor < 0, tags=False):
                index += factor
                if not self.is_tag(child):
                    continue
                # Handle `of S` in `nth-child`
                if n.selectors and not self.match_selectors(child, n.selectors):
                    continue
                # Handle `of-type`
                if n.of_type and not self.match_nth_tag_type(el, child):
                    continue
                relative_index += 1
                if relative_index == idx:
                    if child is el:
                        matched = True
                    else:
                        # Position reached by another element; try the next index.
                        break
                if child is el:
                    break
            if child is el:
                break
            last_idx = idx
            count += count_incr
            if count < 0:
                # Count is counting down and has now ventured into invalid territory.
                break
            idx = a * count + b if var else a
            # A fixed index never changes, so stop instead of looping forever.
            if last_idx == idx:
                break
        if not matched:
            break
    return matched

1006

def match_empty(self, el: bs4.Tag) -> bool:
    """
    Check if element is empty (if requested).

    An element is empty when it has no child tags and none of its content
    strings contains a non-whitespace character.
    """

    for child in self.get_children(el, tags=False):
        if self.is_tag(child):
            return False
        if self.is_content_string(child) and RE_NOT_EMPTY.search(child):
            return False
    return True

1019

def match_subselectors(self, el: bs4.Tag, selectors: tuple[ct.SelectorList, ...]) -> bool:
    """
    Match selectors.

    Returns `True` only when `el` matches every selector list in `selectors`
    (vacuously `True` for an empty tuple). Evaluation stops at the first
    selector list that fails, since the outcome can no longer change.
    """

    return all(self.match_selectors(el, sel) for sel in selectors)

1028

def match_contains(self, el: bs4.Tag, contains: tuple[ct.SelectorContains, ...]) -> bool:
    """
    Match element if it contains text.

    Every entry in `contains` must be satisfied: at least one of the entry's
    texts has to occur in the element's text. Entries with `own` set are
    tested against each of the element's own content strings; other entries
    are tested against the joined text of all descendants.

    The two kinds of text are fetched lazily and cached separately, so a mix
    of `own` and non-`own` entries is always tested against the right data.
    """

    own_text = None  # type: Sequence[str] | None
    full_text = None  # type: str | None

    for contain_list in contains:
        if contain_list.own:
            if own_text is None:
                own_text = self.get_own_text(el, no_iframe=self.is_html)
            found = any(text in chunk for text in contain_list.text for chunk in own_text)
        else:
            if full_text is None:
                full_text = self.get_text(el, no_iframe=self.is_html)
            found = any(text in full_text for text in contain_list.text)

        # One unsatisfied entry decides the result; no need to test the rest.
        if not found:
            return False
    return True

1056

def match_default(self, el: bs4.Tag) -> bool:
    """
    Match default.

    `el` matches when it is the first `input`/`button` descendant with
    `type="submit"` (case-insensitive) of its nearest HTML `form` ancestor.
    The located button is remembered per form in `cached_default_forms`.
    Returns `False` when `el` has no enclosing form.
    """

    # Find this input's form
    form = None
    parent = self.get_parent(el, no_iframe=True)
    while parent and form is None:
        if self.get_tag(parent) == 'form' and self.is_html_tag(parent):
            form = parent
        else:
            parent = self.get_parent(parent, no_iframe=True)

    # Without an enclosing form there is nothing to search (and `None` must not be walked).
    if form is None:
        return False

    # Look in form cache to see if we've already located its default button
    for cached_form, default_button in self.cached_default_forms:
        if cached_form is form:
            return default_button is el

    # We didn't have the form cached, so look for its default button
    for child in self.get_descendants(form, no_iframe=True):
        name = self.get_tag(child)
        # Can't do nested forms (haven't figured out why we never hit this)
        if name == 'form':  # pragma: no cover
            break
        if name in ('input', 'button'):
            v = self.get_attribute_by_name(child, 'type', '')
            if v and util.lower(v) == 'submit':
                # The first submit control found is the form's default; stop here.
                self.cached_default_forms.append((form, child))
                return el is child

    return False

1095

def match_indeterminate(self, el: bs4.Tag) -> bool:
    """
    Match indeterminate.

    `el` matches when no *other* `input` in the same form (or, failing a form,
    under the top-most ancestor) is a radio button carrying the same `name`
    and a `checked` attribute. Results are remembered per `(form, name)` in
    `cached_indeterminate_forms`.
    """

    name = cast(str, self.get_attribute_by_name(el, 'name'))

    def get_parent_form(node: bs4.Tag) -> bs4.Tag | None:
        """Return the nearest HTML `form` ancestor, else the last ancestor reached."""

        ancestor = self.get_parent(node, no_iframe=True)
        while True:
            if self.get_tag(ancestor) == 'form' and self.is_html_tag(ancestor):
                return ancestor
            previous = ancestor
            ancestor = self.get_parent(ancestor, no_iframe=True)
            if ancestor is None:
                # Ran out of ancestors: the top-most one acts as the grouping scope.
                return previous

    form = get_parent_form(el)

    # Reuse an earlier verdict for this form/name pair when one exists.
    for cached_form, cached_name, verdict in self.cached_indeterminate_forms:
        if cached_form is form and cached_name == name:
            return verdict is True

    # Nothing cached: look for a checked radio button sharing the name.
    checked = False
    for child in self.get_descendants(form, no_iframe=True):
        if child is el:
            continue
        if self.get_tag(child) != 'input':
            continue

        is_radio = False
        has_checked = False
        has_name = False
        for key, value in self.iter_attributes(child):
            lowered = util.lower(key)
            if lowered == 'type' and util.lower(value) == 'radio':
                is_radio = True
            elif lowered == 'name' and value == name:
                has_name = True
            elif lowered == 'checked':
                has_checked = True
            if is_radio and has_checked and has_name and get_parent_form(child) is form:
                checked = True
                break

        if checked:
            break

    match = not checked
    self.cached_indeterminate_forms.append((form, name, match))
    return match

1156

def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool:
    """
    Match languages.

    The element's language is taken from the nearest `lang` attribute
    (`xml:lang` for non-HTML namespaced content) found on `el` or an ancestor.
    When none is found and the document is treated as HTML, the value of a
    `<meta http-equiv="content-language" content="...">` under `html > head`
    is used; that lookup is remembered per root in `cached_meta_lang`.

    Every pattern group in `langs` must have at least one pattern accepted by
    `extended_language_filter` for the element to match.
    """

    match = False
    has_ns = self.supports_namespaces()
    root = self.root
    has_html_namespace = self.has_html_namespace

    # Walk parents looking for `lang` (HTML) or `xml:lang` XML property.
    parent = el
    found_lang = None
    last = None
    while not found_lang:
        has_html_ns = self.has_html_ns(parent)
        for k, v in self.iter_attributes(parent):
            attr_ns, attr = self.split_namespace(parent, k)
            if (
                ((not has_ns or has_html_ns) and (util.lower(k) if not self.is_xml else k) == 'lang') or
                (
                    has_ns and not has_html_ns and attr_ns == NS_XML and
                    (util.lower(attr) if not self.is_xml and attr is not None else attr) == 'lang'
                )
            ):
                found_lang = v
                break
        last = parent
        parent = self.get_parent(parent, no_iframe=self.is_html)

        if parent is None:
            # Ran out of ancestors: treat the last node visited as the root to search from.
            root = last
            has_html_namespace = self.has_html_ns(root)
            parent = last
            break

    # Use cached meta language.
    if found_lang is None and self.cached_meta_lang:
        for cache in self.cached_meta_lang:
            if root is cache[0]:
                found_lang = cache[1]

    # If we couldn't find a language, and the document is HTML, look to meta to determine language.
    if found_lang is None and (not self.is_xml or (has_html_namespace and root.name == 'html')):
        # Find head
        found = False
        for tag in ('html', 'head'):
            found = False
            for child in self.get_children(parent, no_iframe=self.is_html):
                if self.get_tag(child) == tag and self.is_html_tag(child):
                    found = True
                    parent = child
                    break
            if not found:  # pragma: no cover
                break

        # Search meta tags
        if found:
            for child in parent:
                # The `meta` element itself must be an HTML tag (the `head` was already verified above).
                if self.is_tag(child) and self.get_tag(child) == 'meta' and self.is_html_tag(child):
                    c_lang = False
                    content = None
                    for k, v in self.iter_attributes(child):
                        if util.lower(k) == 'http-equiv' and util.lower(v) == 'content-language':
                            c_lang = True
                        if util.lower(k) == 'content':
                            content = v
                        if c_lang and content:
                            found_lang = content
                            self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
                            break
                if found_lang is not None:
                    break
            if found_lang is None:
                # Remember that this root has no meta language so the search is not repeated.
                self.cached_meta_lang.append((cast(str, root), ''))

    # If we determined a language, compare.
    if found_lang is not None:
        for patterns in langs:
            match = False
            for pattern in patterns:
                if self.extended_language_filter(pattern, cast(str, found_lang)):
                    match = True
            if not match:
                break

    return match

1242

def match_dir(self, el: bs4.Tag, directionality: int) -> bool:
    """
    Check whether `el` has the requested text direction.

    An explicit `dir="ltr"`/`dir="rtl"` decides directly. Otherwise the
    direction is derived from the first strongly directional character
    (`dir="auto"` on text-like inputs and `textarea`, or `find_bidi` for `bdi`
    and other `auto` elements), defaults to left-to-right on the root and on
    `input[type=tel]`, or is inherited by recursing into the parent.
    """

    # Asking for both directions at once can never be satisfied.
    if directionality & ct.SEL_DIR_LTR and directionality & ct.SEL_DIR_RTL:
        return False

    if el is None or not self.is_html_tag(el):
        return False

    wants_ltr = ct.SEL_DIR_LTR == directionality

    # Explicit left-to-right or right-to-left on the element itself.
    direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(el, 'dir', '')), None)
    if direction not in (None, 0):
        return direction == directionality

    # The document element without a direction is assumed left-to-right.
    is_root = self.is_root(el)
    if is_root and direction is None:
        return wants_ltr

    # `input[type=tel]` without a direction is assumed left-to-right.
    tag_name = self.get_tag(el)
    is_input = tag_name == 'input'
    is_textarea = tag_name == 'textarea'
    is_bdi = tag_name == 'bdi'
    itype = util.lower(self.get_attribute_by_name(el, 'type', '')) if is_input else ''
    if is_input and itype == 'tel' and direction is None:
        return wants_ltr

    # `dir="auto"` on text-like form controls: inspect the control's value.
    is_text_input = is_input and itype in ('text', 'search', 'tel', 'url', 'email')
    if (is_text_input or is_textarea) and direction == 0:
        if is_textarea:
            value = ''.join(
                [node for node in self.get_contents(el, no_iframe=True) if self.is_content_string(node)]
            )
        else:
            value = cast(str, self.get_attribute_by_name(el, 'value', ''))

        if value:
            for char in value:
                bidi = unicodedata.bidirectional(char)
                if bidi in ('AL', 'R', 'L'):
                    # The first strong character settles the direction.
                    direction = ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
                    return direction == directionality
            # No strong character at all: assume left to right.
            return wants_ltr
        if is_root:
            return wants_ltr
        return self.match_dir(self.get_parent(el, no_iframe=True), directionality)

    # `bdi` without a direction, and any other `dir="auto"` element.
    if (is_bdi and direction is None) or direction == 0:
        direction = self.find_bidi(el)
        if direction is not None:
            return direction == directionality
        if is_root:
            return wants_ltr

    # Nothing decided locally: inherit from the parent.
    return self.match_dir(self.get_parent(el, no_iframe=True), directionality)

1305

def match_range(self, el: bs4.Tag, condition: int) -> bool:
    """
    Match `:in-range` / `:out-of-range`.

    The element is only ever judged *out* of range; anything else counts as in
    range, so a missing or unparsable `value` is in range. When neither `min`
    nor `max` parses, no range exists and the result is `False` for either
    condition. For `time` inputs with `min > max` the range wraps around, and
    only values strictly between `max` and `min` are out of range.
    """

    itype = util.lower(self.get_attribute_by_name(el, 'type'))
    mn = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'min', None)))
    mx = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'max', None)))

    # There is no valid min or max, so we cannot evaluate a range
    if mn is None and mx is None:
        return False

    out_of_range = False
    value = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'value', None)))
    if value is not None:
        if itype == "time" and mn is not None and mx is not None and mn > mx:
            # Time is periodic, so this is a reversed/discontinuous range
            out_of_range = value < mn and value > mx
        elif itype in ("date", "datetime-local", "month", "week", "number", "range", "time"):
            out_of_range = (mn is not None and value < mn) or (mx is not None and value > mx)

    if condition & ct.SEL_IN_RANGE:
        return not out_of_range
    return out_of_range

1345

def match_defined(self, el: bs4.Tag) -> bool:
    """
    Match defined.

    `:defined` is related to custom elements in a browser. Here a tag is
    considered defined when it has a name and any of the following holds:

    - The name contains no hyphen (it cannot be a custom element).
    - The name contains a colon.
    - `get_prefix` reports a prefix for the tag.

    This relies on the parser providing the prefix information; if it does
    not, there is nothing more we can do.
    """

    name = self.get_tag(el)
    if name is None:
        return False
    return '-' not in name or ':' in name or self.get_prefix(el) is not None

1368

def match_placeholder_shown(self, el: bs4.Tag) -> bool:
    """
    Match placeholder shown.

    The placeholder is considered shown when the element's text is empty or
    is exactly one newline (a lone newline does not count as content).
    """

    return self.get_text(el) in ('', '\n')

1383

def match_selectors(self, el: bs4.Tag, selectors: ct.SelectorList) -> bool:
    """
    Check if element matches one of the selectors.

    Each selector in the list is tried in order; the first one whose checks
    all pass decides the result (`not selectors.is_not`). If none passes, the
    result is `selectors.is_not`; an empty list, or an HTML-only list applied
    to a non-HTML document, yields `False`.

    For HTML-only (internal) selector lists the namespaces and iframe
    restriction are overridden for the duration of the match and are always
    restored afterwards, even if a matcher raises.
    """

    match = False
    is_not = selectors.is_not
    is_html = selectors.is_html

    # Internal selector lists that use the HTML flag, will automatically get the `html` namespace.
    if is_html:
        namespaces = self.namespaces
        iframe_restrict = self.iframe_restrict
        self.namespaces = {'html': NS_XHTML}
        self.iframe_restrict = True

    try:
        if not is_html or self.is_html:
            for selector in selectors:
                match = is_not
                # We have an un-matchable situation (like `:focus`, as you cannot focus an element here)
                if isinstance(selector, ct.SelectorNull):
                    continue
                # Verify tag matches
                if not self.match_tag(el, selector.tag):
                    continue
                # Verify tag is defined
                if selector.flags & ct.SEL_DEFINED and not self.match_defined(el):
                    continue
                # Verify element is root
                if selector.flags & ct.SEL_ROOT and not self.match_root(el):
                    continue
                # Verify element is scope
                if selector.flags & ct.SEL_SCOPE and not self.match_scope(el):
                    continue
                # Verify element has placeholder shown
                if selector.flags & ct.SEL_PLACEHOLDER_SHOWN and not self.match_placeholder_shown(el):
                    continue
                # Verify `nth` matches
                if not self.match_nth(el, selector.nth):
                    continue
                # Verify element is empty
                if selector.flags & ct.SEL_EMPTY and not self.match_empty(el):
                    continue
                # Verify id matches
                if selector.ids and not self.match_id(el, selector.ids):
                    continue
                # Verify classes match
                if selector.classes and not self.match_classes(el, selector.classes):
                    continue
                # Verify attribute(s) match
                if not self.match_attributes(el, selector.attributes):
                    continue
                # Verify ranges
                if selector.flags & RANGES and not self.match_range(el, selector.flags & RANGES):
                    continue
                # Verify language patterns
                if selector.lang and not self.match_lang(el, selector.lang):
                    continue
                # Verify pseudo selector patterns
                if selector.selectors and not self.match_subselectors(el, selector.selectors):
                    continue
                # Verify relationship selectors
                if selector.relation and not self.match_relations(el, selector.relation):
                    continue
                # Validate that the current default selector match corresponds to the first submit button in the form
                if selector.flags & ct.SEL_DEFAULT and not self.match_default(el):
                    continue
                # Validate that the unset radio button is among radio buttons with the same name in a form that are
                # also not set.
                if selector.flags & ct.SEL_INDETERMINATE and not self.match_indeterminate(el):
                    continue
                # Validate element directionality
                if selector.flags & DIR_FLAGS and not self.match_dir(el, selector.flags & DIR_FLAGS):
                    continue
                # Validate that the tag contains the specified text.
                if selector.contains and not self.match_contains(el, selector.contains):
                    continue
                match = not is_not
                break
    finally:
        # Restore actual namespaces being used for external selector lists
        if is_html:
            self.namespaces = namespaces
            self.iframe_restrict = iframe_restrict

    return match

1467

def select(self, limit: int = 0) -> Iterator[bs4.Tag]:
    """
    Yield matching tags found under the targeted tag.

    A `limit` below 1 means no limit; otherwise iteration stops once `limit`
    matches have been yielded.
    """

    remaining = limit if limit >= 1 else None

    for node in self.get_descendants(self.tag):
        if not self.match(node):
            continue
        yield node
        if remaining is not None:
            remaining -= 1
            if remaining < 1:
                return

1480

def closest(self) -> bs4.Tag | None:
    """
    Return the nearest match, starting at the targeted tag and walking up.

    Returns `None` when neither the tag nor any node reached via `get_parent`
    matches.
    """

    node = self.tag
    while node is not None:
        if self.match(node):
            return node
        node = self.get_parent(node)
    return None

1492

def filter(self) -> list[bs4.Tag]:  # noqa A001
    """Return the targeted tag's contents that are not navigable strings and that match."""

    matched = []
    for node in self.get_contents(self.tag):
        if self.is_navigable_string(node):
            continue
        if self.match(node):
            matched.append(node)
    return matched

1497

def match(self, el: bs4.Tag) -> bool:
    """Match `el` against the compiled selectors; document objects and non-tags never match."""

    if self.is_doc(el):
        return False
    return self.is_tag(el) and self.match_selectors(el, self.selectors)

1502

1503

class SoupSieve(ct.Immutable):
    """
    Compiled Soup Sieve selector matching object.

    Holds the original pattern, its compiled selector list, and the
    namespaces, custom selectors and flags it was compiled with. Each
    operation builds a `CSSMatch` for the tag it is given and delegates to it.
    """

    pattern: str
    selectors: ct.SelectorList
    namespaces: ct.Namespaces | None
    custom: dict[str, str]
    flags: int

    __slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")

    def __init__(
        self,
        pattern: str,
        selectors: ct.SelectorList,
        namespaces: ct.Namespaces | None,
        custom: ct.CustomSelectors | None,
        flags: int
    ):
        """Initialize."""

        super().__init__(
            pattern=pattern,
            selectors=selectors,
            namespaces=namespaces,
            custom=custom,
            flags=flags
        )

    def match(self, tag: bs4.Tag) -> bool:
        """Return whether `tag` matches the compiled selectors."""

        return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag)

    def closest(self, tag: bs4.Tag) -> bs4.Tag | None:
        """Return the closest matching ancestor (or `tag` itself), or `None` if there is none."""

        return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest()

    def filter(self, iterable: Iterable[bs4.Tag]) -> list[bs4.Tag]:  # noqa A001
        """
        Filter.

        `CSSMatch` can cache certain searches for tags of the same document,
        so if we are given a tag, all tags are from the same document,
        and we can take advantage of the optimization.

        Any other kind of iterable could have tags from different documents or detached tags,
        so for those, we use a new `CSSMatch` for each item in the iterable.
        """

        if CSSMatch.is_tag(iterable):
            return CSSMatch(self.selectors, iterable, self.namespaces, self.flags).filter()
        return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]

    def select_one(self, tag: bs4.Tag) -> bs4.Tag | None:
        """Select a single tag; return `None` when nothing matches."""

        tags = self.select(tag, limit=1)
        return tags[0] if tags else None

    def select(self, tag: bs4.Tag, limit: int = 0) -> list[bs4.Tag]:
        """Select the specified tags and return them as a list."""

        return list(self.iselect(tag, limit))

    def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]:
        """Iterate the specified tags."""

        yield from CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit)

    def __repr__(self) -> str:  # pragma: no cover
        """Representation."""

        return (
            f"SoupSieve(pattern={self.pattern!r}, namespaces={self.namespaces!r}, "
            f"custom={self.custom!r}, flags={self.flags!r})"
        )

    __str__ = __repr__


# Register the compiled selector type with the pickling support provided by `css_types`.
ct.pickle_register(SoupSieve)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/soupsieve/css_match.py: 17%

959 statements