Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/bs4/css.py: 43%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

51 statements  

1"""Integration code for CSS selectors using `Soup Sieve <https://facelessuser.github.io/soupsieve/>`_ (pypi: ``soupsieve``). 

2 

3Acquire a `CSS` object through the `element.Tag.css` attribute of 

4the starting point of your CSS selector, or (if you want to run a 

5selector against the entire document) of the `BeautifulSoup` object 

6itself. 

7 

8The main advantage of doing this instead of using ``soupsieve`` 

9functions is that you don't need to keep passing the `element.Tag` to be 

10selected against, since the `CSS` object is permanently scoped to that 

11`element.Tag`. 

12 

13""" 

14 

15from __future__ import annotations 

16 

17from types import ModuleType 

18from typing import ( 

19 Any, 

20 cast, 

21 Iterable, 

22 Iterator, 

23 MutableSequence, 

24 Optional, 

25 TYPE_CHECKING, 

26) 

27import warnings 

28from bs4._typing import _NamespaceMapping 

29 

30if TYPE_CHECKING: 

31 from soupsieve import SoupSieve 

32 from bs4 import element 

33 from bs4.element import ResultSet, Tag 

34 

35soupsieve: Optional[ModuleType] 

36try: 

37 import soupsieve 

38except ImportError: 

39 soupsieve = None 

40 warnings.warn( 

41 "The soupsieve package is not installed. CSS selectors cannot be used." 

42 ) 

43 

44 

class CSS:
    """A proxy object against the ``soupsieve`` library, to simplify its
    CSS selector API.

    You don't need to instantiate this class yourself; instead, use
    `element.Tag.css`.

    :param tag: All CSS selectors run by this object will use this as
        their starting point.

    :param api: An optional drop-in replacement for the ``soupsieve`` module,
        intended for use in unit tests.

    :raises NotImplementedError: If no ``api`` is given and the
        ``soupsieve`` package is not installed.
    """

    def __init__(self, tag: element.Tag, api: Optional[ModuleType] = None):
        if api is None:
            # Fall back to the module-level soupsieve import, which is
            # itself None when the package is not installed.
            api = soupsieve
        if api is None:
            raise NotImplementedError(
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )
        self.api = api
        self.tag = tag

    def escape(self, ident: str) -> str:
        """Escape a CSS identifier.

        This is a simple wrapper around `soupsieve.escape() <https://facelessuser.github.io/soupsieve/api/#soupsieveescape>`_. See the
        documentation for that function for more information.

        :param ident: The identifier to escape.

        :return: The escaped identifier.
        """
        # The constructor guarantees self.api is usable, so delegate to
        # it directly. Checking the module-level ``soupsieve`` here would
        # wrongly reject an injected replacement API when the real
        # package is not installed.
        return cast(str, self.api.escape(ident))

    def _ns(
        self, ns: Optional[_NamespaceMapping], select: str
    ) -> Optional[_NamespaceMapping]:
        """Normalize a dictionary of namespaces.

        :param ns: The namespace mapping supplied by the caller, if any.

        :param select: The selector the mapping will be used with. This
            is either a string or an already compiled ``SoupSieve``
            pattern.

        :return: ``ns`` unchanged if it was provided, or if ``select`` is
            a precompiled pattern; otherwise the namespaces recorded on
            this object's tag.
        """
        # A precompiled pattern already has a namespace context compiled
        # in, which cannot be replaced, so the tag's namespaces are only
        # used as a default for selectors that are not precompiled.
        if ns is None and not isinstance(select, self.api.SoupSieve):
            ns = self.tag._namespaces
        return ns

    def _rs(self, results: MutableSequence[Tag]) -> ResultSet[Tag]:
        """Normalize a list of results to a :py:class:`ResultSet`.

        A :py:class:`ResultSet` is more consistent with the rest of
        Beautiful Soup's API, and :py:meth:`ResultSet.__getattr__` has
        a helpful error message if you try to treat a list of results
        as a single result (a common mistake).

        :param results: The sequence of results to wrap.

        :return: A :py:class:`ResultSet` built from ``results``.
        """
        # Import here to avoid circular import
        from bs4 import ResultSet

        return ResultSet(None, results)

    def compile(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> SoupSieve:
        """Pre-compile a selector and return the compiled object.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.compile() <https://facelessuser.github.io/soupsieve/api/#soupsievecompile>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
           `soupsieve.compile() <https://facelessuser.github.io/soupsieve/api/#soupsievecompile>`_ method.

        :return: A precompiled selector object.
        :rtype: soupsieve.SoupSieve
        """
        return self.api.compile(select, self._ns(namespaces, select), flags, **kwargs)

    def select_one(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> element.Tag | None:
        """Perform a CSS selection operation on the current Tag and return the
        first result, if any.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
           `soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.

        :return: Whatever the underlying ``select_one()`` call returns.
        """
        return self.api.select_one(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def select(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        limit: int = 0,
        flags: int = 0,
        **kwargs: Any,
    ) -> ResultSet[element.Tag]:
        """Perform a CSS selection operation on the current `element.Tag`.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param limit: After finding this number of results, stop looking.
            ``None`` is treated the same as 0.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            `soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.

        :return: The results, wrapped in a :py:class:`ResultSet`.
        """
        if limit is None:
            limit = 0

        return self._rs(
            self.api.select(
                select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
            )
        )

    def iselect(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        limit: int = 0,
        flags: int = 0,
        **kwargs: Any,
    ) -> Iterator[element.Tag]:
        """Perform a CSS selection operation on the current `element.Tag`.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.iselect()
        <https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_
        method. It is the same as select(), but it returns a generator
        instead of a list.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param limit: After finding this number of results, stop looking.
            ``None`` is treated the same as 0.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.iselect() <https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            `soupsieve.iselect() <https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_ method.

        :return: Whatever the underlying ``iselect()`` call returns.
        """
        # Accept None the same way select() does, so both methods agree
        # on what "no limit" looks like.
        if limit is None:
            limit = 0

        return self.api.iselect(
            select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
        )

    def closest(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> Optional[element.Tag]:
        """Find the `element.Tag` closest to this one that matches the given selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.closest()
        <https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_
        method.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.closest() <https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            `soupsieve.closest() <https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_ method.

        :return: Whatever the underlying ``closest()`` call returns.
        """
        return self.api.closest(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def match(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> bool:
        """Check whether or not this `element.Tag` matches the given CSS selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.match()
        <https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
        method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.match()
            <https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
            method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
            `soupsieve.match()
            <https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
            method.

        :return: The result of the underlying ``match()`` call.
        """
        # cast() only informs the type checker; the backend's return
        # value is passed through unchanged at runtime.
        return cast(
            bool,
            self.api.match(
                select, self.tag, self._ns(namespaces, select), flags, **kwargs
            ),
        )

    def filter(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> ResultSet[element.Tag]:
        """Filter this `element.Tag`'s direct children based on the given CSS selector.

        This uses the Soup Sieve library. It works the same way as
        passing a `element.Tag` into that library's `soupsieve.filter()
        <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
        method. For more information, see the documentation for
        `soupsieve.filter()
        <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.filter()
            <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
            method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
            `soupsieve.filter()
            <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
            method.

        :return: The results, wrapped in a :py:class:`ResultSet`.
        """
        return self._rs(
            self.api.filter(
                select, self.tag, self._ns(namespaces, select), flags, **kwargs
            )
        )