Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/bs4/css.py: 38%

39 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1"""Integration code for CSS selectors using Soup Sieve (pypi: soupsieve).""" 

2 

3import warnings 

try:
    import soupsieve
except ImportError:
    # soupsieve is an optional dependency. Fall back to None so that this
    # module can still be imported; the CSS class below uses this value as
    # the default for its `api` argument and refuses to run when it is None.
    soupsieve = None
    warnings.warn(
        'The soupsieve package is not installed. CSS selectors cannot be used.'
    )

11 

12 

class CSS(object):
    """A proxy object against the soupsieve library, to simplify its
    CSS selector API.

    Acquire this object through the .css attribute on the
    BeautifulSoup object, or on the Tag you want to use as the
    starting point for a CSS selector.

    The main advantage of doing this is that the tag to be selected
    against doesn't need to be explicitly specified in the function
    calls, since it's already scoped to a tag.
    """

    def __init__(self, tag, api=soupsieve):
        """Constructor.

        You don't need to instantiate this class yourself; instead,
        access the .css attribute on the BeautifulSoup object, or on
        the Tag you want to use as the starting point for your CSS
        selector.

        :param tag: All CSS selectors will use this as their starting
         point.

        :param api: A plug-in replacement for the soupsieve module,
         designed mainly for use in tests.

        :raise NotImplementedError: If `api` is None, which is the
         default when the soupsieve package could not be imported.
        """
        if api is None:
            raise NotImplementedError(
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )
        self.api = api
        self.tag = tag

    def escape(self, ident):
        """Escape a CSS identifier.

        This is a simple wrapper around the escape() function of the
        configured API (soupsieve.escape() by default). See the
        documentation for that function for more information.

        :param ident: The identifier to escape.

        :return: Whatever the API's escape() function returns for
         `ident`.

        :raise NotImplementedError: If no API is available.
        """
        # Check the API this object was actually configured with rather
        # than the module-level soupsieve import, so that a plug-in API
        # keeps working even when soupsieve itself is not installed.
        if self.api is None:
            raise NotImplementedError(
                "Cannot escape CSS identifiers because the soupsieve package is not installed."
            )
        return self.api.escape(ident)

    def _ns(self, ns, select):
        """Normalize a dictionary of namespaces.

        :param ns: The namespaces passed in by the caller, or None.

        :param select: The selector the namespaces will be used with.

        :return: `ns` unchanged, unless it is None and `select` is not
         a precompiled pattern, in which case the namespaces recorded
         on the starting tag are returned.
        """
        if not isinstance(select, self.api.SoupSieve) and ns is None:
            # Only fill in a default for selectors that are not
            # precompiled. A precompiled pattern already has a
            # namespace context compiled in, which cannot be replaced.
            ns = self.tag._namespaces
        return ns

    def _rs(self, results):
        """Normalize a list of results to a ResultSet.

        A ResultSet is more consistent with the rest of Beautiful
        Soup's API, and ResultSet.__getattr__ has a helpful error
        message if you try to treat a list of results as a single
        result (a common mistake).

        :param results: The results to wrap.

        :return: A ResultSet containing `results`.
        """
        # Import here to avoid circular import
        from bs4.element import ResultSet
        return ResultSet(None, results)

    def compile(self, select, namespaces=None, flags=0, **kwargs):
        """Pre-compile a selector and return the compiled object.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.compile() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
           soupsieve.compile() method.

        :return: A precompiled selector object.
        :rtype: soupsieve.SoupSieve
        """
        return self.api.compile(
            select, self._ns(namespaces, select), flags, **kwargs
        )

    def select_one(self, select, namespaces=None, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag and return the
        first result.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select_one()
        method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.select_one() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
           soupsieve.select_one() method.

        :return: A Tag, or None if the selector has no match.
        :rtype: bs4.element.Tag

        """
        return self.api.select_one(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select()
        method.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will pass in the prefixes it encountered while
           parsing the document.

        :param limit: After finding this number of results, stop looking.
            None is treated the same as 0, the default.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.select() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
           soupsieve.select() method.

        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet

        """
        if limit is None:
            limit = 0

        return self._rs(
            self.api.select(
                select, self.tag, self._ns(namespaces, select), limit, flags,
                **kwargs
            )
        )

    def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.iselect()
        method. It is the same as select(), but it returns a generator
        instead of a list.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will pass in the prefixes it encountered while
           parsing the document.

        :param limit: After finding this number of results, stop looking.
            None is treated the same as 0, the default.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.iselect() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
           soupsieve.iselect() method.

        :return: A generator
        :rtype: types.GeneratorType
        """
        # Mirror select(): accept limit=None instead of handing the
        # backend a value it was not written to compare against.
        if limit is None:
            limit = 0

        return self.api.iselect(
            select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
        )

    def closest(self, select, namespaces=None, flags=0, **kwargs):
        """Find the Tag closest to this one that matches the given selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.closest()
        method.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will pass in the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.closest() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
           soupsieve.closest() method.

        :return: A Tag, or None if there is no match.
        :rtype: bs4.Tag

        """
        return self.api.closest(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def match(self, select, namespaces=None, flags=0, **kwargs):
        """Check whether this Tag matches the given CSS selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.match()
        method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will pass in the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.match() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
           soupsieve.match() method.

        :return: True if this Tag matches the selector; False otherwise.
        :rtype: bool
        """
        return self.api.match(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def filter(self, select, namespaces=None, flags=0, **kwargs):
        """Filter this Tag's direct children based on the given CSS selector.

        This uses the Soup Sieve library. It works the same way as
        passing this Tag into that library's soupsieve.filter()
        method. For more information, see the documentation for
        soupsieve.filter().

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will pass in the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.filter() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
           soupsieve.filter() method.

        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet

        """
        return self._rs(
            self.api.filter(
                select, self.tag, self._ns(namespaces, select), flags, **kwargs
            )
        )