Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/common.py: 84%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

87 statements  

1# common.py 

2from .core import * 

3from .helpers import DelimitedList, any_open_tag, any_close_tag 

4from datetime import datetime 

5 

6 

7# some other useful expressions - using lower-case class name since we are really using this as a namespace 

class pyparsing_common:
    """Here are some common low-level expressions that may be useful in
    jump-starting parser development:

    - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
      :class:`scientific notation<sci_real>`)
    - common :class:`programming identifiers<identifier>`
    - network addresses (:class:`MAC<mac_address>`,
      :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
    - ISO8601 :class:`dates<iso8601_date>` and
      :class:`datetime<iso8601_datetime>`
    - :class:`UUID<uuid>`
    - :class:`comma-separated list<comma_separated_list>`
    - :class:`url`

    Parse actions:

    - :class:`convert_to_integer`
    - :class:`convert_to_float`
    - :class:`convert_to_date`
    - :class:`convert_to_datetime`
    - :class:`strip_html_tags`
    - :class:`upcase_tokens`
    - :class:`downcase_tokens`

    Examples:

    .. testcode::

        pyparsing_common.number.run_tests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

    .. testcode::

        pyparsing_common.fnumber.run_tests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

    .. testcode::

        pyparsing_common.hex_integer.run_tests('''
            # hex numbers
            100
            FF
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # hex numbers
        100
        [256]

        FF
        [255]

    .. testcode::

        pyparsing_common.fraction.run_tests('''
            # fractions
            1/2
            -3/4
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

    .. testcode::

        pyparsing_common.mixed_integer.run_tests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

    .. testcode::

        import uuid
        pyparsing_common.uuid.set_parse_action(token_map(uuid.UUID))
        pyparsing_common.uuid.run_tests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convert_to_integer = token_map(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convert_to_float = token_map(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).set_name("integer").set_parse_action(convert_to_integer)
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = (
        Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
    )
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = (
        Regex(r"[+-]?\d+")
        .set_name("signed integer")
        .set_parse_action(convert_to_integer)
    )
    """expression that parses an integer with optional leading sign, returns an int"""

    # Calling signed_integer() yields a copy, so replacing the parse action
    # here does not affect the class-level signed_integer expression.
    fraction = (
        signed_integer().set_parse_action(convert_to_float)
        + "/"
        + signed_integer().set_parse_action(convert_to_float)
    ).set_name("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # tt[0] is the numerator and tt[-1] the denominator (the "/" literal sits
    # between them).
    # NOTE(review): a zero denominator makes this division raise
    # ZeroDivisionError instead of a ParseException - confirm that is intended.
    fraction.add_parse_action(lambda tt: tt[0] / tt[-1])

    mixed_integer = (
        fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
    ).set_name("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # The integer part and the fraction part (when present) are added together.
    mixed_integer.add_parse_action(sum)

    real = (
        Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
        .set_name("real number")
        .set_parse_action(convert_to_float)
    )
    """expression that parses a floating point number and returns a float"""

    sci_real = (
        Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
        .set_name("real number with scientific notation")
        .set_parse_action(convert_to_float)
    )
    """expression that parses a floating point number with optional
    scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).set_name("number").streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = (
        Regex(r"[+-]?\d+\.?\d*(?:[eE][+-]?\d+)?")
        .set_name("fnumber")
        .set_parse_action(convert_to_float)
    )
    """any int or real number, returned as float"""

    ieee_float = (
        Regex(r"(?i:[+-]?(?:(?:\d+\.?\d*(?:e[+-]?\d+)?)|nan|inf(?:inity)?))")
        .set_name("ieee_float")
        .set_parse_action(convert_to_float)
    )
    """any floating-point literal (int, real number, infinity, or NaN), returned as float"""

    identifier = Word(identchars, identbodychars).set_name("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(
        r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
    ).set_name("IPv4 address")
    "IPv4 address (``0.0.0.0 - 255.255.255.255``)"

    _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
    _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
        "full IPv6 address"
    )
    _short_ipv6_address = (
        Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
        + "::"
        + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
    ).set_name("short IPv6 address")
    # Each side of "::" may carry up to 7 groups on its own, so the combined
    # number of hex groups is checked here and must stay below 8.
    _short_ipv6_address.add_condition(
        lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
    )
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
    ipv6_address = Combine(
        (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
            "IPv6 address"
        )
    ).set_name("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # The delimiter after the first octet is captured as group 1 and the
    # back-reference \1 forces the remaining four delimiters to match it.
    mac_address = Regex(
        r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
    ).set_name("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convert_to_date(fmt: str = "%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
        - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

        Returns a parse action taking ``(string, location, tokens)`` that
        converts ``tokens[0]``; a string that does not match ``fmt`` raises
        :class:`ParseException`.

        Example:

        .. testcode::

            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.set_parse_action(pyparsing_common.convert_to_date())
            print(date_expr.parse_string("1999-12-31"))

        prints:

        .. testoutput::

            [datetime.date(1999, 12, 31)]
        """

        def cvt_fn(ss, ll, tt):
            try:
                return datetime.strptime(tt[0], fmt).date()
            except ValueError as ve:
                # Re-raise as a ParseException carrying the input string and
                # location; chain the ValueError so the cause stays visible.
                raise ParseException(ss, ll, str(ve)) from ve

        return cvt_fn

    @staticmethod
    def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
        """Helper to create a parse action for converting parsed
        datetime string to Python datetime.datetime

        Params -
        - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

        Returns a parse action taking ``(string, location, tokens)`` that
        converts ``tokens[0]``; a string that does not match ``fmt`` raises
        :class:`ParseException`.

        Example:

        .. testcode::

            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.set_parse_action(pyparsing_common.convert_to_datetime())
            print(dt_expr.parse_string("1999-12-31T23:59:59.999"))

        prints:

        .. testoutput::

            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """

        def cvt_fn(ss, ll, tt):
            try:
                return datetime.strptime(tt[0], fmt)
            except ValueError as ve:
                # Re-raise as a ParseException carrying the input string and
                # location; chain the ValueError so the cause stays visible.
                raise ParseException(ss, ll, str(ve)) from ve

        return cvt_fn

    iso8601_date = Regex(
        r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
    ).set_name("ISO8601 date")
    "ISO8601 date (``yyyy-mm-dd``) - the ``-mm`` and ``-dd`` parts are optional"

    iso8601_datetime = Regex(
        r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
    ).set_name("ISO8601 datetime")
    "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"

    uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name("UUID")
    "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

    # Matches any opening or closing tag and suppresses it from the output.
    _html_stripper = any_open_tag.suppress() | any_close_tag.suppress()

    @staticmethod
    def strip_html_tags(s: str, l: int, tokens: ParseResults):
        """Parse action to remove HTML tags from web page HTML source

        Only ``tokens[0]`` is processed; ``s`` and ``l`` are accepted to fit
        the parse action signature and are not used.

        Example:

        .. testcode::

            # strip HTML links from normal text
            text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
            td, td_end = make_html_tags("TD")
            table_text = td + SkipTo(td_end).set_parse_action(
                pyparsing_common.strip_html_tags)("body") + td_end
            print(table_text.parse_string(text).body)

        Prints:

        .. testoutput::

            More info at the pyparsing wiki page
        """
        return pyparsing_common._html_stripper.transform_string(tokens[0])

    _commasepitem = (
        Combine(
            OneOrMore(
                ~Literal(",")
                + ~LineEnd()
                + Word(printables, exclude_chars=",")
                + Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
            )
        )
        .streamline()
        .set_name("commaItem")
    )
    comma_separated_list = DelimitedList(
        Opt(quoted_string.copy() | _commasepitem, default="")
    ).set_name("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcase_tokens = staticmethod(token_map(lambda t: t.upper()))
    """Parse action to convert tokens to upper case."""

    downcase_tokens = staticmethod(token_map(lambda t: t.lower()))
    """Parse action to convert tokens to lower case."""

    # fmt: off
    url = Regex(
        # https://mathiasbynens.be/demo/url-regex
        # https://gist.github.com/dperini/729294
        r"(?P<url>" +
        # protocol identifier (optional)
        # short syntax // still required
        r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" +
        # user:pass BasicAuth (optional)
        r"(?:(?P<auth>\S+(?::\S*)?)@)?" +
        r"(?P<host>" +
        # IP address exclusion
        # private & local networks
        r"(?!(?:10|127)(?:\.\d{1,3}){3})" +
        r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" +
        r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" +
        # IP address dotted notation octets
        # excludes loopback network 0.0.0.0
        # excludes reserved space >= 224.0.0.0
        # excludes network & broadcast addresses
        # (first & last IP address of each class)
        r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" +
        r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" +
        r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" +
        r"|" +
        # host & domain names, may end with dot
        # can be replaced by a shortest alternative
        # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
        r"(?:" +
        r"(?:" +
        r"[a-z0-9\u00a1-\uffff]" +
        r"[a-z0-9\u00a1-\uffff_-]{0,62}" +
        r")?" +
        r"[a-z0-9\u00a1-\uffff]\." +
        r")+" +
        # TLD identifier name, may end with dot
        r"(?:[a-z\u00a1-\uffff]{2,}\.?)" +
        r")" +
        # port number (optional)
        r"(:(?P<port>\d{2,5}))?" +
        # resource path (optional)
        r"(?P<path>\/[^?# ]*)?" +
        # query string (optional)
        r"(\?(?P<query>[^#]*))?" +
        # fragment (optional)
        r"(#(?P<fragment>\S*))?" +
        r")"
    ).set_name("url")
    """
    URL (http/https/ftp scheme)

    .. versionchanged:: 3.1.0
       ``url`` named group added
    """
    # fmt: on

    # pre-PEP8 compatibility names
    # fmt: off
    convertToInteger = staticmethod(replaced_by_pep8("convertToInteger", convert_to_integer))
    convertToFloat = staticmethod(replaced_by_pep8("convertToFloat", convert_to_float))
    convertToDate = staticmethod(replaced_by_pep8("convertToDate", convert_to_date))
    convertToDatetime = staticmethod(replaced_by_pep8("convertToDatetime", convert_to_datetime))
    stripHTMLTags = staticmethod(replaced_by_pep8("stripHTMLTags", strip_html_tags))
    upcaseTokens = staticmethod(replaced_by_pep8("upcaseTokens", upcase_tokens))
    downcaseTokens = staticmethod(replaced_by_pep8("downcaseTokens", downcase_tokens))
    # fmt: on

481 

482 

483_builtin_exprs = [ 

484 v for v in vars(pyparsing_common).values() if isinstance(v, ParserElement) 

485]