Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/common.py: 81%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

93 statements  

1# common.py 

2from .core import * 

3from .helpers import DelimitedList, any_open_tag, any_close_tag 

4from datetime import datetime 

5import sys 

6 

# True when running on Python 3.10 or newer.  The class body below uses this
# flag to decide whether a ``@staticmethod`` object (e.g. ``convert_to_integer``)
# can be handed directly to ``set_parse_action`` while the class is still being
# defined, or whether an equivalent lambda must be used instead.
# NOTE(review): this matches the Python 3.10 change that made staticmethod
# objects directly callable - confirm against the Python docs.
PY_310 = sys.version_info >= (3, 10)

8 

9 

10# some other useful expressions - using lower-case class name since we are really using this as a namespace 

class pyparsing_common:
    """Here are some common low-level expressions that may be useful in
    jump-starting parser development:

    - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
      :class:`scientific notation<sci_real>`)
    - common :class:`programming identifiers<identifier>`
    - network addresses (:class:`MAC<mac_address>`,
      :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
    - ISO8601 :class:`dates<iso8601_date>` and
      :class:`datetime<iso8601_datetime>`
    - :class:`UUID<uuid>`
    - :class:`comma-separated list<comma_separated_list>`
    - :class:`url`

    Parse actions:

    - :class:`convert_to_integer`
    - :class:`convert_to_float`
    - :class:`convert_to_date`
    - :class:`convert_to_datetime`
    - :class:`strip_html_tags`
    - :class:`upcase_tokens`
    - :class:`downcase_tokens`

    Examples:

    .. testcode::

        pyparsing_common.number.run_tests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

    .. testcode::

        pyparsing_common.fnumber.run_tests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

    .. testcode::

        pyparsing_common.hex_integer.run_tests('''
            # hex numbers
            100
            FF
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # hex numbers
        100
        [256]

        FF
        [255]

    .. testcode::

        pyparsing_common.fraction.run_tests('''
            # fractions
            1/2
            -3/4
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

    .. testcode::

        pyparsing_common.mixed_integer.run_tests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

    .. testcode::

        import uuid
        pyparsing_common.uuid.set_parse_action(token_map(uuid.UUID))
        pyparsing_common.uuid.run_tests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    @staticmethod
    def convert_to_integer(_, __, t):
        """
        Parse action for converting parsed integers to Python int

        The first two positional arguments (input string and match location)
        are accepted but ignored; every token in ``t`` is passed to ``int``
        and the converted values are returned as a list.
        """
        return [int(tt) for tt in t]

    @staticmethod
    def convert_to_float(_, __, t):
        """
        Parse action for converting parsed numbers to Python float

        The first two positional arguments (input string and match location)
        are accepted but ignored; every token in ``t`` is passed to ``float``
        and the converted values are returned as a list.
        """
        return [float(tt) for tt in t]

    # NOTE: throughout the numeric expressions below, the staticmethod parse
    # action is only referenced directly when PY_310 is true; otherwise an
    # equivalent lambda performing the same conversion is attached instead.

    integer = (
        Word(nums)
        .set_name("integer")
        .set_parse_action(
            convert_to_integer
            if PY_310
            else lambda t: [int(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """expression that parses an unsigned integer, converts to an int"""

    hex_integer = (
        Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
    )
    """expression that parses a hexadecimal integer, converts to an int"""

    signed_integer = (
        Regex(r"[+-]?\d+")
        .set_name("signed integer")
        .set_parse_action(
            convert_to_integer
            if PY_310
            else lambda t: [int(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """expression that parses an integer with optional leading sign, converts to an int"""

    fraction = (
        signed_integer().set_parse_action(
            convert_to_float
            if PY_310
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
        + "/"
        + signed_integer().set_parse_action(
            convert_to_float
            if PY_310
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    ).set_name("fraction")
    """fractional expression of an integer divided by an integer, converts to a float"""
    # divide the first token (numerator) by the last token (denominator)
    fraction.add_parse_action(lambda tt: tt[0] / tt[-1])

    mixed_integer = (
        fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
    ).set_name("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, converts to a float"""
    # whole part and fractional part (when both are present) are added together
    mixed_integer.add_parse_action(sum)

    real = (
        Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
        .set_name("real number")
        .set_parse_action(
            convert_to_float
            if PY_310
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """expression that parses a floating point number, converts to a float"""

    sci_real = (
        Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
        .set_name("real number with scientific notation")
        .set_parse_action(
            convert_to_float
            if PY_310
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """expression that parses a floating point number with optional
    scientific notation, converts to a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).set_name("number").streamline()
    """any numeric expression, converts to the corresponding Python type"""

    fnumber = (
        Regex(r"[+-]?\d+\.?\d*(?:[eE][+-]?\d+)?")
        .set_name("fnumber")
        .set_parse_action(
            convert_to_float
            if PY_310
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """any int or real number, always converts to a float"""

    ieee_float = (
        Regex(r"(?i:[+-]?(?:(?:\d+\.?\d*(?:e[+-]?\d+)?)|nan|inf(?:inity)?))")
        .set_name("ieee_float")
        .set_parse_action(
            convert_to_float
            if PY_310
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """any floating-point literal (int, real number, infinity, or NaN), converts to a float"""

    identifier = Word(identchars, identbodychars).set_name("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(
        r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
    ).set_name("IPv4 address")
    "IPv4 address (``0.0.0.0 - 255.255.255.255``)"

    _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
    _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
        "full IPv6 address"
    )
    _short_ipv6_address = (
        Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
        + "::"
        + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
    ).set_name("short IPv6 address")
    # the short ("::") form is only accepted when fewer than 8 hex groups matched
    _short_ipv6_address.add_condition(
        lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
    )
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
    ipv6_address = Combine(
        (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
            "IPv6 address"
        )
    ).set_name("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # one 2-digit hex group, a delimiter captured as group 1, a second group,
    # then four more groups that must reuse the same delimiter: six in total
    mac_address = Regex(
        r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
    ).set_name("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convert_to_date(fmt: str = "%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
        - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

        Returns a parse action taking ``(s, l, t)``; it converts only the
        first token, ``t[0]``, and raises :class:`ParseException` (carrying
        the ``strptime`` error message) when the text does not match ``fmt``.

        Example:

        .. testcode::

            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.set_parse_action(pyparsing_common.convert_to_date())
            print(date_expr.parse_string("1999-12-31"))

        prints:

        .. testoutput::

            [datetime.date(1999, 12, 31)]
        """

        def cvt_fn(ss, ll, tt):
            try:
                return datetime.strptime(tt[0], fmt).date()
            except ValueError as ve:
                # chain explicitly so the original strptime failure is kept as the cause
                raise ParseException(ss, ll, str(ve)) from ve

        return cvt_fn

    @staticmethod
    def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
        """Helper to create a parse action for converting parsed
        datetime string to Python datetime.datetime

        Params -
        - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

        Returns a parse action taking ``(s, l, t)``; it converts only the
        first token, ``t[0]``, and raises :class:`ParseException` (carrying
        the ``strptime`` error message) when the text does not match ``fmt``.

        Example:

        .. testcode::

            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.set_parse_action(pyparsing_common.convert_to_datetime())
            print(dt_expr.parse_string("1999-12-31T23:59:59.999"))

        prints:

        .. testoutput::

            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """

        def cvt_fn(s, l, t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                # chain explicitly so the original strptime failure is kept as the cause
                raise ParseException(s, l, str(ve)) from ve

        return cvt_fn

    iso8601_date = Regex(
        r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
    ).set_name("ISO8601 date")
    "ISO8601 date (``yyyy-mm-dd``)"

    iso8601_datetime = Regex(
        r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
    ).set_name("ISO8601 datetime")
    "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"

    uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name("UUID")
    "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

    # matches any opening or closing tag and suppresses it from the output
    _html_stripper = any_open_tag.suppress() | any_close_tag.suppress()

    @staticmethod
    def strip_html_tags(s: str, l: int, tokens: ParseResults):
        """Parse action to remove HTML tags from web page HTML source

        Only the first token, ``tokens[0]``, is transformed; ``s`` and ``l``
        are accepted but not used.

        Example:

        .. testcode::

            # strip HTML links from normal text
            text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
            td, td_end = make_html_tags("TD")
            table_text = td + SkipTo(td_end).set_parse_action(
                pyparsing_common.strip_html_tags)("body") + td_end
            print(table_text.parse_string(text).body)

        Prints:

        .. testoutput::

            More info at the pyparsing wiki page
        """
        return pyparsing_common._html_stripper.transform_string(tokens[0])

    _commasepitem = (
        Combine(
            OneOrMore(
                ~Literal(",")
                + ~LineEnd()
                + Word(printables, exclude_chars=",")
                + Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
            )
        )
        .streamline()
        .set_name("commaItem")
    )
    comma_separated_list = DelimitedList(
        Opt(quoted_string.copy() | _commasepitem, default="")
    ).set_name("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    @staticmethod
    def upcase_tokens(s, l, t):
        """Parse action to convert tokens to upper case."""
        return [tt.upper() for tt in t]

    @staticmethod
    def downcase_tokens(s, l, t):
        """Parse action to convert tokens to lower case."""
        return [tt.lower() for tt in t]

    # fmt: off
    url = Regex(
        # https://mathiasbynens.be/demo/url-regex
        # https://gist.github.com/dperini/729294
        r"(?P<url>" +
        # protocol identifier (optional)
        # short syntax // still required
        r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" +
        # user:pass BasicAuth (optional)
        r"(?:(?P<auth>\S+(?::\S*)?)@)?" +
        r"(?P<host>" +
        # IP address exclusion
        # private & local networks
        r"(?!(?:10|127)(?:\.\d{1,3}){3})" +
        r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" +
        r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" +
        # IP address dotted notation octets
        # excludes loopback network 0.0.0.0
        # excludes reserved space >= 224.0.0.0
        # excludes network & broadcast addresses
        # (first & last IP address of each class)
        r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" +
        r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" +
        r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" +
        r"|" +
        # host & domain names, may end with dot
        # can be replaced by a shortest alternative
        # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
        r"(?:" +
        r"(?:" +
        r"[a-z0-9\u00a1-\uffff]" +
        r"[a-z0-9\u00a1-\uffff_-]{0,62}" +
        r")?" +
        r"[a-z0-9\u00a1-\uffff]\." +
        r")+" +
        # TLD identifier name, may end with dot
        r"(?:[a-z\u00a1-\uffff]{2,}\.?)" +
        r")" +
        # port number (optional)
        r"(:(?P<port>\d{2,5}))?" +
        # resource path (optional)
        r"(?P<path>\/[^?# ]*)?" +
        # query string (optional)
        r"(\?(?P<query>[^#]*))?" +
        # fragment (optional)
        r"(#(?P<fragment>\S*))?" +
        r")"
    ).set_name("url")
    """
    URL (http/https/ftp scheme)

    .. versionchanged:: 3.1.0
       ``url`` named group added
    """
    # fmt: on

    # pre-PEP8 compatibility names
    # fmt: off
    convertToInteger = staticmethod(replaced_by_pep8("convertToInteger", convert_to_integer))
    convertToFloat = staticmethod(replaced_by_pep8("convertToFloat", convert_to_float))
    convertToDate = staticmethod(replaced_by_pep8("convertToDate", convert_to_date))
    convertToDatetime = staticmethod(replaced_by_pep8("convertToDatetime", convert_to_datetime))
    stripHTMLTags = staticmethod(replaced_by_pep8("stripHTMLTags", strip_html_tags))
    upcaseTokens = staticmethod(replaced_by_pep8("upcaseTokens", upcase_tokens))
    downcaseTokens = staticmethod(replaced_by_pep8("downcaseTokens", downcase_tokens))
    # fmt: on

528 

529 

# Collect every class-level attribute of ``pyparsing_common`` that is a
# ParserElement instance (parse-action helpers and other non-parser
# attributes are skipped), in class-definition order.
_builtin_exprs = list(
    filter(
        lambda member: isinstance(member, ParserElement),
        vars(pyparsing_common).values(),
    )
)