Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/common.py: 75%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

110 statements  

1# common.py 

2from .core import * 

3from .helpers import DelimitedList, any_open_tag, any_close_tag 

4from datetime import datetime 

5import sys 

6 

7PY_310_OR_LATER = sys.version_info >= (3, 10) 

8 

9 

10# some other useful expressions - using lower-case class name since we are really using this as a namespace 

class pyparsing_common:
    """Here are some common low-level expressions that may be useful in
    jump-starting parser development:

    - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
      :class:`scientific notation<sci_real>`)
    - common :class:`programming identifiers<identifier>`
    - network addresses (:class:`MAC<mac_address>`,
      :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
    - ISO8601 :class:`dates<iso8601_date>` and
      :class:`datetime<iso8601_datetime>`
    - :class:`UUID<uuid>`
    - :class:`comma-separated list<comma_separated_list>`
    - :class:`url`

    Parse actions:

    - :class:`convert_to_integer`
    - :class:`convert_to_float`
    - :class:`convert_to_date`
    - :class:`convert_to_datetime`
    - :class:`strip_html_tags`
    - :class:`upcase_tokens`
    - :class:`downcase_tokens`

    Examples:

    .. testcode::

        pyparsing_common.number.run_tests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

    .. testcode::

        pyparsing_common.fnumber.run_tests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

    .. testcode::

        pyparsing_common.hex_integer.run_tests('''
            # hex numbers
            100
            FF
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # hex numbers
        100
        [256]

        FF
        [255]

    .. testcode::

        pyparsing_common.fraction.run_tests('''
            # fractions
            1/2
            -3/4
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

    .. testcode::

        pyparsing_common.mixed_integer.run_tests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

    .. testcode::

        import uuid
        pyparsing_common.uuid.set_parse_action(token_map(uuid.UUID))
        pyparsing_common.uuid.run_tests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    @staticmethod
    def convert_to_integer(_, __, t):
        """
        Parse action for converting parsed integers to Python int
        """
        return [int(tt) for tt in t]

    @staticmethod
    def convert_to_float(_, __, t):
        """
        Parse action for converting parsed numbers to Python float
        """
        return [float(tt) for tt in t]

    # NOTE: inside the class body ``convert_to_integer`` / ``convert_to_float``
    # are still ``staticmethod`` objects, which are only directly callable on
    # Python 3.10+. On older interpreters an equivalent lambda is attached as
    # the parse action instead (same pattern repeated for each numeric
    # expression below).
    integer = (
        Word(nums)
        .set_name("integer")
        .set_parse_action(
            convert_to_integer
            if PY_310_OR_LATER
            else lambda t: [int(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """expression that parses an unsigned integer, converts to an int"""

    hex_integer = (
        Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
    )
    """expression that parses a hexadecimal integer, converts to an int"""

    signed_integer = (
        Regex(r"[+-]?\d+")
        .set_name("signed integer")
        .set_parse_action(
            convert_to_integer
            if PY_310_OR_LATER
            else lambda t: [int(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """expression that parses an integer with optional leading sign, converts to an int"""

    # numerator and denominator are copies of signed_integer whose parse action
    # is replaced so that the division below is done on floats
    fraction = (
        signed_integer().set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
        + "/"
        + signed_integer().set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    ).set_name("fraction")
    """fractional expression of an integer divided by an integer, converts to a float"""
    fraction.add_parse_action(lambda tt: tt[0] / tt[-1])

    mixed_integer = (
        fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
    ).set_name("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, converts to a float"""
    # the whole part and the (already divided) fraction are added together
    mixed_integer.add_parse_action(sum)

    real = (
        Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
        .set_name("real number")
        .set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """expression that parses a floating point number, converts to a float"""

    sci_real = (
        Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
        .set_name("real number with scientific notation")
        .set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """expression that parses a floating point number with optional
    scientific notation, converts to a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).set_name("number").streamline()
    """any numeric expression, converts to the corresponding Python type"""

    fnumber = (
        Regex(r"[+-]?\d+\.?\d*(?:[eE][+-]?\d+)?")
        .set_name("fnumber")
        .set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """any int or real number, always converts to a float"""

    ieee_float = (
        Regex(r"(?i:[+-]?(?:(?:\d+\.?\d*(?:e[+-]?\d+)?)|nan|inf(?:inity)?))")
        .set_name("ieee_float")
        .set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """any floating-point literal (int, real number, infinity, or NaN), converts to a float"""

    identifier = Word(identchars, identbodychars).set_name("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(
        r"(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(?:\.(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
    ).set_name("IPv4 address")
    "IPv4 address (``0.0.0.0 - 255.255.255.255``)"

    _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
    _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
        "full IPv6 address"
    )
    _short_ipv6_address = (
        Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
        + "::"
        + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
    ).set_name("short IPv6 address")
    # a "::"-abbreviated address must contain fewer than 8 explicit groups
    _short_ipv6_address.add_condition(
        lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
    )
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
    ipv6_address = Combine(
        (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
            "IPv6 address"
        )
    ).set_name("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # the first delimiter is captured and must be reused (\1) between all six octets
    mac_address = Regex(
        r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
    ).set_name("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convert_to_date(fmt: str = "%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
        - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

        Returns a parse action ``(s, l, t)`` that converts ``t[0]`` and raises
        :class:`ParseException` if the text does not match ``fmt``.

        Example:

        .. testcode::

            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.set_parse_action(pyparsing_common.convert_to_date())
            print(date_expr.parse_string("1999-12-31"))

        prints:

        .. testoutput::

            [datetime.date(1999, 12, 31)]
        """

        def cvt_fn(ss, ll, tt):
            try:
                return datetime.strptime(tt[0], fmt).date()
            except ValueError as ve:
                # report the conversion failure as a parse failure, keeping
                # the original ValueError as the explicit cause
                raise ParseException(ss, ll, str(ve)) from ve

        return cvt_fn

    @staticmethod
    def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
        """Helper to create a parse action for converting parsed
        datetime string to Python :class:`datetime.datetime`

        Params -
        - fmt - format to be passed to :class:`datetime.strptime` (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

        Returns a parse action ``(s, l, t)`` that converts ``t[0]`` and raises
        :class:`ParseException` if the text does not match ``fmt``.

        Example:

        .. testcode::

            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.set_parse_action(pyparsing_common.convert_to_datetime())
            print(dt_expr.parse_string("1999-12-31T23:59:59.999"))

        prints:

        .. testoutput::

            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """

        def cvt_fn(s, l, t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                # report the conversion failure as a parse failure, keeping
                # the original ValueError as the explicit cause
                raise ParseException(s, l, str(ve)) from ve

        return cvt_fn

    iso8601_date = Regex(
        r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
    ).set_name("ISO8601 date")
    "ISO8601 date (``yyyy-mm-dd``)"

    iso8601_datetime = Regex(
        r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
    ).set_name("ISO8601 datetime")
    "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"

    @staticmethod
    def as_datetime(s, l, t):
        """Parse action to convert parsed dates or datetimes to a Python
        :class:`datetime.datetime`.

        This parse action will use the year, month, day, etc. results
        names defined in the ISO8601 date expressions, but it can be
        used with any expression that provides one or more of these fields.

        Omitted fields will default to fields from Jan 1, 00:00:00.

        Invalid dates will raise a :class:`ParseException` with the
        error message indicating the invalid date fields.

        Params -
        - s - the original string being parsed
        - l - the location in ``s`` where the match started
        - t - the parsed tokens, read through the ``year``, ``month``,
          ``day``, ``hour``, ``minute`` and ``second`` results names

        The ``tz`` results name is not read here, so the returned
        datetime is naive.
        """
        year = int(t.year.lstrip("0") or 0)
        month = int(t.month or 1)
        day = int(t.day or 1)
        hour = int(t.hour or 0)
        minute = int(t.minute or 0)
        second = float(t.second or 0)
        try:
            whole_second = int(second)
            # datetime's 7th positional argument is *microseconds*, so the
            # fractional second is scaled by 1,000,000. round() absorbs binary
            # float error (0.999 -> 999000, not 998999) and the clamp keeps a
            # fraction such as .9999999 from rounding up to an out-of-range
            # 1,000,000.
            microsecond = min(
                round((second - whole_second) * 1_000_000), 999_999
            )
            return datetime(
                year, month, day, hour, minute, whole_second, microsecond
            )
        except ValueError as ve:
            # ParseException takes the input string (not the tokens) first
            raise ParseException(s, l, f"Invalid date/time: {ve}").with_traceback(
                ve.__traceback__
            ) from None

    # as_datetime is a staticmethod object here, so it can only be attached
    # directly as a parse action inside the class body on Python 3.10+
    if PY_310_OR_LATER:
        iso8601_date_validated = iso8601_date().add_parse_action(as_datetime)
        "Validated ISO8601 date strings, raising :class:`ParseException` for invalid date values."

        iso8601_datetime_validated = iso8601_datetime().add_parse_action(as_datetime)
        "Validated ISO8601 date and time strings, raising :class:`ParseException` for invalid date/time values."

    uuid = Regex(r"[0-9a-fA-F]{8}(?:-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name(
        "UUID"
    )
    "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

    _html_stripper = any_open_tag.suppress() | any_close_tag.suppress()

    @staticmethod
    def strip_html_tags(s: str, l: int, tokens: ParseResults):
        """Parse action to remove HTML tags from web page HTML source

        Only ``tokens[0]`` is transformed; opening and closing tags in it
        are suppressed and the remaining text is returned.

        Example:

        .. testcode::

            # strip HTML links from normal text
            text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
            td, td_end = make_html_tags("TD")
            table_text = td + SkipTo(td_end).set_parse_action(
                pyparsing_common.strip_html_tags)("body") + td_end
            print(table_text.parse_string(text).body)

        Prints:

        .. testoutput::

            More info at the pyparsing wiki page
        """
        return pyparsing_common._html_stripper.transform_string(tokens[0])

    _commasepitem = (
        Combine(
            OneOrMore(
                ~Literal(",")
                + ~LineEnd()
                + Word(printables, exclude_chars=",")
                + Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
            )
        )
        .streamline()
        .set_name("commaItem")
    )
    comma_separated_list = DelimitedList(
        Opt(quoted_string.copy() | _commasepitem, default="")
    ).set_name("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    @staticmethod
    def upcase_tokens(s, l, t):
        """Parse action to convert tokens to upper case."""
        return [tt.upper() for tt in t]

    @staticmethod
    def downcase_tokens(s, l, t):
        """Parse action to convert tokens to lower case."""
        return [tt.lower() for tt in t]

    # fmt: off
    url = Regex(
        # https://mathiasbynens.be/demo/url-regex
        # https://gist.github.com/dperini/729294
        r"(?P<url>"
        # protocol identifier (optional)
        # short syntax // still required
        r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)"
        # user:pass BasicAuth (optional)
        r"(?:(?P<auth>\S+(?::\S*)?)@)?"
        r"(?P<host>"
        # IP address exclusion
        # private & local networks
        r"(?!(?:10|127)(?:\.\d{1,3}){3})"
        r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})"
        r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})"
        # IP address dotted notation octets
        # excludes loopback network 0.0.0.0
        # excludes reserved space >= 224.0.0.0
        # excludes network & broadcast addresses
        # (first & last IP address of each class)
        r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
        r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}"
        r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
        r"|"
        # host & domain names, may end with dot
        # can be replaced by a shortest alternative
        # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
        r"(?:"
        r"(?:"
        r"[a-z0-9\u00a1-\uffff]"
        r"[a-z0-9\u00a1-\uffff_-]{0,62}"
        r")?"
        r"[a-z0-9\u00a1-\uffff]\."
        r")+"
        # TLD identifier name, may end with dot
        r"(?:[a-z\u00a1-\uffff]{2,}\.?)"
        r")"
        # port number (optional)
        r"(:(?P<port>\d{2,5}))?"
        # resource path (optional)
        r"(?P<path>\/[^?# ]*)?"
        # query string (optional)
        r"(\?(?P<query>[^#]*))?"
        # fragment (optional)
        r"(#(?P<fragment>\S*))?"
        r")"
    ).set_name("url")
    """
    URL (http/https/ftp scheme)

    .. versionchanged:: 3.1.0
       ``url`` named group added
    """
    # fmt: on

    # pre-PEP8 compatibility names
    # fmt: off
    convertToInteger = staticmethod(replaced_by_pep8("convertToInteger", convert_to_integer))
    convertToFloat = staticmethod(replaced_by_pep8("convertToFloat", convert_to_float))
    convertToDate = staticmethod(replaced_by_pep8("convertToDate", convert_to_date))
    convertToDatetime = staticmethod(replaced_by_pep8("convertToDatetime", convert_to_datetime))
    stripHTMLTags = staticmethod(replaced_by_pep8("stripHTMLTags", strip_html_tags))
    upcaseTokens = staticmethod(replaced_by_pep8("upcaseTokens", upcase_tokens))
    downcaseTokens = staticmethod(replaced_by_pep8("downcaseTokens", downcase_tokens))
    # fmt: on

566 

567 

# Every ParserElement defined as an attribute of pyparsing_common, in the
# order the attributes appear in the class namespace.
_builtin_exprs = list(
    filter(
        lambda attr: isinstance(attr, ParserElement),
        vars(pyparsing_common).values(),
    )
)