Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/common.py: 75%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# common.py
2from .core import *
3from .helpers import DelimitedList, any_open_tag, any_close_tag
4from datetime import datetime
5import sys
# True when the running interpreter is Python 3.10 or newer. The class below
# consults this flag to decide which callable to install as a parse action.
PY_310_OR_LATER = sys.version_info[:2] >= (3, 10)
10# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """Here are some common low-level expressions that may be useful in
    jump-starting parser development:

    - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
      :class:`scientific notation<sci_real>`)
    - common :class:`programming identifiers<identifier>`
    - network addresses (:class:`MAC<mac_address>`,
      :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
    - ISO8601 :class:`dates<iso8601_date>` and
      :class:`datetime<iso8601_datetime>`
    - :class:`UUID<uuid>`
    - :class:`comma-separated list<comma_separated_list>`
    - :class:`url`

    Parse actions:

    - :class:`convert_to_integer`
    - :class:`convert_to_float`
    - :class:`convert_to_date`
    - :class:`convert_to_datetime`
    - :class:`strip_html_tags`
    - :class:`upcase_tokens`
    - :class:`downcase_tokens`

    Examples:

    .. testcode::

        pyparsing_common.number.run_tests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

    .. testcode::

        pyparsing_common.fnumber.run_tests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

    .. testcode::

        pyparsing_common.hex_integer.run_tests('''
            # hex numbers
            100
            FF
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # hex numbers
        100
        [256]

        FF
        [255]

    .. testcode::

        pyparsing_common.fraction.run_tests('''
            # fractions
            1/2
            -3/4
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

    .. testcode::

        pyparsing_common.mixed_integer.run_tests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

    .. testcode::

        import uuid
        pyparsing_common.uuid.set_parse_action(token_map(uuid.UUID))
        pyparsing_common.uuid.run_tests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    @staticmethod
    def convert_to_integer(_, __, t) -> list[int]:
        """
        Parse action for converting parsed integers to Python int

        The first two positional arguments are ignored; every item in ``t``
        is passed through :class:`int` and the results are returned as a list.
        """
        return [int(tt) for tt in t]

    @staticmethod
    def convert_to_float(_, __, t) -> list[float]:
        """
        Parse action for converting parsed numbers to Python float

        The first two positional arguments are ignored; every item in ``t``
        is passed through :class:`float` and the results are returned as a list.
        """
        return [float(tt) for tt in t]

    # NOTE(review): the PY_310_OR_LATER switch used throughout the numeric
    # expressions presumably exists because staticmethod objects only became
    # directly callable in Python 3.10, and inside the class body
    # convert_to_integer / convert_to_float are still the raw staticmethod
    # wrappers; otherwise an equivalent token_map() action is used - confirm.
    integer = (
        Word(nums)
        .set_name("integer")
        .set_parse_action(
            convert_to_integer
            if PY_310_OR_LATER
            else token_map(int)
        )
    )
    """expression that parses an unsigned integer, converts to an int"""

    hex_integer = (
        Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
    )
    """expression that parses a hexadecimal integer, converts to an int"""

    signed_integer = (
        Regex(r"[+-]?\d+")
        .set_name("signed integer")
        .set_parse_action(
            convert_to_integer
            if PY_310_OR_LATER
            else token_map(int)
        )
    )
    """expression that parses an integer with optional leading sign, converts to an int"""

    fraction = (
        signed_integer().set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else token_map(float)
        )
        + "/"
        + signed_integer().set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else token_map(float)
        )
    ).set_name("fraction")
    """fractional expression of an integer divided by an integer, converts to a float"""
    # numerator is the first token, denominator the last ("/" sits between them)
    fraction.add_parse_action(lambda tt: tt[0] / tt[-1])

    mixed_integer = (
        fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
    ).set_name("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, converts to a float"""
    # the integer part and the (optional) fraction part are added together
    mixed_integer.add_parse_action(sum)

    real = (
        Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
        .set_name("real number")
        .set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else token_map(float)
        )
    )
    """expression that parses a floating point number, converts to a float"""

    sci_real = (
        Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
        .set_name("real number with scientific notation")
        .set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else token_map(float)
        )
    )
    """expression that parses a floating point number with optional
    scientific notation, converts to a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).set_name("number").streamline()
    """any numeric expression, converts to the corresponding Python type"""

    fnumber = (
        Regex(r"[+-]?\d+\.?\d*(?:[eE][+-]?\d+)?")
        .set_name("fnumber")
        .set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else token_map(float)
        )
    )
    """any int or real number, always converts to a float"""

    ieee_float = (
        Regex(r"(?i:[+-]?(?:(?:\d+\.?\d*(?:e[+-]?\d+)?)|nan|inf(?:inity)?))")
        .set_name("ieee_float")
        .set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else token_map(float)
        )
    )
    """any floating-point literal (int, real number, infinity, or NaN), converts to a float"""

    identifier = Word(identchars, identbodychars).set_name("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(
        r"(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(?:\.(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
    ).set_name("IPv4 address")
    "IPv4 address (``0.0.0.0 - 255.255.255.255``)"

    _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
    _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
        "full IPv6 address"
    )
    _short_ipv6_address = (
        Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
        + "::"
        + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
    ).set_name("short IPv6 address")
    # a "::"-abbreviated address must contain fewer than 8 hex groups in total
    _short_ipv6_address.add_condition(
        lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
    )
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
    ipv6_address = Combine(
        (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
            "IPv6 address"
        )
    ).set_name("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # the back-reference \1 forces every delimiter to match the first one
    mac_address = Regex(
        r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
    ).set_name("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convert_to_date(fmt: str = "%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
        - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

        The returned parse action converts the first token and raises
        :class:`ParseException` if ``strptime`` rejects it.

        Example:

        .. testcode::

            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.set_parse_action(pyparsing_common.convert_to_date())
            print(date_expr.parse_string("1999-12-31"))

        prints:

        .. testoutput::

            [datetime.date(1999, 12, 31)]
        """

        def cvt_fn(ss, ll, tt):
            try:
                return datetime.strptime(tt[0], fmt).date()
            except ValueError as ve:
                raise ParseException(ss, ll, str(ve))

        return cvt_fn

    @staticmethod
    def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
        """Helper to create a parse action for converting parsed
        datetime string to Python :class:`datetime.datetime`

        Params -
        - fmt - format to be passed to :class:`datetime.strptime` (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

        The returned parse action converts the first token and raises
        :class:`ParseException` if ``strptime`` rejects it.

        Example:

        .. testcode::

            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.set_parse_action(pyparsing_common.convert_to_datetime())
            print(dt_expr.parse_string("1999-12-31T23:59:59.999"))

        prints:

        .. testoutput::

            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """

        def cvt_fn(s, l, t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                raise ParseException(s, l, str(ve))

        return cvt_fn

    iso8601_date = Regex(
        r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
    ).set_name("ISO8601 date")
    "ISO8601 date (``yyyy-mm-dd``)"

    iso8601_datetime = Regex(
        r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
    ).set_name("ISO8601 datetime")
    "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"

    @staticmethod
    def as_datetime(s, l, t):
        """Parse action to convert parsed dates or datetimes to a Python
        :class:`datetime.datetime`.

        This parse action will use the year, month, day, etc. results
        names defined in the ISO8601 date expressions, but it can be
        used with any expression that provides one or more of these fields.

        Omitted fields will default to fields from Jan 1, 00:00:00.
        The ``tz`` results name is not read, so the returned value carries
        no timezone information.

        Invalid dates will raise a :class:`ParseException` with the
        error message indicating the invalid date fields.
        """
        # strip leading zeros from the year; an all-zero or missing year becomes 0
        year = int(t.year.lstrip("0") or 0)
        month = int(t.month or 1)
        day = int(t.day or 1)
        hour = int(t.hour or 0)
        minute = int(t.minute or 0)
        second = float(t.second or 0)
        # Rounding a fraction such as .9999999 would yield 1_000_000, which
        # datetime() rejects; clamp so a valid time is not reported as invalid.
        microsecond = min(round((second % 1) * 1_000_000), 999_999)
        try:
            return datetime(
                year,
                month,
                day,
                hour,
                minute,
                int(second),
                microsecond,
            )
        except ValueError as ve:
            # report against the original input string `s` (not the tokens),
            # consistent with convert_to_date / convert_to_datetime
            raise ParseException(s, l, f"Invalid date/time: {ve}").with_traceback(
                ve.__traceback__
            ) from None

    if PY_310_OR_LATER:
        iso8601_date_validated = iso8601_date().add_parse_action(as_datetime)
        "Validated ISO8601 date strings, raising :class:`ParseException` for invalid date values."

        iso8601_datetime_validated = iso8601_datetime().add_parse_action(as_datetime)
        "Validated ISO8601 date and time strings, raising :class:`ParseException` for invalid date/time values."

    uuid = Regex(r"[0-9a-fA-F]{8}(?:-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name(
        "UUID"
    )
    "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

    _html_stripper = any_open_tag.suppress() | any_close_tag.suppress()

    @staticmethod
    def strip_html_tags(s: str, l: int, tokens: ParseResults):
        """Parse action to remove HTML tags from web page HTML source

        Only the first token is processed; opening and closing tags found in
        it are removed and the remaining text is returned.

        Example:

        .. testcode::

            # strip HTML links from normal text
            text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
            td, td_end = make_html_tags("TD")
            table_text = td + SkipTo(td_end).set_parse_action(
                pyparsing_common.strip_html_tags)("body") + td_end
            print(table_text.parse_string(text).body)

        Prints:

        .. testoutput::

            More info at the pyparsing wiki page
        """
        return pyparsing_common._html_stripper.transform_string(tokens[0])

    _commasepitem = (
        Combine(
            OneOrMore(
                ~Literal(",")
                + ~LineEnd()
                + Word(printables, exclude_chars=",")
                + Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
            )
        )
        .streamline()
        .set_name("commaItem")
    )
    comma_separated_list = DelimitedList(
        Opt(quoted_string.copy() | _commasepitem, default="")
    ).set_name("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    @staticmethod
    def upcase_tokens(s, l, t):
        """Parse action to convert tokens to upper case."""
        return [tt.upper() for tt in t]

    @staticmethod
    def downcase_tokens(s, l, t):
        """Parse action to convert tokens to lower case."""
        return [tt.lower() for tt in t]

    # fmt: off
    url = Regex(
        # https://mathiasbynens.be/demo/url-regex
        # https://gist.github.com/dperini/729294
        r"(?P<url>"
        # protocol identifier (optional)
        # short syntax // still required
        r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)"
        # user:pass BasicAuth (optional)
        r"(?:(?P<auth>\S+(?::\S*)?)@)?"
        r"(?P<host>"
        # IP address exclusion
        # private & local networks
        r"(?!(?:10|127)(?:\.\d{1,3}){3})"
        r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})"
        r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})"
        # IP address dotted notation octets
        # excludes loopback network 0.0.0.0
        # excludes reserved space >= 224.0.0.0
        # excludes network & broadcast addresses
        # (first & last IP address of each class)
        r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
        r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}"
        r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
        r"|"
        # host & domain names, may end with dot
        # can be replaced by a shortest alternative
        # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
        r"(?:"
        r"(?:"
        r"[a-z0-9\u00a1-\uffff]"
        r"[a-z0-9\u00a1-\uffff_-]{0,62}"
        r")?"
        r"[a-z0-9\u00a1-\uffff]\."
        r")+"
        # TLD identifier name, may end with dot
        r"(?:[a-z\u00a1-\uffff]{2,}\.?)"
        r")"
        # port number (optional)
        r"(:(?P<port>\d{2,5}))?"
        # resource path (optional)
        r"(?P<path>\/[^?# ]*)?"
        # query string (optional)
        r"(\?(?P<query>[^#]*))?"
        # fragment (optional)
        r"(#(?P<fragment>\S*))?"
        r")"
    ).set_name("url")
    """
    URL (http/https/ftp scheme)

    .. versionchanged:: 3.1.0
       ``url`` named group added
    """
    # fmt: on

    # pre-PEP8 compatibility names
    # fmt: off
    convertToInteger = staticmethod(replaced_by_pep8("convertToInteger", convert_to_integer))
    convertToFloat = staticmethod(replaced_by_pep8("convertToFloat", convert_to_float))
    convertToDate = staticmethod(replaced_by_pep8("convertToDate", convert_to_date))
    convertToDatetime = staticmethod(replaced_by_pep8("convertToDatetime", convert_to_datetime))
    stripHTMLTags = staticmethod(replaced_by_pep8("stripHTMLTags", strip_html_tags))
    upcaseTokens = staticmethod(replaced_by_pep8("upcaseTokens", upcase_tokens))
    downcaseTokens = staticmethod(replaced_by_pep8("downcaseTokens", downcase_tokens))
    # fmt: on
# Collect every class-level attribute of pyparsing_common that is a parser
# expression (i.e. an instance of ParserElement), in definition order.
_builtin_exprs = [
    member
    for member in vars(pyparsing_common).values()
    if isinstance(member, ParserElement)
]