Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/common.py: 75%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# common.py
2from .core import *
3from .helpers import DelimitedList, any_open_tag, any_close_tag
4from datetime import datetime
5import sys
# True when running on Python 3.10 or newer. The class body below uses this
# flag to decide whether the staticmethod objects can be handed directly to
# set_parse_action()/add_parse_action() or a plain lambda must be used instead.
PY_310_OR_LATER = (sys.version_info.major, sys.version_info.minor) >= (3, 10)
# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """Here are some common low-level expressions that may be useful in
    jump-starting parser development:

    - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
      :class:`scientific notation<sci_real>`)
    - common :class:`programming identifiers<identifier>`
    - network addresses (:class:`MAC<mac_address>`,
      :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
    - ISO8601 :class:`dates<iso8601_date>` and
      :class:`datetime<iso8601_datetime>`
    - :class:`UUID<uuid>`
    - :class:`comma-separated list<comma_separated_list>`
    - :class:`url`

    Parse actions:

    - :class:`convert_to_integer`
    - :class:`convert_to_float`
    - :class:`convert_to_date`
    - :class:`convert_to_datetime`
    - :class:`strip_html_tags`
    - :class:`upcase_tokens`
    - :class:`downcase_tokens`

    Examples:

    .. testcode::

        pyparsing_common.number.run_tests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

    .. testcode::

        pyparsing_common.fnumber.run_tests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

    .. testcode::

        pyparsing_common.hex_integer.run_tests('''
            # hex numbers
            100
            FF
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # hex numbers
        100
        [256]

        FF
        [255]

    .. testcode::

        pyparsing_common.fraction.run_tests('''
            # fractions
            1/2
            -3/4
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

    .. testcode::

        pyparsing_common.mixed_integer.run_tests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

    .. testcode::

        import uuid
        pyparsing_common.uuid.set_parse_action(token_map(uuid.UUID))
        pyparsing_common.uuid.run_tests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    @staticmethod
    def convert_to_integer(_, __, t):
        """
        Parse action for converting parsed integers to Python int
        """
        return [int(tt) for tt in t]

    @staticmethod
    def convert_to_float(_, __, t):
        """
        Parse action for converting parsed numbers to Python float
        """
        return [float(tt) for tt in t]

    # NOTE: within this class body the staticmethod objects above are only
    # passed directly as parse actions when PY_310_OR_LATER is true; otherwise
    # an equivalent one-argument lambda is attached instead.
    integer = (
        Word(nums)
        .set_name("integer")
        .set_parse_action(
            convert_to_integer
            if PY_310_OR_LATER
            else lambda t: [int(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """expression that parses an unsigned integer, converts to an int"""

    hex_integer = (
        Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
    )
    """expression that parses a hexadecimal integer, converts to an int"""

    signed_integer = (
        Regex(r"[+-]?\d+")
        .set_name("signed integer")
        .set_parse_action(
            convert_to_integer
            if PY_310_OR_LATER
            else lambda t: [int(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """expression that parses an integer with optional leading sign, converts to an int"""

    fraction = (
        signed_integer().set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
        + "/"
        + signed_integer().set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    ).set_name("fraction")
    """fractional expression of an integer divided by an integer, converts to a float"""
    # tokens are [numerator, "/", denominator]; divide first by last
    fraction.add_parse_action(lambda tt: tt[0] / tt[-1])

    mixed_integer = (
        fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
    ).set_name("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, converts to a float"""
    mixed_integer.add_parse_action(sum)

    real = (
        Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
        .set_name("real number")
        .set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """expression that parses a floating point number, converts to a float"""

    sci_real = (
        Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
        .set_name("real number with scientific notation")
        .set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """expression that parses a floating point number with optional
    scientific notation, converts to a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).set_name("number").streamline()
    """any numeric expression, converts to the corresponding Python type"""

    fnumber = (
        Regex(r"[+-]?\d+\.?\d*(?:[eE][+-]?\d+)?")
        .set_name("fnumber")
        .set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """any int or real number, always converts to a float"""

    ieee_float = (
        Regex(r"(?i:[+-]?(?:(?:\d+\.?\d*(?:e[+-]?\d+)?)|nan|inf(?:inity)?))")
        .set_name("ieee_float")
        .set_parse_action(
            convert_to_float
            if PY_310_OR_LATER
            else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
        )
    )
    """any floating-point literal (int, real number, infinity, or NaN), converts to a float"""

    identifier = Word(identchars, identbodychars).set_name("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(
        r"(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(?:\.(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
    ).set_name("IPv4 address")
    "IPv4 address (``0.0.0.0 - 255.255.255.255``)"

    _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
    _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
        "full IPv6 address"
    )
    _short_ipv6_address = (
        Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
        + "::"
        + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
    ).set_name("short IPv6 address")
    # a "::"-abbreviated address must contain fewer than 8 hex groups in total
    _short_ipv6_address.add_condition(
        lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
    )
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
    ipv6_address = Combine(
        (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
            "IPv6 address"
        )
    ).set_name("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    mac_address = Regex(
        r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
    ).set_name("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convert_to_date(fmt: str = "%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
        - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

        The returned parse action converts only the first token, and raises
        :class:`ParseException` if that token does not match ``fmt``.

        Example:

        .. testcode::

            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.set_parse_action(pyparsing_common.convert_to_date())
            print(date_expr.parse_string("1999-12-31"))

        prints:

        .. testoutput::

            [datetime.date(1999, 12, 31)]
        """

        def cvt_fn(ss, ll, tt):
            try:
                return datetime.strptime(tt[0], fmt).date()
            except ValueError as ve:
                # report a bad date as a parse failure, keeping the
                # original ValueError as the explicit cause
                raise ParseException(ss, ll, str(ve)) from ve

        return cvt_fn

    @staticmethod
    def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
        """Helper to create a parse action for converting parsed
        datetime string to Python :class:`datetime.datetime`

        Params -
        - fmt - format to be passed to :class:`datetime.strptime` (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

        The returned parse action converts only the first token, and raises
        :class:`ParseException` if that token does not match ``fmt``.

        Example:

        .. testcode::

            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.set_parse_action(pyparsing_common.convert_to_datetime())
            print(dt_expr.parse_string("1999-12-31T23:59:59.999"))

        prints:

        .. testoutput::

            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """

        def cvt_fn(s, l, t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                # report a bad datetime as a parse failure, keeping the
                # original ValueError as the explicit cause
                raise ParseException(s, l, str(ve)) from ve

        return cvt_fn

    iso8601_date = Regex(
        r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
    ).set_name("ISO8601 date")
    "ISO8601 date (``yyyy-mm-dd``)"

    iso8601_datetime = Regex(
        r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
    ).set_name("ISO8601 datetime")
    "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"

    @staticmethod
    def as_datetime(s, l, t):
        """Parse action to convert parsed dates or datetimes to a Python
        :class:`datetime.datetime`.

        This parse action will use the year, month, day, etc. results
        names defined in the ISO8601 date expressions, but it can be
        used with any expression that provides one or more of these fields.

        Omitted fields will default to fields from Jan 1, 00:00:00.

        Fractional seconds are carried into the ``microsecond`` field of
        the result. The ``tz`` results name is not read, so the returned
        value is always a naive datetime.

        Invalid dates will raise a :class:`ParseException` with the
        error message indicating the invalid date fields.
        """
        year = int(t.year.lstrip("0") or 0)
        month = int(t.month or 1)
        day = int(t.day or 1)
        hour = int(t.hour or 0)
        minute = int(t.minute or 0)
        second = float(t.second or 0)
        whole_second = int(second)
        # datetime() takes *micro*seconds as its 7th argument; round to absorb
        # float representation error (59.999 -> 999000, not 998999), and clamp
        # so rounding can never spill over to the invalid value 1_000_000.
        microsecond = min(round((second - whole_second) * 1_000_000), 999_999)
        try:
            return datetime(year, month, day, hour, minute, whole_second, microsecond)
        except ValueError as ve:
            # ParseException expects the input string (s) as its first argument
            raise ParseException(s, l, f"Invalid date/time: {ve}").with_traceback(
                ve.__traceback__
            ) from None

    if PY_310_OR_LATER:
        iso8601_date_validated = iso8601_date().add_parse_action(as_datetime)
        "Validated ISO8601 date strings, raising :class:`ParseException` for invalid date values."

        iso8601_datetime_validated = iso8601_datetime().add_parse_action(as_datetime)
        "Validated ISO8601 date and time strings, raising :class:`ParseException` for invalid date/time values."

    uuid = Regex(r"[0-9a-fA-F]{8}(?:-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name(
        "UUID"
    )
    "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

    _html_stripper = any_open_tag.suppress() | any_close_tag.suppress()

    @staticmethod
    def strip_html_tags(s: str, l: int, tokens: ParseResults):
        """Parse action to remove HTML tags from web page HTML source

        Only the first token is processed; the result is that token with
        every opening and closing tag removed.

        Example:

        .. testcode::

            # strip HTML links from normal text
            text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
            td, td_end = make_html_tags("TD")
            table_text = td + SkipTo(td_end).set_parse_action(
                pyparsing_common.strip_html_tags)("body") + td_end
            print(table_text.parse_string(text).body)

        Prints:

        .. testoutput::

            More info at the pyparsing wiki page
        """
        return pyparsing_common._html_stripper.transform_string(tokens[0])

    _commasepitem = (
        Combine(
            OneOrMore(
                ~Literal(",")
                + ~LineEnd()
                + Word(printables, exclude_chars=",")
                + Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
            )
        )
        .streamline()
        .set_name("commaItem")
    )
    comma_separated_list = DelimitedList(
        Opt(quoted_string.copy() | _commasepitem, default="")
    ).set_name("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    @staticmethod
    def upcase_tokens(s, l, t):
        """Parse action to convert tokens to upper case."""
        return [tt.upper() for tt in t]

    @staticmethod
    def downcase_tokens(s, l, t):
        """Parse action to convert tokens to lower case."""
        return [tt.lower() for tt in t]

    # fmt: off
    url = Regex(
        # https://mathiasbynens.be/demo/url-regex
        # https://gist.github.com/dperini/729294
        r"(?P<url>"
        # protocol identifier (optional)
        # short syntax // still required
        r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)"
        # user:pass BasicAuth (optional)
        r"(?:(?P<auth>\S+(?::\S*)?)@)?"
        r"(?P<host>"
        # IP address exclusion
        # private & local networks
        r"(?!(?:10|127)(?:\.\d{1,3}){3})"
        r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})"
        r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})"
        # IP address dotted notation octets
        # excludes loopback network 0.0.0.0
        # excludes reserved space >= 224.0.0.0
        # excludes network & broadcast addresses
        # (first & last IP address of each class)
        r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
        r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}"
        r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
        r"|"
        # host & domain names, may end with dot
        # can be replaced by a shortest alternative
        # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
        r"(?:"
        r"(?:"
        r"[a-z0-9\u00a1-\uffff]"
        r"[a-z0-9\u00a1-\uffff_-]{0,62}"
        r")?"
        r"[a-z0-9\u00a1-\uffff]\."
        r")+"
        # TLD identifier name, may end with dot
        r"(?:[a-z\u00a1-\uffff]{2,}\.?)"
        r")"
        # port number (optional)
        r"(:(?P<port>\d{2,5}))?"
        # resource path (optional)
        r"(?P<path>\/[^?# ]*)?"
        # query string (optional)
        r"(\?(?P<query>[^#]*))?"
        # fragment (optional)
        r"(#(?P<fragment>\S*))?"
        r")"
    ).set_name("url")
    """
    URL (http/https/ftp scheme)

    .. versionchanged:: 3.1.0
       ``url`` named group added
    """
    # fmt: on

    # pre-PEP8 compatibility names
    # fmt: off
    convertToInteger = staticmethod(replaced_by_pep8("convertToInteger", convert_to_integer))
    convertToFloat = staticmethod(replaced_by_pep8("convertToFloat", convert_to_float))
    convertToDate = staticmethod(replaced_by_pep8("convertToDate", convert_to_date))
    convertToDatetime = staticmethod(replaced_by_pep8("convertToDatetime", convert_to_datetime))
    stripHTMLTags = staticmethod(replaced_by_pep8("stripHTMLTags", strip_html_tags))
    upcaseTokens = staticmethod(replaced_by_pep8("upcaseTokens", upcase_tokens))
    downcaseTokens = staticmethod(replaced_by_pep8("downcaseTokens", downcase_tokens))
    # fmt: on
# Collect every parser expression that is defined as an attribute of the
# pyparsing_common namespace class (non-ParserElement members are skipped).
_builtin_exprs = [
    member
    for member in pyparsing_common.__dict__.values()
    if isinstance(member, ParserElement)
]