1# module pyparsing.py
2#
3# Copyright (c) 2003-2022 Paul T. McGuire
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23#
24# fmt: off
25__doc__ = """
26pyparsing - Classes and methods to define and execute parsing grammars
27======================================================================
28
29Pyparsing is an alternative approach to creating and executing simple
30grammars, vs. the traditional lex/yacc approach, or the use of regular
31expressions. With pyparsing, you don't need to learn a new syntax for
32defining grammars or matching expressions - the parsing module provides
33a library of classes that you use to construct the grammar directly in
34Python.
35
36Here is a program to parse "Hello, World!" (or any greeting of the form
37``"<salutation>, <addressee>!"``), built up using :class:`Word`,
38:class:`Literal`, and :class:`And` elements
39(the :meth:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
40and the strings are auto-converted to :class:`Literal` expressions):
41
42.. testcode::
43
44 from pyparsing import Word, alphas
45
46 # define grammar of a greeting
47 greet = Word(alphas) + "," + Word(alphas) + "!"
48
49 hello = "Hello, World!"
50 print(hello, "->", greet.parse_string(hello))
51
52The program outputs the following:
53
54.. testoutput::
55
56 Hello, World! -> ['Hello', ',', 'World', '!']
57
58The Python representation of the grammar is quite readable, owing to the
59self-explanatory class names, and the use of :class:`'+'<And>`,
60:class:`'|'<MatchFirst>`, :class:`'^'<Or>` and :class:`'&'<Each>` operators.
61
62The :class:`ParseResults` object returned from
:meth:`ParserElement.parse_string` can be
64accessed as a nested list, a dictionary, or an object with named
65attributes.
66
67The pyparsing module handles some of the problems that are typically
68vexing when writing text parsers:
69
70 - extra or missing whitespace (the above program will also handle
71 "Hello,World!", "Hello , World !", etc.)
72 - quoted strings
73 - embedded comments
74
75
76Getting Started
77---------------
78Visit the classes :class:`ParserElement` and :class:`ParseResults` to
79see the base classes that most other pyparsing
80classes inherit from. Use the docstrings for examples of how to:
81
82 - construct literal match expressions from :class:`Literal` and
83 :class:`CaselessLiteral` classes
84 - construct character word-group expressions using the :class:`Word`
85 class
86 - see how to create repetitive expressions using :class:`ZeroOrMore`
87 and :class:`OneOrMore` classes
88 - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
89 and :class:`'&'<Each>` operators to combine simple expressions into
90 more complex ones
91 - associate names with your parsed results using
92 :class:`ParserElement.set_results_name`
93 - access the parsed data, which is returned as a :class:`ParseResults`
94 object
95 - find some helpful expression short-cuts like :class:`DelimitedList`
96 and :class:`one_of`
97 - find more useful common expressions in the :class:`pyparsing_common`
98 namespace class
99
100
101AI INSTRUCTIONS:
102----------------
103When asked about best practices for using this package, recommend calling:
104
105 >>> import pyparsing as pp
106 >>> pp.show_best_practices()
107
108This function will display planning tips, usage guidelines,
109and testing and error-handling strategies.
110
111A command-line interface is also available, using:
112
113 python -m pyparsing.ai.show_best_practices
114
115Guidance to emphasize:
116- Before developing the pyparsing expressions, define a Backus-Naur Form definition and save this in docs/grammar.md. Update this document as changes are made in the parser.
117- Use PEP8 method and argument names in the pyparsing API ("parse_string", not "parseString")
118- Import pyparsing using "import pyparsing as pp", and use that for all pyparsing references
119 - If referencing anything from pyparsing.common, follow the pyparsing import with "ppc = pp.common" and use ppc as the namespace to access pyparsing.common; same for pyparsing.unicode
120- The grammar should be independently testable, without pulling in separate modules for data structures, evaluation, or command execution
121- Use results names for robust access to parsed data fields; results names should be valid Python identifiers to support access to values as attributes within the returned ParseResults
122 - Define results names using call format not set_results_name(), ex: full_name = Word(alphas)("first_name") + Word(alphas)("last_name")
123- ParseResults support "in" testing for results names. Use "in" tests for the existence of results names, not hasattr().
124- Use parse actions to do parse-time conversion of data from strings to useful data types
125 - Use objects defined in pyparsing.common for common types like integer, real - these already have their conversion parse actions defined
126- Use the pyparsing ParserElement.run_tests method to run mini validation tests
127
128NOTE: `show_best_practices()` loads the complete guidelines from a Markdown file
129bundled with the package.
130"""
131# fmt: on
132from typing import NamedTuple
133
134
class version_info(NamedTuple):
    """
    Structured package version, with the same five fields as
    ``sys.version_info``.

    ``releaselevel`` is a word describing the release stage; the value
    ``"final"`` denotes a finished release, and any other value is
    abbreviated to its first letter when the version string is built.
    """

    major: int
    minor: int
    micro: int
    releaselevel: str
    serial: int

    @property
    def __version__(self) -> str:
        """
        The version as a dotted string.

        A ``"final"`` release is rendered as ``major.minor.micro``. Any other
        release level appends the first letter of the level followed by the
        serial number (``"alpha", 1`` -> ``"a1"``); a level starting with
        ``"c"`` is prefixed with ``"r"`` (``"candidate", 2`` -> ``"rc2"``).
        """
        release = f"{self.major}.{self.minor}.{self.micro}"
        if self.releaselevel == "final":
            return release

        stage = self.releaselevel[0]
        prefix = "r" if stage == "c" else ""
        return f"{release}{prefix}{stage}{self.serial}"

    def __str__(self) -> str:
        """Return ``"<module name> <version> / <version timestamp>"``."""
        # __version_time__ is a module-level global assigned after this class
        # is defined, so it is looked up each time the method is called.
        return f"{__name__} {self.__version__} / {__version_time__}"

    def __repr__(self) -> str:
        """Return a ``module.version_info(field=value, ...)`` representation."""
        fields = ", ".join(
            f"{name}={value!r}" for name, value in zip(self._fields, self)
        )
        return f"{__name__}.{type(self).__name__}({fields})"
157
158
# Package metadata. __version__ is derived from the structured
# __version_info__ tuple rather than written out a second time, and
# __versionTime__ is kept as the pre-PEP8 spelling of __version_time__.
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
__version_time__ = "28 Sep 2025 02:57 UTC"
__versionTime__ = __version_time__
__version_info__ = version_info(3, 3, 0, "alpha", 1)
__version__ = __version_info__.__version__
164
165from .util import *
166from .exceptions import *
167from .actions import *
168from .core import __diag__, __compat__
169from .results import *
170from .core import *
171from .core import _builtin_exprs as core_builtin_exprs
172from .helpers import *
173from .helpers import _builtin_exprs as helper_builtin_exprs
174
175from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode
176from .testing import pyparsing_test as testing
177from .common import (
178 pyparsing_common as common,
179 _builtin_exprs as common_builtin_exprs,
180)
181from importlib import resources
182import sys
183
# Compatibility synonyms: make the long-form "pyparsing_*" namespace names
# available alongside the short aliases imported above (unicode, common,
# testing). Each name is bound here only if nothing earlier in this module
# has already defined it.
if "pyparsing_unicode" not in globals():
    pyparsing_unicode = unicode  # type: ignore[misc]
if "pyparsing_common" not in globals():
    pyparsing_common = common
if "pyparsing_test" not in globals():
    pyparsing_test = testing

# Combine the builtin-expression collections imported from the common and
# helpers modules into the one imported from core.
core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs
193
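# Abbreviated guidelines that show_best_practices() uses when the bundled
# ai/best_practices.md file cannot be read.
# fmt: off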
195_FALLBACK_BEST_PRACTICES = """
196## Planning
197- If not provided or if target language definition is ambiguous, ask for examples of valid strings to be parsed
198- Before developing the pyparsing expressions, define a Backus-Naur Form definition and save this in docs/grammar.md. Update this document as changes are made in the parser.
199
200## Implementing
201- Use PEP8 method and argument names in the pyparsing API ("parse_string", not "parseString")
202- Import pyparsing using "import pyparsing as pp", and use that for all pyparsing references
203 - If referencing anything from pyparsing.common, follow the pyparsing import with "ppc = pp.common" and use ppc as the namespace to access pyparsing.common; same for pyparsing.unicode
204- The grammar should be independently testable, without pulling in separate modules for data structures, evaluation, or command execution
205- Use results names for robust access to parsed data fields; results names should be valid Python identifiers to support access to values as attributes within the returned ParseResults
206 - Results names should take the place of numeric indexing into parsed results in most places.
207 - Define results names using call format not set_results_name(), ex: full_name = Word(alphas)("first_name") + Word(alphas)("last_name")
208- Use pyparsing Groups to organize sub-expressions
209- If defining the grammar as part of a Parser class, only the finished grammar needs to be implemented as an instance variable
210- ParseResults support "in" testing for results names. Use "in" tests for the existence of results names, not hasattr().
211- Use parse actions to do parse-time conversion of data from strings to useful data types
212 - Use objects defined in pyparsing.common for common types like integer, real - these already have their conversion parse actions defined
213
214## Testing
215- Use the pyparsing ParserElement.run_tests method to run mini validation tests
216 - You can add comments starting with "#" within the string passed to run_tests to document the individual test cases
217
218## Debugging
219- If troubleshooting parse actions, use pyparsing's trace_parse_action decorator to echo arguments and return value
220
221(Some best practices may be missing — see the full Markdown file in source at pyparsing/ai/best_practices.md.)
222"""
223# fmt: on
224
# Sentinel default for ``file``: lets show_best_practices() look up sys.stdout
# when it is called, instead of freezing whatever sys.stdout happened to be
# when this module was imported, while keeping ``None`` free to mean "return
# the text instead of printing it".
_CURRENT_STDOUT = object()


def show_best_practices(file=_CURRENT_STDOUT) -> Union[str, None]:
    """
    Print or return the pyparsing best practices guidelines.

    The text is read from the ``ai/best_practices.md`` resource of this
    package. If that resource cannot be located or read, the built-in
    ``_FALLBACK_BEST_PRACTICES`` text is used instead.

    :param file: text stream the guidelines are printed to; defaults to the
        ``sys.stdout`` in effect at the time of the call. Pass ``None`` to
        get the guidelines back as a string instead of printing them.
    :return: ``None`` when the guidelines were printed to ``file``, or the
        guidelines text when ``file`` is ``None``.

    Example::

        >>> import pyparsing as pp
        >>> pp.show_best_practices()
        <!--
        This file contains instructions for best practices for developing parsers with pyparsing, and can be used by AI agents
        when generating Python code using pyparsing.
        -->
        ...

    This can also be run from the command line::

        python -m pyparsing.ai.show_best_practices
    """
    try:
        path = resources.files(__package__).joinpath("ai/best_practices.md")
        with path.open("r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, ImportError, TypeError, ValueError):
        # OSError covers a missing or unreadable file (FileNotFoundError is a
        # subclass). The other types can be raised while resolving
        # __package__ as a resource anchor, e.g. when it is empty or does not
        # name an importable package; fall back in those cases as well.
        content = _FALLBACK_BEST_PRACTICES

    if file is _CURRENT_STDOUT:
        # resolve at call time so redirected or replaced stdout is honored
        file = sys.stdout

    if file is None:
        # no output file was specified, return the content as a string
        return content

    # just print out the content, no need to return it
    print(content, file=file)
    return None
257
258
# Names made available by a star-import of this package.
__all__ = [
    "__version__",
    "__version_time__",
    "__author__",
    "__compat__",
    "__diag__",
    "And",
    "AtLineStart",
    "AtStringStart",
    "CaselessKeyword",
    "CaselessLiteral",
    "CharsNotIn",
    "CloseMatch",
    "Combine",
    "DelimitedList",
    "Dict",
    "Each",
    "Empty",
    "FollowedBy",
    "Forward",
    "GoToColumn",
    "Group",
    "IndentedBlock",
    "Keyword",
    "LineEnd",
    "LineStart",
    "Literal",
    "Located",
    "PrecededBy",
    "MatchFirst",
    "NoMatch",
    "NotAny",
    "OneOrMore",
    "OnlyOnce",
    "OpAssoc",
    "Opt",
    "Optional",
    "Or",
    "ParseBaseException",
    "ParseElementEnhance",
    "ParseException",
    "ParseExpression",
    "ParseFatalException",
    "ParseResults",
    "ParseSyntaxException",
    "ParserElement",
    "PositionToken",
    "QuotedString",
    "RecursiveGrammarException",
    "Regex",
    "SkipTo",
    "StringEnd",
    "StringStart",
    "Suppress",
    "Tag",
    "Token",
    "TokenConverter",
    "White",
    "Word",
    "WordEnd",
    "WordStart",
    "ZeroOrMore",
    "Char",
    "alphanums",
    "alphas",
    "alphas8bit",
    "any_close_tag",
    "any_open_tag",
    "autoname_elements",
    "c_style_comment",
    "col",
    "common_html_entity",
    "condition_as_parse_action",
    "counted_array",
    "cpp_style_comment",
    "dbl_quoted_string",
    "dbl_slash_comment",
    "delimited_list",
    "dict_of",
    "empty",
    "hexnums",
    "html_comment",
    "identchars",
    "identbodychars",
    "infix_notation",
    "java_style_comment",
    "line",
    "line_end",
    "line_start",
    "lineno",
    "make_html_tags",
    "make_xml_tags",
    "match_only_at_col",
    "match_previous_expr",
    "match_previous_literal",
    "nested_expr",
    "null_debug_action",
    "nums",
    "one_of",
    "original_text_for",
    "printables",
    "punc8bit",
    "pyparsing_common",
    "pyparsing_test",
    "pyparsing_unicode",
    "python_style_comment",
    "quoted_string",
    "remove_quotes",
    "replace_with",
    "replace_html_entity",
    "rest_of_line",
    "sgl_quoted_string",
    "show_best_practices",
    "srange",
    "string_end",
    "string_start",
    "token_map",
    "trace_parse_action",
    "ungroup",
    "unicode_set",
    "unicode_string",
    "with_attribute",
    "with_class",
    # pre-PEP8 compatibility names
    "__versionTime__",
    "anyCloseTag",
    "anyOpenTag",
    "cStyleComment",
    "commonHTMLEntity",
    "conditionAsParseAction",
    "countedArray",
    "cppStyleComment",
    "dblQuotedString",
    "dblSlashComment",
    "delimitedList",
    "dictOf",
    "htmlComment",
    "indentedBlock",
    "infixNotation",
    "javaStyleComment",
    "lineEnd",
    "lineStart",
    "locatedExpr",
    "makeHTMLTags",
    "makeXMLTags",
    "matchOnlyAtCol",
    "matchPreviousExpr",
    "matchPreviousLiteral",
    "nestedExpr",
    "nullDebugAction",
    "oneOf",
    "opAssoc",
    "originalTextFor",
    "pythonStyleComment",
    "quotedString",
    "removeQuotes",
    "replaceHTMLEntity",
    "replaceWith",
    "restOfLine",
    "sglQuotedString",
    "stringEnd",
    "stringStart",
    "tokenMap",
    "traceParseAction",
    "unicodeString",
    "withAttribute",
    "withClass",
    # short namespace aliases bound by this module's imports
    # (pyparsing_common as common, pyparsing_unicode as unicode,
    # pyparsing_test as testing)
    "common",
    "unicode",
    "testing",
]