1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
4
5"""
A finite state machine specialized for regular-expression-based text filters.
This module defines the following classes:
8
9- `StateMachine`, a state machine
10- `State`, a state superclass
11- `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
12- `StateWS`, a state superclass for use with `StateMachineWS`
13- `SearchStateMachine`, uses `re.search()` instead of `re.match()`
14- `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
15- `ViewList`, extends standard Python lists.
16- `StringList`, string-specific ViewList.
17
18Exception classes:
19
20- `StateMachineError`
21- `UnknownStateError`
22- `DuplicateStateError`
23- `UnknownTransitionError`
24- `DuplicateTransitionError`
25- `TransitionPatternNotFound`
26- `TransitionMethodNotFound`
27- `UnexpectedIndentationError`
28- `TransitionCorrection`: Raised to switch to another transition.
29- `StateCorrection`: Raised to switch to another state & transition.
30
31Functions:
32
33- `string2lines()`: split a multi-line string into a list of one-line strings
34
35
36How To Use This Module
37======================
38(See the individual classes, methods, and attributes for details.)
39
401. Import it: ``import statemachine`` or ``from statemachine import ...``.
41 You will also need to ``import re``.
42
432. Derive a subclass of `State` (or `StateWS`) for each state in your state
44 machine::
45
46 class MyState(statemachine.State):
47
48 Within the state's class definition:
49
50 a) Include a pattern for each transition, in `State.patterns`::
51
52 patterns = {'atransition': r'pattern', ...}
53
54 b) Include a list of initial transitions to be set up automatically, in
55 `State.initial_transitions`::
56
57 initial_transitions = ['atransition', ...]
58
59 c) Define a method for each transition, with the same name as the
60 transition pattern::
61
62 def atransition(self, match, context, next_state):
63 # do something
64 result = [...] # a list
65 return context, next_state, result
66 # context, next_state may be altered
67
68 Transition methods may raise an `EOFError` to cut processing short.
69
70 d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
71 transition methods, which handle the beginning- and end-of-file.
72
73 e) In order to handle nested processing, you may wish to override the
74 attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
75
76 If you are using `StateWS` as a base class, in order to handle nested
77 indented blocks, you may wish to:
78
79 - override the attributes `StateWS.indent_sm`,
80 `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
81 `StateWS.known_indent_sm_kwargs`;
82 - override the `StateWS.blank()` method; and/or
83 - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
84 and/or `StateWS.firstknown_indent()` methods.
85
863. Create a state machine object::
87
88 sm = StateMachine(state_classes=[MyState, ...],
89 initial_state='MyState')
90
914. Obtain the input text, which needs to be converted into a tab-free list of
92 one-line strings. For example, to read text from a file called
93 'inputfile'::
94
95 with open('inputfile', encoding='utf-8') as fp:
96 input_string = fp.read()
97 input_lines = statemachine.string2lines(input_string)
98
995. Run the state machine on the input text and collect the results, a list::
100
101 results = sm.run(input_lines)
102
1036. Remove any lingering circular references::
104
105 sm.unlink()
106"""
107
108from __future__ import annotations
109
110__docformat__ = 'restructuredtext'
111
112import sys
113import re
114from unicodedata import east_asian_width
115
116from docutils import utils
117
118
class StateMachine:

    """
    A finite state machine for text filters using regular expressions.

    The input is provided in the form of a list of one-line strings (no
    newlines). States are subclasses of the `State` class. Transitions consist
    of regular expression patterns and transition methods, and are defined in
    each state.

    The state machine is started with the `run()` method, which returns the
    results of processing in a list.
    """

    def __init__(self, state_classes, initial_state,
                 debug=False, parent_state_machine=None) -> None:
        """
        Initialize a `StateMachine` object; add state objects.

        Parameters:

        - `state_classes`: a list of `State` (sub)classes.
        - `initial_state`: a string, the class name of the initial state.
        - `debug`: a boolean; produce verbose output if true (nonzero).
        - `parent_state_machine`: the parent of a nested state machine.
        """
        self.input_lines = None
        """`StringList` of input lines (without newlines).
        Filled by `self.run()`."""

        self.input_offset = 0
        """Offset of `self.input_lines` from the beginning of the file."""

        self.line = None
        """Current input line."""

        self.line_offset = -1
        """Current input line offset from beginning of `self.input_lines`."""

        self.debug = debug
        """Debugging mode on/off."""

        self.parent_state_machine = parent_state_machine
        """The instance of the parent state machine or None."""

        self.initial_state = initial_state
        """The name of the initial state (key to `self.states`)."""

        self.current_state = initial_state
        """The name of the current state (key to `self.states`)."""

        self.states = {}
        """Mapping of {state_name: State_object}."""

        self.add_states(state_classes)

        self.observers = []
        """List of bound methods or functions to call whenever the current
        line changes.  Observers are called with two arguments, the source
        and offset of the current line (see `notify_observers()`).
        Cleared at the end of `run()`."""

    def unlink(self) -> None:
        """Remove circular references to objects no longer required."""
        for state in self.states.values():
            state.unlink()
        self.states = None

    def run(self, input_lines, input_offset=0, context=None,
            input_source=None, initial_state=None):
        """
        Run the state machine on `input_lines`. Return results (a list).

        Reset `self.line_offset` and `self.current_state`. Run the
        beginning-of-file transition. Input one line at a time and check for a
        matching transition. If a match is found, call the transition method
        and possibly change the state. Store the context returned by the
        transition method to be passed on to the next transition matched.
        Accumulate the results returned by the transition methods in a list.
        Run the end-of-file transition. Finally, return the accumulated
        results.

        An `EOFError` raised while fetching or checking a line (including
        one raised by a transition method) triggers the end-of-file
        transition.  `TransitionCorrection` and `StateCorrection` back up one
        line and retry it with the requested transition and/or state.

        Parameters:

        - `input_lines`: a list of strings without newlines, or `StringList`.
        - `input_offset`: the line offset of `input_lines` from the beginning
          of the file.
        - `context`: application-specific storage.
        - `input_source`: name or path of source of `input_lines`.
        - `initial_state`: name of initial state.
        """
        self.runtime_init()
        if isinstance(input_lines, StringList):
            self.input_lines = input_lines
        else:
            self.input_lines = StringList(input_lines, source=input_source)
        self.input_offset = input_offset
        self.line_offset = -1
        self.current_state = initial_state or self.initial_state
        if self.debug:
            print('\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
                  % (self.line_offset, '\n| '.join(self.input_lines)),
                  file=sys.stderr)
        transitions = None
        results = []
        state = self.get_state()
        try:
            if self.debug:
                print('\nStateMachine.run: bof transition', file=sys.stderr)
            context, result = state.bof(context)
            results.extend(result)
            while True:
                try:
                    try:
                        self.next_line()
                        if self.debug:
                            source, offset = self.input_lines.info(
                                self.line_offset)
                            print(f'\nStateMachine.run: line '
                                  f'(source={source!r}, offset={offset!r}):\n'
                                  f'| {self.line}', file=sys.stderr)
                        context, next_state, result = self.check_line(
                            context, state, transitions)
                    except EOFError:
                        if self.debug:
                            print('\nStateMachine.run: %s.eof transition'
                                  % state.__class__.__name__, file=sys.stderr)
                        result = state.eof(context)
                        results.extend(result)
                        break
                    else:
                        results.extend(result)
                except TransitionCorrection as exception:
                    self.previous_line()  # back up for another try
                    transitions = (exception.args[0],)
                    if self.debug:
                        print('\nStateMachine.run: TransitionCorrection to '
                              f'state "{state.__class__.__name__}", '
                              f'transition {transitions[0]}.',
                              file=sys.stderr)
                    continue
                except StateCorrection as exception:
                    self.previous_line()  # back up for another try
                    next_state = exception.args[0]
                    if len(exception.args) == 1:
                        transitions = None
                    else:
                        transitions = (exception.args[1],)
                    if self.debug:
                        # A StateCorrection may carry no transition name;
                        # only mention the transition when one was given
                        # (indexing `None` here used to raise TypeError).
                        message = ('\nStateMachine.run: StateCorrection to '
                                   f'state "{next_state}"')
                        if transitions is not None:
                            message += f', transition {transitions[0]}'
                        print(message + '.', file=sys.stderr)
                else:
                    transitions = None
                state = self.get_state(next_state)
        except:  # NoQA: E722 (catchall)
            # Deliberate catch-all: report details in debug mode, then
            # always re-raise the original exception.
            if self.debug:
                self.error()
            raise
        self.observers = []
        return results

    def get_state(self, next_state=None):
        """
        Return current state object; set it first if `next_state` given.

        Parameter `next_state`: a string, the name of the next state.

        Exception: `UnknownStateError` raised if `next_state` unknown.
        """
        if next_state:
            if self.debug and next_state != self.current_state:
                print('\nStateMachine.get_state: Changing state from '
                      '"%s" to "%s" (input line %s).'
                      % (self.current_state, next_state,
                         self.abs_line_number()), file=sys.stderr)
            self.current_state = next_state
        try:
            return self.states[self.current_state]
        except KeyError:
            raise UnknownStateError(self.current_state)

    def next_line(self, n=1):
        """
        Load `self.line` with the `n`'th next line and return it.

        Raise `EOFError` (after setting `self.line` to None) if the new
        offset is past the end of the input.  Observers are notified in
        either case.
        """
        try:
            try:
                self.line_offset += n
                self.line = self.input_lines[self.line_offset]
            except IndexError:
                self.line = None
                raise EOFError
            return self.line
        finally:
            self.notify_observers()

    def is_next_line_blank(self):
        """Return True if the next line is blank or non-existent."""
        try:
            return not self.input_lines[self.line_offset + 1].strip()
        except IndexError:
            return True

    def at_eof(self):
        """Return True if the input is at or past end-of-file."""
        return self.line_offset >= len(self.input_lines) - 1

    def at_bof(self):
        """Return True if the input is at or before beginning-of-file."""
        return self.line_offset <= 0

    def previous_line(self, n=1):
        """
        Load `self.line` with the `n`'th previous line and return it.

        `self.line` is set to None if the new offset is before the first
        line.  Observers are notified.
        """
        self.line_offset -= n
        if self.line_offset < 0:
            self.line = None
        else:
            self.line = self.input_lines[self.line_offset]
        self.notify_observers()
        return self.line

    def goto_line(self, line_offset):
        """
        Jump to absolute line offset `line_offset`, load and return it.

        Raise `EOFError` (after setting `self.line` to None) if the offset
        is off the end of the input.  Observers are notified in either case.
        """
        try:
            try:
                self.line_offset = line_offset - self.input_offset
                self.line = self.input_lines[self.line_offset]
            except IndexError:
                self.line = None
                raise EOFError
            return self.line
        finally:
            self.notify_observers()

    def get_source(self, line_offset):
        """Return source of line at absolute line offset `line_offset`."""
        return self.input_lines.source(line_offset - self.input_offset)

    def abs_line_offset(self):
        """Return line offset of current line, from beginning of file."""
        return self.line_offset + self.input_offset

    def abs_line_number(self):
        """Return line number of current line (counting from 1)."""
        return self.line_offset + self.input_offset + 1

    def get_source_and_line(self, lineno=None):
        """Return (source, line) tuple for current or given line number.

        Looks up the source and line number in the `self.input_lines`
        StringList instance to count for included source files.

        If the optional argument `lineno` is given, convert it from an
        absolute line number to the corresponding (source, line) pair.

        Return ``(None, None)`` if the offset is off the list.
        """
        if lineno is None:
            offset = self.line_offset
        else:
            offset = lineno - self.input_offset - 1
        try:
            src, srcoffset = self.input_lines.info(offset)
            srcline = srcoffset + 1
        except TypeError:
            # line is None if index is "Just past the end":
            # look up the preceding line instead and add one to its number.
            src, srcline = self.get_source_and_line(offset + self.input_offset)
            return src, srcline + 1
        except IndexError:  # `offset` is off the list
            src, srcline = None, None
        return src, srcline

    def insert_input(self, input_lines, source) -> None:
        """
        Insert `input_lines` (from `source`) after the current line.

        The inserted lines are wrapped in a `StringList` and surrounded by
        one empty "internal padding" line before and one after.
        """
        self.input_lines.insert(self.line_offset + 1, '',
                                source='internal padding after '+source,
                                offset=len(input_lines))
        self.input_lines.insert(self.line_offset + 1, '',
                                source='internal padding before '+source,
                                offset=-1)
        self.input_lines.insert(self.line_offset + 2,
                                StringList(input_lines, source))

    def get_text_block(self, flush_left=False):
        """
        Return a contiguous block of text.

        If `flush_left` is true, raise `UnexpectedIndentationError` if an
        indented line is encountered before the text block ends (with a blank
        line).

        In both cases the current line is advanced to the last line of the
        block.
        """
        try:
            block = self.input_lines.get_text_block(self.line_offset,
                                                    flush_left)
            self.next_line(len(block) - 1)
            return block
        except UnexpectedIndentationError as err:
            block = err.args[0]
            self.next_line(len(block) - 1)  # advance to last line of block
            raise

    def check_line(self, context, state, transitions=None):
        """
        Examine one line of input for a transition match & execute its method.

        Parameters:

        - `context`: application-dependent storage.
        - `state`: a `State` object, the current state.
        - `transitions`: an optional ordered list of transition names to try,
          instead of ``state.transition_order``.

        Return the values returned by the transition method:

        - context: possibly modified from the parameter `context`;
        - next state name (`State` subclass name);
        - the result output of the transition, a list.

        When there is no match, ``state.no_match()`` is called and its return
        value is returned.
        """
        if transitions is None:
            transitions = state.transition_order
        if self.debug:
            print('\nStateMachine.check_line: state="%s", transitions=%r.'
                  % (state.__class__.__name__, transitions), file=sys.stderr)
        for name in transitions:
            pattern, method, next_state = state.transitions[name]
            match = pattern.match(self.line)
            if match:
                if self.debug:
                    print('\nStateMachine.check_line: Matched transition '
                          f'"{name}" in state "{state.__class__.__name__}".',
                          file=sys.stderr)
                return method(match, context, next_state)
        else:
            # Loop finished without returning: no transition matched.
            if self.debug:
                print('\nStateMachine.check_line: No match in state "%s".'
                      % state.__class__.__name__, file=sys.stderr)
            return state.no_match(context, transitions)

    def add_state(self, state_class):
        """
        Initialize & add a `state_class` (`State` subclass) object.

        Exception: `DuplicateStateError` raised if `state_class` was already
        added.
        """
        statename = state_class.__name__
        if statename in self.states:
            raise DuplicateStateError(statename)
        self.states[statename] = state_class(self, self.debug)

    def add_states(self, state_classes) -> None:
        """
        Add `state_classes` (a list of `State` subclasses).
        """
        for state_class in state_classes:
            self.add_state(state_class)

    def runtime_init(self) -> None:
        """
        Initialize `self.states`.
        """
        for state in self.states.values():
            state.runtime_init()

    def error(self) -> None:
        """Report error details."""
        type_name, value, module, line, function = _exception_data()
        print('%s: %s' % (type_name, value), file=sys.stderr)
        print('input line %s' % (self.abs_line_number()), file=sys.stderr)
        print('module %s, line %s, function %s' % (module, line, function),
              file=sys.stderr)

    def attach_observer(self, observer) -> None:
        """
        The `observer` parameter is a function or bound method which takes two
        arguments, the source and offset of the current line.
        """
        self.observers.append(observer)

    def detach_observer(self, observer) -> None:
        """Remove `observer` from the list of observers."""
        self.observers.remove(observer)

    def notify_observers(self) -> None:
        """
        Call every observer with the source and offset of the current line.

        Observers receive ``(None, None)`` if the current offset is off the
        list of input lines.
        """
        if not self.observers:
            return
        # The line info is the same for every observer; look it up once.
        try:
            info = self.input_lines.info(self.line_offset)
        except IndexError:
            info = (None, None)
        for observer in self.observers:
            observer(*info)
509
510
class State:

    """
    State superclass. Contains a list of transitions, and transition methods.

    Transition methods all have the same signature. They take 3 parameters:

    - An `re` match object. ``match.string`` contains the matched input line,
      ``match.start()`` gives the start index of the match, and
      ``match.end()`` gives the end index.
    - A context object, whose meaning is application-defined (initial value
      ``None``). It can be used to store any information required by the state
      machine, and the returned context is passed on to the next transition
      method unchanged.
    - The name of the next state, a string, taken from the transitions list;
      normally it is returned unchanged, but it may be altered by the
      transition method if necessary.

    Transition methods all return a 3-tuple:

    - A context object, as (potentially) modified by the transition method.
    - The next state name (a return value of ``None`` means no state change).
    - The processing result, a list, which is accumulated by the state
      machine.

    Transition methods may raise an `EOFError` to cut processing short.

    There are two implicit transitions, and corresponding transition methods
    are defined: `bof()` handles the beginning-of-file, and `eof()` handles
    the end-of-file. These methods have non-standard signatures and return
    values. `bof()` returns the initial context and results, and may be used
    to return a header string, or do any other processing needed. `eof()`
    should handle any remaining context and wrap things up; it returns the
    final processing result.

    Typical applications need only subclass `State` (or a subclass), set the
    `patterns` and `initial_transitions` class attributes, and provide
    corresponding transition methods. The default object initialization will
    take care of constructing the list of transitions.
    """

    patterns = None
    """
    {Name: pattern} mapping, used by `make_transition()`. Each pattern may
    be a string or a compiled `re` pattern. Override in subclasses.
    """

    initial_transitions = None
    """
    A list of transitions to initialize when a `State` is instantiated.
    Each entry is either a transition name string, or a (transition name, next
    state name) pair. See `make_transitions()`. Override in subclasses.
    """

    nested_sm = None
    """
    The `StateMachine` class for handling nested processing.

    If left as ``None``, `nested_sm` defaults to the class of the state's
    controlling state machine. Override it in subclasses to avoid the default.
    """

    nested_sm_kwargs = None
    """
    Keyword arguments dictionary, passed to the `nested_sm` constructor.

    Two keys must have entries in the dictionary:

    - Key 'state_classes' must be set to a list of `State` classes.
    - Key 'initial_state' must be set to the name of the initial state class.

    If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
    class of the current state, and 'initial_state' defaults to the name of
    the class of the current state. Override in subclasses to avoid the
    defaults.
    """

    def __init__(self, state_machine, debug=False) -> None:
        """
        Initialize a `State` object; make & add initial transitions.

        Parameters:

        - `state_machine`: the controlling `StateMachine` object.
        - `debug`: a boolean; produce verbose output if true.
        """

        self.transition_order = []
        """A list of transition names in search order."""

        self.transitions = {}
        """
        A mapping of transition names to 3-tuples containing
        (compiled_pattern, transition_method, next_state_name). Initialized as
        an instance attribute dynamically (instead of as a class attribute)
        because it may make forward references to patterns and methods in this
        or other classes.
        """

        self.add_initial_transitions()

        self.state_machine = state_machine
        """A reference to the controlling `StateMachine` object."""

        self.debug = debug
        """Debugging mode on/off."""

        if self.nested_sm is None:
            self.nested_sm = self.state_machine.__class__
        if self.nested_sm_kwargs is None:
            self.nested_sm_kwargs = {'state_classes': [self.__class__],
                                     'initial_state': self.__class__.__name__}

    def runtime_init(self) -> None:
        """
        Initialize this `State` before running the state machine; called from
        `self.state_machine.run()`.
        """

    def unlink(self) -> None:
        """Remove circular references to objects no longer required."""
        self.state_machine = None

    def add_initial_transitions(self) -> None:
        """Make and add transitions listed in `self.initial_transitions`."""
        if self.initial_transitions:
            names, transitions = self.make_transitions(
                                     self.initial_transitions)
            self.add_transitions(names, transitions)

    def add_transitions(self, names, transitions):
        """
        Add a list of transitions to the start of the transition list.

        Parameters:

        - `names`: a list of transition names.
        - `transitions`: a mapping of names to transition tuples.

        Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
        """
        # Validate every name before modifying anything.
        for name in names:
            if name in self.transitions:
                raise DuplicateTransitionError(name)
            if name not in transitions:
                raise UnknownTransitionError(name)
        self.transition_order[:0] = names
        self.transitions.update(transitions)

    def add_transition(self, name, transition):
        """
        Add a transition to the start of the transition list.

        Parameter `transition`: a ready-made transition 3-tuple.

        Exception: `DuplicateTransitionError`.
        """
        if name in self.transitions:
            raise DuplicateTransitionError(name)
        self.transition_order[:0] = [name]
        self.transitions[name] = transition

    def remove_transition(self, name):
        """
        Remove a transition by `name`.

        Exception: `UnknownTransitionError`.
        """
        try:
            del self.transitions[name]
            self.transition_order.remove(name)
        except (KeyError, ValueError):
            # KeyError: not in the mapping; ValueError: not in the order list.
            raise UnknownTransitionError(name)

    def make_transition(self, name, next_state=None):
        """
        Make & return a transition tuple based on `name`.

        This is a convenience function to simplify transition creation.

        Parameters:

        - `name`: a string, the name of the transition pattern & method. This
          `State` object must have a method called '`name`', and a dictionary
          `self.patterns` containing a key '`name`'.
        - `next_state`: a string, the name of the next `State` object for this
          transition. A value of ``None`` (or absent) implies no state change
          (i.e., continue with the same state).

        A pattern given as a string is compiled and the compiled pattern is
        stored back into `self.patterns`.

        Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
        """
        if next_state is None:
            next_state = self.__class__.__name__
        if self.patterns is None:
            # `patterns` was never overridden: report it as a missing
            # pattern rather than letting a bare TypeError escape.
            raise TransitionPatternNotFound(
                '%s.patterns[%r]' % (self.__class__.__name__, name))
        try:
            pattern = self.patterns[name]
            if not hasattr(pattern, 'match'):
                pattern = self.patterns[name] = re.compile(pattern)
        except KeyError:
            raise TransitionPatternNotFound(
                '%s.patterns[%r]' % (self.__class__.__name__, name))
        try:
            method = getattr(self, name)
        except AttributeError:
            raise TransitionMethodNotFound(
                '%s.%s' % (self.__class__.__name__, name))
        return pattern, method, next_state

    def make_transitions(self, name_list):
        """
        Return a list of transition names and a transition mapping.

        Parameter `name_list`: a list, where each entry is either a transition
        name string, or a 1- or 2-tuple (transition name, optional next state
        name).
        """
        names = []
        transitions = {}
        for namestate in name_list:
            if isinstance(namestate, str):
                transitions[namestate] = self.make_transition(namestate)
                names.append(namestate)
            else:
                transitions[namestate[0]] = self.make_transition(*namestate)
                names.append(namestate[0])
        return names, transitions

    def no_match(self, context, transitions):
        """
        Called when there is no match from `StateMachine.check_line()`.

        Return the same values returned by transition methods:

        - context: unchanged;
        - next state name: ``None``;
        - empty result list.

        Override in subclasses to catch this event.
        """
        return context, None, []

    def bof(self, context):
        """
        Handle beginning-of-file. Return unchanged `context`, empty result.

        Override in subclasses.

        Parameter `context`: application-defined storage.
        """
        return context, []

    def eof(self, context):
        """
        Handle end-of-file. Return empty result.

        Override in subclasses.

        Parameter `context`: application-defined storage.
        """
        return []

    def nop(self, match, context, next_state):
        """
        A "do nothing" transition method.

        Return unchanged `context` & `next_state`, empty result. Useful for
        simple state changes (actionless transitions).
        """
        return context, next_state, []
779
780
class StateMachineWS(StateMachine):

    """
    `StateMachine` subclass specialized for whitespace recognition.

    Three methods extract indented text blocks, differing in how much is
    known about the indentation beforehand:

    - `get_indented()`: the indent is unknown.
    - `get_known_indented()`: the indent is known for all lines.
    - `get_first_known_indented()`: only the first line's indent is known.
    """

    def get_indented(self, until_blank=False, strip_indent=True):
        """
        Extract an indented block whose indent is unknown for all lines.

        The current line is advanced to the last line of the block (if the
        block is not empty); leading blank lines are then trimmed from the
        block.

        :Parameters:
            - `until_blank`: Stop collecting at the first blank line if true.
            - `strip_indent`: Strip common leading indent if true (default).

        :Return:
            - the indented block (a list of lines of text),
            - its indent,
            - its first line offset from BOF, and
            - whether or not it finished with a blank line.
        """
        first_offset = self.abs_line_offset()
        block, indent, blank_finish = self.input_lines.get_indented(
            self.line_offset, until_blank, strip_indent)
        if block:
            # Move the current line to the end of the extracted block.
            self.next_line(len(block) - 1)
        # Drop leading blank lines; the reported start offset moves along.
        while block and not block[0].strip():
            block.trim_start()
            first_offset += 1
        return block, indent, first_offset, blank_finish

    def get_known_indented(self, indent, until_blank=False, strip_indent=True):
        """
        Extract an indented block whose indent is known for all lines.

        Starting with the current line, extract the entire text block with at
        least `indent` indentation (which must be whitespace, except for the
        first line).  The current line is advanced to the last line of the
        block; leading blank lines are then trimmed from the block.

        :Parameters:
            - `indent`: The number of indent columns/characters.
            - `until_blank`: Stop collecting at the first blank line if true.
            - `strip_indent`: Strip `indent` characters of indentation if true
              (default).

        :Return:
            - the indented block,
            - its first line offset from BOF, and
            - whether or not it finished with a blank line.
        """
        first_offset = self.abs_line_offset()
        # The indent reported back by `get_indented()` is not needed here.
        block, _indent, blank_finish = self.input_lines.get_indented(
            self.line_offset, until_blank, strip_indent,
            block_indent=indent)
        # Move the current line to the end of the extracted block.
        self.next_line(len(block) - 1)
        # Drop leading blank lines; the reported start offset moves along.
        while block and not block[0].strip():
            block.trim_start()
            first_offset += 1
        return block, first_offset, blank_finish

    def get_first_known_indented(self, indent, until_blank=False,
                                 strip_indent=True, strip_top=True):
        """
        Extract an indented block whose indent is known for the first line
        only and unknown for all other lines.

        The current line is advanced to the last line of the block.

        :Parameters:
            - `indent`: The first line's indent (# of columns/characters).
            - `until_blank`: Stop collecting at the first blank line if true
              (1).
            - `strip_indent`: Strip `indent` characters of indentation if true
              (1, default).
            - `strip_top`: Strip blank lines from the beginning of the block.

        :Return:
            - the indented block,
            - its indent,
            - its first line offset from BOF, and
            - whether or not it finished with a blank line.
        """
        first_offset = self.abs_line_offset()
        block, indent, blank_finish = self.input_lines.get_indented(
            self.line_offset, until_blank, strip_indent,
            first_indent=indent)
        # Move the current line to the end of the extracted block.
        self.next_line(len(block) - 1)
        # Leading blank lines are only dropped when `strip_top` is true.
        while strip_top and block and not block[0].strip():
            block.trim_start()
            first_offset += 1
        return block, indent, first_offset, blank_finish
882
883
884class StateWS(State):
885
886 """
887 State superclass specialized for whitespace (blank lines & indents).
888
889 Use this class with `StateMachineWS`. The transitions 'blank' (for blank
890 lines) and 'indent' (for indented text blocks) are added automatically,
891 before any other transitions. The transition method `blank()` handles
892 blank lines and `indent()` handles nested indented blocks. Indented
893 blocks trigger a new state machine to be created by `indent()` and run.
894 The class of the state machine to be created is in `indent_sm`, and the
895 constructor keyword arguments are in the dictionary `indent_sm_kwargs`.
896
897 The methods `known_indent()` and `firstknown_indent()` are provided for
898 indented blocks where the indent (all lines' and first line's only,
899 respectively) is known to the transition method, along with the attributes
900 `known_indent_sm` and `known_indent_sm_kwargs`. Neither transition method
901 is triggered automatically.
902 """
903
    # Class-level defaults for the nested state machines used on indented
    # text.  Any of the four attributes left as ``None`` is resolved per
    # instance in `__init__()`.
    indent_sm = None
    """
    The `StateMachine` class handling indented text blocks.

    If left as ``None``, `indent_sm` defaults to the value of
    `State.nested_sm`.  Override it in subclasses to avoid the default.
    """

    indent_sm_kwargs = None
    """
    Keyword arguments dictionary, passed to the `indent_sm` constructor.

    If left as ``None``, `indent_sm_kwargs` defaults to the value of
    `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
    """

    known_indent_sm = None
    """
    The `StateMachine` class handling known-indented text blocks.

    If left as ``None``, `known_indent_sm` defaults to the value of
    `indent_sm`.  Override it in subclasses to avoid the default.
    """

    known_indent_sm_kwargs = None
    """
    Keyword arguments dictionary, passed to the `known_indent_sm` constructor.

    If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
    `indent_sm_kwargs`. Override it in subclasses to avoid the default.
    """

    # 'blank': zero or more spaces up to the end of the text;
    # 'indent': one or more spaces.  The keys double as the names of the
    # transition methods `blank()` and `indent()` of this class.
    ws_patterns = {'blank': re.compile(' *$'),
                   'indent': re.compile(' +')}
    """Patterns for default whitespace transitions.  May be overridden in
    subclasses."""

    # Names must be keys of `ws_patterns`; they are handed to
    # `make_transitions()` by `add_initial_transitions()`.
    ws_initial_transitions = ('blank', 'indent')
    """Default initial whitespace transitions, added before those listed in
    `State.initial_transitions`.  May be overridden in subclasses."""
944
945 def __init__(self, state_machine, debug=False) -> None:
946 """
947 Initialize a `StateSM` object; extends `State.__init__()`.
948
949 Check for indent state machine attributes, set defaults if not set.
950 """
951 State.__init__(self, state_machine, debug)
952 if self.indent_sm is None:
953 self.indent_sm = self.nested_sm
954 if self.indent_sm_kwargs is None:
955 self.indent_sm_kwargs = self.nested_sm_kwargs
956 if self.known_indent_sm is None:
957 self.known_indent_sm = self.indent_sm
958 if self.known_indent_sm_kwargs is None:
959 self.known_indent_sm_kwargs = self.indent_sm_kwargs
960
961 def add_initial_transitions(self) -> None:
962 """
963 Add whitespace-specific transitions before those defined in subclass.
964
965 Extends `State.add_initial_transitions()`.
966 """
967 State.add_initial_transitions(self)
968 if self.patterns is None:
969 self.patterns = {}
970 self.patterns.update(self.ws_patterns)
971 names, transitions = self.make_transitions(
972 self.ws_initial_transitions)
973 self.add_transitions(names, transitions)
974
975 def blank(self, match, context, next_state):
976 """Handle blank lines. Does nothing. Override in subclasses."""
977 return self.nop(match, context, next_state)
978
979 def indent(self, match, context, next_state):
980 """
981 Handle an indented text block. Extend or override in subclasses.
982
983 Recursively run the registered state machine for indented blocks
984 (`self.indent_sm`).
985 """
986 (indented, indent, line_offset, blank_finish
987 ) = self.state_machine.get_indented()
988 sm = self.indent_sm(debug=self.debug, **self.indent_sm_kwargs)
989 results = sm.run(indented, input_offset=line_offset)
990 return context, next_state, results
991
992 def known_indent(self, match, context, next_state):
993 """
994 Handle a known-indent text block. Extend or override in subclasses.
995
996 Recursively run the registered state machine for known-indent indented
997 blocks (`self.known_indent_sm`). The indent is the length of the
998 match, ``match.end()``.
999 """
1000 (indented, line_offset, blank_finish
1001 ) = self.state_machine.get_known_indented(match.end())
1002 sm = self.known_indent_sm(debug=self.debug,
1003 **self.known_indent_sm_kwargs)
1004 results = sm.run(indented, input_offset=line_offset)
1005 return context, next_state, results
1006
1007 def first_known_indent(self, match, context, next_state):
1008 """
1009 Handle an indented text block (first line's indent known).
1010
1011 Extend or override in subclasses.
1012
1013 Recursively run the registered state machine for known-indent indented
1014 blocks (`self.known_indent_sm`). The indent is the length of the
1015 match, ``match.end()``.
1016 """
1017 (indented, line_offset, blank_finish
1018 ) = self.state_machine.get_first_known_indented(match.end())
1019 sm = self.known_indent_sm(debug=self.debug,
1020 **self.known_indent_sm_kwargs)
1021 results = sm.run(indented, input_offset=line_offset)
1022 return context, next_state, results
1023
1024
1025class _SearchOverride:
1026
1027 """
1028 Mix-in class to override `StateMachine` regular expression behavior.
1029
1030 Changes regular expression matching, from the default `re.match()`
1031 (succeeds only if the pattern matches at the start of `self.line`) to
1032 `re.search()` (succeeds if the pattern matches anywhere in `self.line`).
1033 When subclassing a `StateMachine`, list this class **first** in the
1034 inheritance list of the class definition.
1035 """
1036
1037 def match(self, pattern):
1038 """
1039 Return the result of a regular expression search.
1040
1041 Overrides `StateMachine.match()`.
1042
1043 Parameter `pattern`: `re` compiled regular expression.
1044 """
1045 return pattern.search(self.line)
1046
1047
class SearchStateMachine(_SearchOverride, StateMachine):
    """`StateMachine` which uses `re.search()` instead of `re.match()`.

    Adds no code of its own: `_SearchOverride` is listed first among the
    bases so that its `match()` takes precedence over `StateMachine.match()`.
    """
1050
1051
class SearchStateMachineWS(_SearchOverride, StateMachineWS):
    """`StateMachineWS` which uses `re.search()` instead of `re.match()`.

    Adds no code of its own: `_SearchOverride` is listed first among the
    bases so that its `match()` takes precedence over the inherited one.
    """
1054
1055
class ViewList:

    """
    List with extended functionality: slices of ViewList objects are child
    lists, linked to their parents. Changes made to a child list also affect
    the parent list.  A child list is effectively a "view" (in the SQL sense)
    of the parent list.  Changes to parent lists, however, do *not* affect
    active child lists.  If a parent list is changed, any active child lists
    should be recreated.

    The start and end of the slice can be trimmed using the `trim_start()` and
    `trim_end()` methods, without affecting the parent list.  The link between
    child and parent lists can be broken by calling `disconnect()` on the
    child list.

    Also, ViewList objects keep track of the source & offset of each item.
    This information is accessible via the `source()`, `offset()`, and
    `info()` methods.
    """

    def __init__(self, initlist=None, source=None, items=None,
                 parent=None, parent_offset=None) -> None:
        """
        Set up the list.

        Parameters:

        - `initlist`: initial content; another `ViewList` (its data and
          items are copied) or any iterable of values.
        - `source`: source recorded for every value when `items` is not
          given.
        - `items`: list of (source, offset) pairs, one per value.
        - `parent`, `parent_offset`: link to the parent list; set by
          slicing.
        """
        self.data = []
        """The actual list of data, flattened from various sources."""

        self.items = []
        """A list of (source, offset) pairs, same length as `self.data`: the
        source of each line and the offset of each line from the beginning of
        its source."""

        self.parent = parent
        """The parent list."""

        self.parent_offset = parent_offset
        """Offset of this list from the beginning of the parent list."""

        if isinstance(initlist, ViewList):
            self.data = initlist.data[:]
            self.items = initlist.items[:]
        elif initlist is not None:
            self.data = list(initlist)
            if items:
                self.items = items
            else:
                # Count the materialized data, so that one-shot iterables
                # (which have no len()) are accepted as well.
                self.items = [(source, i) for i in range(len(self.data))]
        assert len(self.data) == len(self.items), 'data mismatch'

    def __str__(self) -> str:
        return str(self.data)

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}({self.data}, items={self.items})'

    # Comparisons only look at the data, never at the source information.

    def __lt__(self, other):
        return self.data < self.__cast(other)

    def __le__(self, other):
        return self.data <= self.__cast(other)

    def __eq__(self, other):
        return self.data == self.__cast(other)

    def __ne__(self, other):
        return self.data != self.__cast(other)

    def __gt__(self, other):
        return self.data > self.__cast(other)

    def __ge__(self, other):
        return self.data >= self.__cast(other)

    def __cast(self, other):
        """Return the plain data of `other` if it is a `ViewList`."""
        if isinstance(other, ViewList):
            return other.data
        else:
            return other

    def __contains__(self, item) -> bool:
        return item in self.data

    def __len__(self) -> int:
        return len(self.data)

    # The __getitem__()/__setitem__()/__delitem__() methods check whether
    # the index is a slice first.  Slice bounds are normalized with
    # `slice.indices()` *before* the list is changed, so that open or
    # negative bounds translate to correct positions in the parent list.

    def __getitem__(self, i):
        """Return one value, or a child `ViewList` for a slice."""
        if isinstance(i, slice):
            assert i.step in (None, 1), 'cannot handle slice with stride'
            start, stop, _ = i.indices(len(self.data))
            return self.__class__(self.data[start:stop],
                                  items=self.items[start:stop],
                                  parent=self, parent_offset=start)
        else:
            return self.data[i]

    def __setitem__(self, i, item) -> None:
        """
        Replace one value, or a slice with the content of a `ViewList`.

        The change is repeated in the parent list, if any.  Replacing a
        single value keeps its (source, offset) information.
        """
        if isinstance(i, slice):
            assert i.step in (None, 1), 'cannot handle slice with stride'
            if not isinstance(item, ViewList):
                raise TypeError('assigning non-ViewList to ViewList slice')
            start, stop, _ = i.indices(len(self.data))
            stop = max(start, stop)  # empty range: insert at `start`
            self.data[start:stop] = item.data
            self.items[start:stop] = item.items
            assert len(self.data) == len(self.items), 'data mismatch'
            if self.parent:
                k = start + self.parent_offset
                n = stop + self.parent_offset
                self.parent[k:n] = item
        else:
            self.data[i] = item
            if self.parent:
                # A negative index counts from the end of *this* list,
                # not from the end of the parent.
                index = i + len(self.data) if i < 0 else i
                self.parent[index + self.parent_offset] = item

    def __delitem__(self, i) -> None:
        """
        Delete one value or a slice, also from the parent list (if any).

        Slices with a stride are only supported on lists without parent.
        """
        if isinstance(i, slice):
            start, stop, step = i.indices(len(self.data))
            if self.parent:
                assert step == 1, 'cannot handle slice with stride'
            del self.data[i]
            del self.items[i]
            if self.parent:
                k = start + self.parent_offset
                n = max(start, stop) + self.parent_offset
                del self.parent[k:n]
        else:
            size = len(self.data)
            del self.data[i]
            del self.items[i]
            if self.parent:
                index = i + size if i < 0 else i
                del self.parent[index + self.parent_offset]

    def __add__(self, other):
        if isinstance(other, ViewList):
            return self.__class__(self.data + other.data,
                                  items=(self.items + other.items))
        else:
            raise TypeError('adding non-ViewList to a ViewList')

    def __radd__(self, other):
        if isinstance(other, ViewList):
            return self.__class__(other.data + self.data,
                                  items=(other.items + self.items))
        else:
            raise TypeError('adding ViewList to a non-ViewList')

    def __iadd__(self, other):
        """
        Append the content of the `ViewList` `other` in place.

        The parent list, if any, is not modified (use `extend()` for that).
        """
        if isinstance(other, ViewList):
            self.data += other.data
            # Keep the source information in step with the data.
            self.items += other.items
        else:
            raise TypeError('argument to += must be a ViewList')
        return self

    def __mul__(self, n):
        return self.__class__(self.data * n, items=(self.items * n))

    __rmul__ = __mul__

    def __imul__(self, n):
        self.data *= n
        self.items *= n
        return self

    def extend(self, other):
        """Append the `ViewList` `other`, also to the parent (if any)."""
        if not isinstance(other, ViewList):
            raise TypeError('extending a ViewList with a non-ViewList')
        if self.parent:
            self.parent.insert(len(self.data) + self.parent_offset, other)
        self.data.extend(other.data)
        self.items.extend(other.items)

    def append(self, item, source=None, offset=0) -> None:
        """
        Append `item`, also to the parent list (if any).

        Without `source`, `item` must be a `ViewList` and its content is
        appended (see `extend()`).  With `source`, the single value `item`
        is appended and recorded with (`source`, `offset`).
        """
        if source is None:
            self.extend(item)
        else:
            if self.parent:
                self.parent.insert(len(self.data) + self.parent_offset, item,
                                   source, offset)
            self.data.append(item)
            self.items.append((source, offset))

    def insert(self, i, item, source=None, offset=0):
        """
        Insert `item` before index `i`, also in the parent list (if any).

        Without `source`, `item` must be a `ViewList` and its content is
        inserted.  With `source`, the single value `item` is inserted and
        recorded with (`source`, `offset`).
        """
        # Resolve the position before changing the list, the same way
        # `list.insert()` does: negative indices count from the end and
        # out-of-range indices are clamped.
        size = len(self.data)
        if i < 0:
            index = max(size + i, 0)
        else:
            index = min(i, size)
        if source is None:
            if not isinstance(item, ViewList):
                raise TypeError('inserting non-ViewList with no source given')
            self.data[index:index] = item.data
            self.items[index:index] = item.items
            if self.parent:
                self.parent.insert(index + self.parent_offset, item)
        else:
            self.data.insert(index, item)
            self.items.insert(index, (source, offset))
            if self.parent:
                self.parent.insert(index + self.parent_offset, item,
                                   source, offset)

    def pop(self, i=-1):
        """
        Remove and return the value at index `i` (default: the last one).

        The value is also removed from the parent list (if any).
        Raise `IndexError` if the list is empty or `i` is out of range.
        """
        size = len(self.data)
        # Pop locally first: an invalid index raises IndexError before the
        # parent list has been touched.
        value = self.data.pop(i)
        self.items.pop(i)
        if self.parent:
            index = i + size if i < 0 else i
            self.parent.pop(index + self.parent_offset)
        return value

    def trim_start(self, n=1):
        """
        Remove items from the start of the list, without touching the parent.
        """
        if n > len(self.data):
            raise IndexError("Size of trim too large; can't trim %s items "
                             "from a list of size %s." % (n, len(self.data)))
        elif n < 0:
            raise IndexError('Trim size must be >= 0.')
        del self.data[:n]
        del self.items[:n]
        if self.parent:
            self.parent_offset += n

    def trim_end(self, n=1):
        """
        Remove items from the end of the list, without touching the parent.
        """
        if n > len(self.data):
            raise IndexError("Size of trim too large; can't trim %s items "
                             "from a list of size %s." % (n, len(self.data)))
        elif n < 0:
            raise IndexError('Trim size must be >= 0.')
        if n:  # ``del x[-0:]`` would remove *everything*
            del self.data[-n:]
            del self.items[-n:]

    def remove(self, item) -> None:
        """Delete the first occurrence of `item`."""
        index = self.index(item)
        del self[index]

    def count(self, item):
        return self.data.count(item)

    def index(self, item):
        return self.data.index(item)

    def reverse(self) -> None:
        """Reverse in place; breaks the link to the parent list."""
        self.data.reverse()
        self.items.reverse()
        self.parent = None

    def sort(self, *args) -> None:
        """Sort in place; breaks the link to the parent list.

        (value, (source, offset)) pairs are sorted, so the source
        information stays attached to its value.  `args` are passed on to
        `sorted()`.
        """
        tmp = sorted(zip(self.data, self.items), *args)
        self.data = [entry[0] for entry in tmp]
        self.items = [entry[1] for entry in tmp]
        self.parent = None

    def info(self, i):
        """Return source & offset for index `i`.

        For the index just past the end, return the source of the last item
        and ``None`` as offset.
        """
        try:
            return self.items[i]
        except IndexError:
            if i == len(self.data):     # Just past the end
                return self.items[i - 1][0], None
            else:
                raise

    def source(self, i):
        """Return source for index `i`."""
        return self.info(i)[0]

    def offset(self, i):
        """Return offset for index `i`."""
        return self.info(i)[1]

    def disconnect(self) -> None:
        """Break link between this list and parent list."""
        self.parent = None

    def xitems(self):
        """Return iterator yielding (source, offset, value) tuples."""
        for (value, (source, offset)) in zip(self.data, self.items):
            yield source, offset, value

    def pprint(self) -> None:
        """Print the list in `grep` format (`source:offset:value` lines)"""
        for line in self.xitems():
            print("%s:%d:%s" % line)
1335
1336
class StringList(ViewList):

    """A `ViewList` with string-specific methods."""

    def trim_left(self, length, start=0, end=sys.maxsize) -> None:
        """
        Trim `length` characters off the beginning of each item, in-place,
        from index `start` to `end`.  No whitespace-checking is done on the
        trimmed text.  Does not affect slice parent.
        """
        self.data[start:end] = [line[length:]
                                for line in self.data[start:end]]

    def get_text_block(self, start, flush_left=False):
        """
        Return a contiguous block of text.

        The block starts at index `start` and ends before the first blank
        (empty or whitespace-only) line, or at the end of the list.

        If `flush_left` is true, raise `UnexpectedIndentationError` if an
        indented line is encountered before the text block ends (with a blank
        line).  The exception arguments are the block collected so far, the
        source, and the 1-based line number of the offending line.
        """
        end = start
        last = len(self.data)
        while end < last:
            line = self.data[end]
            if not line.strip():
                break
            # `line` is not empty here, so `line[0]` is safe.
            if flush_left and (line[0] == ' '):
                source, offset = self.info(end)
                raise UnexpectedIndentationError(self[start:end], source,
                                                 offset + 1)
            end += 1
        return self[start:end]

    def get_indented(self, start=0, until_blank=False, strip_indent=True,
                     block_indent=None, first_indent=None):
        """
        Extract and return a StringList of indented lines of text.

        Collect all lines with indentation, determine the minimum indentation,
        remove the minimum indentation from all indented lines (unless
        `strip_indent` is false), and return them.  All lines up to but not
        including the first unindented line will be returned.

        :Parameters:
          - `start`: The index of the first line to examine.
          - `until_blank`: Stop collecting at the first blank line if true.
          - `strip_indent`: Strip common leading indent if true (default).
          - `block_indent`: The indent of the entire block, if known.
          - `first_indent`: The indent of the first line, if known.

        :Return:
          - a StringList of indented lines with minimum indent removed;
          - the amount of the indent;
          - a boolean: did the indented block finish with a blank line or EOF?
        """
        indent = block_indent           # start with None if unknown
        end = start
        if block_indent is not None and first_indent is None:
            first_indent = block_indent
        if first_indent is not None:
            # The first line is part of the block by definition; skip it.
            end += 1
        last = len(self.data)
        while end < last:
            line = self.data[end]
            if line and (line[0] != ' '
                         or (block_indent is not None
                             and line[:block_indent].strip())):
                # Line not indented or insufficiently indented.
                # Block finished properly iff the last indented line blank:
                blank_finish = ((end > start)
                                and not self.data[end - 1].strip())
                break
            stripped = line.lstrip()
            if not stripped:            # blank line
                if until_blank:
                    blank_finish = True
                    break
            elif block_indent is None:
                line_indent = len(line) - len(stripped)
                if indent is None:
                    indent = line_indent
                else:
                    indent = min(indent, line_indent)
            end += 1
        else:
            blank_finish = True         # block ends at end of lines
        block = self[start:end]
        if first_indent is not None and block:
            block.data[0] = block.data[0][first_indent:]
        if indent and strip_indent:
            # If the first line was trimmed above, start at the second line.
            block.trim_left(indent, start=(first_indent is not None))
        return block, indent or 0, blank_finish

    def get_2D_block(self, top, left, bottom, right, strip_indent=True):
        """
        Return the rectangular block of lines `top` to `bottom` (exclusive),
        columns `left` to `right` (exclusive), with trailing whitespace
        stripped from every line.

        If `strip_indent` is true, the indentation common to all non-empty
        lines of the block is removed as well.

        The column-to-index mapping is computed for every line separately
        by `utils.column_indices()` (defined elsewhere; presumably it skips
        combining characters -- verify there).
        """
        block = self[top:bottom]
        indent = right
        for i, text in enumerate(block.data):
            # get slice from line, care for combining characters
            ci = utils.column_indices(text)
            # Use per-line bounds: a correction needed for one line must
            # not leak into the following lines (nor into `right` below).
            try:
                line_left = ci[left]
            except IndexError:
                line_left = left + len(text) - len(ci)
            try:
                line_right = ci[right]
            except IndexError:
                line_right = right + len(text) - len(ci)
            block.data[i] = line = text[line_left:line_right].rstrip()
            if line:
                indent = min(indent, len(line) - len(line.lstrip()))
        if strip_indent and 0 < indent < right:
            block.data = [line[indent:] for line in block.data]
        return block

    def pad_double_width(self, pad_char) -> None:
        """Pad all double-width characters in `self` appending `pad_char`.

        For East Asian language support.  Items that are not `str`
        instances are left untouched.
        """
        for i, line in enumerate(self.data):
            if isinstance(line, str):
                new = []
                for char in line:
                    new.append(char)
                    if east_asian_width(char) in 'WF':  # Wide & Full-width
                        new.append(pad_char)
                self.data[i] = ''.join(new)

    def replace(self, old, new) -> None:
        """Replace all occurrences of substring `old` with `new`."""
        for i, line in enumerate(self.data):
            self.data[i] = line.replace(old, new)
1471
1472
# Error classes.  `StateMachineError` is the common base class, so that a
# single ``except StateMachineError`` clause covers all of them.

class StateMachineError(Exception):
    pass


class UnknownStateError(StateMachineError):
    pass


class DuplicateStateError(StateMachineError):
    pass


class UnknownTransitionError(StateMachineError):
    pass


class DuplicateTransitionError(StateMachineError):
    pass


class TransitionPatternNotFound(StateMachineError):
    pass


class TransitionMethodNotFound(StateMachineError):
    pass


class UnexpectedIndentationError(StateMachineError):
    pass
1481
1482
class TransitionCorrection(Exception):

    """
    Raise from within a transition method to switch to another transition.

    Raise with one argument, the new transition name.

    Derives directly from `Exception`, not from `StateMachineError`: it is
    a control-flow signal rather than an error.  (The code that catches it
    is not part of this section -- presumably the running state machine.)
    """
1490
1491
class StateCorrection(Exception):

    """
    Raise from within a transition method to switch to another state.

    Raise with one or two arguments: new state name, and an optional new
    transition name.

    Derives directly from `Exception`, not from `StateMachineError`: it is
    a control-flow signal rather than an error.  (The code that catches it
    is not part of this section -- presumably the running state machine.)
    """
1500
1501
def string2lines(astring, tab_width=8, convert_whitespace=False,
                 whitespace=re.compile('[\v\f]')):
    """
    Split `astring` into a list of one-line strings.

    The lines carry no line ends, tabs are expanded, and trailing
    whitespace is stripped.

    Each tab is expanded with between 1 and `tab_width` spaces, so that the
    next character's index becomes a multiple of `tab_width` (8 by default).

    Parameters:

    - `astring`: a multi-line string.
    - `tab_width`: the number of columns between tab stops.
    - `convert_whitespace`: convert form feeds and vertical tabs to spaces?
    - `whitespace`: pattern object with the to-be-converted
      whitespace characters (default [\\v\\f]).
    """
    text = whitespace.sub(' ', astring) if convert_whitespace else astring
    lines = []
    for raw in text.splitlines():
        lines.append(raw.expandtabs(tab_width).rstrip())
    return lines
1522
1523
1524def _exception_data():
1525 """
1526 Return exception information:
1527
1528 - the exception's class name;
1529 - the exception object;
1530 - the name of the file containing the offending code;
1531 - the line number of the offending code;
1532 - the function name of the offending code.
1533 """
1534 typ, value, traceback = sys.exc_info()
1535 while traceback.tb_next:
1536 traceback = traceback.tb_next
1537 code = traceback.tb_frame.f_code
1538 return (typ.__name__, value, code.co_filename, traceback.tb_lineno,
1539 code.co_name)