Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/docutils/statemachine.py: 73%

1# $Id$

2# Author: David Goodger <goodger@python.org>

3# Copyright: This module has been placed in the public domain.

5"""

6A finite state machine specialized for regular-expression-based text filters,

7this module defines the following classes:

9- `StateMachine`, a state machine

10- `State`, a state superclass

11- `StateMachineWS`, a whitespace-sensitive version of `StateMachine`

12- `StateWS`, a state superclass for use with `StateMachineWS`

13- `SearchStateMachine`, uses `re.search()` instead of `re.match()`

14- `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`

15- `ViewList`, extends standard Python lists.

16- `StringList`, string-specific ViewList.

18Exception classes:

20- `StateMachineError`

21- `UnknownStateError`

22- `DuplicateStateError`

23- `UnknownTransitionError`

24- `DuplicateTransitionError`

25- `TransitionPatternNotFound`

26- `TransitionMethodNotFound`

27- `UnexpectedIndentationError`

28- `TransitionCorrection`: Raised to switch to another transition.

29- `StateCorrection`: Raised to switch to another state & transition.

31Functions:

33- `string2lines()`: split a multi-line string into a list of one-line strings

36How To Use This Module

37======================

38(See the individual classes, methods, and attributes for details.)

401. Import it: ``import statemachine`` or ``from statemachine import ...``.

41 You will also need to ``import re``.

432. Derive a subclass of `State` (or `StateWS`) for each state in your state

44 machine::

46 class MyState(statemachine.State):

48 Within the state's class definition:

50 a) Include a pattern for each transition, in `State.patterns`::

52 patterns = {'atransition': r'pattern', ...}

54 b) Include a list of initial transitions to be set up automatically, in

55 `State.initial_transitions`::

57 initial_transitions = ['atransition', ...]

59 c) Define a method for each transition, with the same name as the

60 transition pattern::

62 def atransition(self, match, context, next_state):

63 # do something

64 result = [...] # a list

65 return context, next_state, result

66 # context, next_state may be altered

68 Transition methods may raise an `EOFError` to cut processing short.

70 d) You may wish to override the `State.bof()` and/or `State.eof()` implicit

71 transition methods, which handle the beginning- and end-of-file.

73 e) In order to handle nested processing, you may wish to override the

74 attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.

76 If you are using `StateWS` as a base class, in order to handle nested

77 indented blocks, you may wish to:

79 - override the attributes `StateWS.indent_sm`,

80 `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or

81 `StateWS.known_indent_sm_kwargs`;

82 - override the `StateWS.blank()` method; and/or

83 - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,

84 and/or `StateWS.firstknown_indent()` methods.

863. Create a state machine object::

88 sm = StateMachine(state_classes=[MyState, ...],

89 initial_state='MyState')

914. Obtain the input text, which needs to be converted into a tab-free list of

92 one-line strings. For example, to read text from a file called

93 'inputfile'::

95 with open('inputfile', encoding='utf-8') as fp:

96 input_string = fp.read()

97 input_lines = statemachine.string2lines(input_string)

995. Run the state machine on the input text and collect the results, a list::

100

101 results = sm.run(input_lines)

102

1036. Remove any lingering circular references::

104

105 sm.unlink()

106"""

107

108__docformat__ = 'restructuredtext'

109

110import sys

111import re

112from unicodedata import east_asian_width

113

114from docutils import utils

115

116

117class StateMachine:

118

119 """

120 A finite state machine for text filters using regular expressions.

121

122 The input is provided in the form of a list of one-line strings (no

123 newlines). States are subclasses of the `State` class. Transitions consist

124 of regular expression patterns and transition methods, and are defined in

125 each state.

126

127 The state machine is started with the `run()` method, which returns the

128 results of processing in a list.

129 """

130

131 def __init__(self, state_classes, initial_state, debug=False):

132 """

133 Initialize a `StateMachine` object; add state objects.

134

135 Parameters:

136

137 - `state_classes`: a list of `State` (sub)classes.

138 - `initial_state`: a string, the class name of the initial state.

139 - `debug`: a boolean; produce verbose output if true (nonzero).

140 """

141

142 self.input_lines = None

143 """`StringList` of input lines (without newlines).

144 Filled by `self.run()`."""

145

146 self.input_offset = 0

147 """Offset of `self.input_lines` from the beginning of the file."""

148

149 self.line = None

150 """Current input line."""

151

152 self.line_offset = -1

153 """Current input line offset from beginning of `self.input_lines`."""

154

155 self.debug = debug

156 """Debugging mode on/off."""

157

158 self.initial_state = initial_state

159 """The name of the initial state (key to `self.states`)."""

160

161 self.current_state = initial_state

162 """The name of the current state (key to `self.states`)."""

163

164 self.states = {}

165 """Mapping of {state_name: State_object}."""

166

167 self.add_states(state_classes)

168

169 self.observers = []

170 """List of bound methods or functions to call whenever the current

171 line changes. Observers are called with one argument, ``self``.

172 Cleared at the end of `run()`."""

173

174 def unlink(self):

175 """Remove circular references to objects no longer required."""

176 for state in self.states.values():

177 state.unlink()

178 self.states = None

179

180 def run(self, input_lines, input_offset=0, context=None,

181 input_source=None, initial_state=None):

182 """

183 Run the state machine on `input_lines`. Return results (a list).

184

185 Reset `self.line_offset` and `self.current_state`. Run the

186 beginning-of-file transition. Input one line at a time and check for a

187 matching transition. If a match is found, call the transition method

188 and possibly change the state. Store the context returned by the

189 transition method to be passed on to the next transition matched.

190 Accumulate the results returned by the transition methods in a list.

191 Run the end-of-file transition. Finally, return the accumulated

192 results.

193

194 Parameters:

195

196 - `input_lines`: a list of strings without newlines, or `StringList`.

197 - `input_offset`: the line offset of `input_lines` from the beginning

198 of the file.

199 - `context`: application-specific storage.

200 - `input_source`: name or path of source of `input_lines`.

201 - `initial_state`: name of initial state.

202 """

203 self.runtime_init()

204 if isinstance(input_lines, StringList):

205 self.input_lines = input_lines

206 else:

207 self.input_lines = StringList(input_lines, source=input_source)

208 self.input_offset = input_offset

209 self.line_offset = -1

210 self.current_state = initial_state or self.initial_state

211 if self.debug:

212 print('\nStateMachine.run: input_lines (line_offset=%s):\n| %s'

213 % (self.line_offset, '\n| '.join(self.input_lines)),

214 file=sys.stderr)

215 transitions = None

216 results = []

217 state = self.get_state()

218 try:

219 if self.debug:

220 print('\nStateMachine.run: bof transition', file=sys.stderr)

221 context, result = state.bof(context)

222 results.extend(result)

223 while True:

224 try:

225 try:

226 self.next_line()

227 if self.debug:

228 source, offset = self.input_lines.info(

229 self.line_offset)

230 print(f'\nStateMachine.run: line '

231 f'(source={source!r}, offset={offset!r}):\n'

232 f'| {self.line}', file=sys.stderr)

233 context, next_state, result = self.check_line(

234 context, state, transitions)

235 except EOFError:

236 if self.debug:

237 print('\nStateMachine.run: %s.eof transition'

238 % state.__class__.__name__, file=sys.stderr)

239 result = state.eof(context)

240 results.extend(result)

241 break

242 else:

243 results.extend(result)

244 except TransitionCorrection as exception:

245 self.previous_line() # back up for another try

246 transitions = (exception.args[0],)

247 if self.debug:

248 print('\nStateMachine.run: TransitionCorrection to '

249 f'state "{state.__class__.__name__}", '

250 f'transition {transitions[0]}.',

251 file=sys.stderr)

252 continue

253 except StateCorrection as exception:

254 self.previous_line() # back up for another try

255 next_state = exception.args[0]

256 if len(exception.args) == 1:

257 transitions = None

258 else:

259 transitions = (exception.args[1],)

260 if self.debug:

261 print('\nStateMachine.run: StateCorrection to state '

262 f'"{next_state}", transition {transitions[0]}.',

263 file=sys.stderr)

264 else:

265 transitions = None

266 state = self.get_state(next_state)

267 except: # noqa catchall

268 if self.debug:

269 self.error()

270 raise

271 self.observers = []

272 return results

273

274 def get_state(self, next_state=None):

275 """

276 Return current state object; set it first if `next_state` given.

277

278 Parameter `next_state`: a string, the name of the next state.

279

280 Exception: `UnknownStateError` raised if `next_state` unknown.

281 """

282 if next_state:

283 if self.debug and next_state != self.current_state:

284 print('\nStateMachine.get_state: Changing state from '

285 '"%s" to "%s" (input line %s).'

286 % (self.current_state, next_state,

287 self.abs_line_number()), file=sys.stderr)

288 self.current_state = next_state

289 try:

290 return self.states[self.current_state]

291 except KeyError:

292 raise UnknownStateError(self.current_state)

293

294 def next_line(self, n=1):

295 """Load `self.line` with the `n`'th next line and return it."""

296 try:

297 try:

298 self.line_offset += n

299 self.line = self.input_lines[self.line_offset]

300 except IndexError:

301 self.line = None

302 raise EOFError

303 return self.line

304 finally:

305 self.notify_observers()

306

307 def is_next_line_blank(self):

308 """Return True if the next line is blank or non-existent."""

309 try:

310 return not self.input_lines[self.line_offset + 1].strip()

311 except IndexError:

312 return 1

313

314 def at_eof(self):

315 """Return 1 if the input is at or past end-of-file."""

316 return self.line_offset >= len(self.input_lines) - 1

317

318 def at_bof(self):

319 """Return 1 if the input is at or before beginning-of-file."""

320 return self.line_offset <= 0

321

322 def previous_line(self, n=1):

323 """Load `self.line` with the `n`'th previous line and return it."""

324 self.line_offset -= n

325 if self.line_offset < 0:

326 self.line = None

327 else:

328 self.line = self.input_lines[self.line_offset]

329 self.notify_observers()

330 return self.line

331

332 def goto_line(self, line_offset):

333 """Jump to absolute line offset `line_offset`, load and return it."""

334 try:

335 try:

336 self.line_offset = line_offset - self.input_offset

337 self.line = self.input_lines[self.line_offset]

338 except IndexError:

339 self.line = None

340 raise EOFError

341 return self.line

342 finally:

343 self.notify_observers()

344

345 def get_source(self, line_offset):

346 """Return source of line at absolute line offset `line_offset`."""

347 return self.input_lines.source(line_offset - self.input_offset)

348

349 def abs_line_offset(self):

350 """Return line offset of current line, from beginning of file."""

351 return self.line_offset + self.input_offset

352

353 def abs_line_number(self):

354 """Return line number of current line (counting from 1)."""

355 return self.line_offset + self.input_offset + 1

356

357 def get_source_and_line(self, lineno=None):

358 """Return (source, line) tuple for current or given line number.

359

360 Looks up the source and line number in the `self.input_lines`

361 StringList instance to count for included source files.

362

363 If the optional argument `lineno` is given, convert it from an

364 absolute line number to the corresponding (source, line) pair.

365 """

366 if lineno is None:

367 offset = self.line_offset

368 else:

369 offset = lineno - self.input_offset - 1

370 try:

371 src, srcoffset = self.input_lines.info(offset)

372 srcline = srcoffset + 1

373 except TypeError:

374 # line is None if index is "Just past the end"

375 src, srcline = self.get_source_and_line(offset + self.input_offset)

376 return src, srcline + 1

377 except IndexError: # `offset` is off the list

378 src, srcline = None, None

379 # raise AssertionError('cannot find line %d in %s lines' %

380 # (offset, len(self.input_lines)))

381 # # list(self.input_lines.lines())))

382 return src, srcline

383

384 def insert_input(self, input_lines, source):

385 self.input_lines.insert(self.line_offset + 1, '',

386 source='internal padding after '+source,

387 offset=len(input_lines))

388 self.input_lines.insert(self.line_offset + 1, '',

389 source='internal padding before '+source,

390 offset=-1)

391 self.input_lines.insert(self.line_offset + 2,

392 StringList(input_lines, source))

393

394 def get_text_block(self, flush_left=False):

395 """

396 Return a contiguous block of text.

397

398 If `flush_left` is true, raise `UnexpectedIndentationError` if an

399 indented line is encountered before the text block ends (with a blank

400 line).

401 """

402 try:

403 block = self.input_lines.get_text_block(self.line_offset,

404 flush_left)

405 self.next_line(len(block) - 1)

406 return block

407 except UnexpectedIndentationError as err:

408 block = err.args[0]

409 self.next_line(len(block) - 1) # advance to last line of block

410 raise

411

412 def check_line(self, context, state, transitions=None):

413 """

414 Examine one line of input for a transition match & execute its method.

415

416 Parameters:

417

418 - `context`: application-dependent storage.

419 - `state`: a `State` object, the current state.

420 - `transitions`: an optional ordered list of transition names to try,

421 instead of ``state.transition_order``.

422

423 Return the values returned by the transition method:

424

425 - context: possibly modified from the parameter `context`;

426 - next state name (`State` subclass name);

427 - the result output of the transition, a list.

428

429 When there is no match, ``state.no_match()`` is called and its return

430 value is returned.

431 """

432 if transitions is None:

433 transitions = state.transition_order

434 if self.debug:

435 print('\nStateMachine.check_line: state="%s", transitions=%r.'

436 % (state.__class__.__name__, transitions), file=sys.stderr)

437 for name in transitions:

438 pattern, method, next_state = state.transitions[name]

439 match = pattern.match(self.line)

440 if match:

441 if self.debug:

442 print('\nStateMachine.check_line: Matched transition '

443 f'"{name}" in state "{state.__class__.__name__}".',

444 file=sys.stderr)

445 return method(match, context, next_state)

446 else:

447 if self.debug:

448 print('\nStateMachine.check_line: No match in state "%s".'

449 % state.__class__.__name__, file=sys.stderr)

450 return state.no_match(context, transitions)

451

452 def add_state(self, state_class):

453 """

454 Initialize & add a `state_class` (`State` subclass) object.

455

456 Exception: `DuplicateStateError` raised if `state_class` was already

457 added.

458 """

459 statename = state_class.__name__

460 if statename in self.states:

461 raise DuplicateStateError(statename)

462 self.states[statename] = state_class(self, self.debug)

463

464 def add_states(self, state_classes):

465 """

466 Add `state_classes` (a list of `State` subclasses).

467 """

468 for state_class in state_classes:

469 self.add_state(state_class)

470

471 def runtime_init(self):

472 """

473 Initialize `self.states`.

474 """

475 for state in self.states.values():

476 state.runtime_init()

477

478 def error(self):

479 """Report error details."""

480 type, value, module, line, function = _exception_data()

481 print('%s: %s' % (type, value), file=sys.stderr)

482 print('input line %s' % (self.abs_line_number()), file=sys.stderr)

483 print('module %s, line %s, function %s' % (module, line, function),

484 file=sys.stderr)

485

486 def attach_observer(self, observer):

487 """

488 The `observer` parameter is a function or bound method which takes two

489 arguments, the source and offset of the current line.

490 """

491 self.observers.append(observer)

492

493 def detach_observer(self, observer):

494 self.observers.remove(observer)

495

496 def notify_observers(self):

497 for observer in self.observers:

498 try:

499 info = self.input_lines.info(self.line_offset)

500 except IndexError:

501 info = (None, None)

502 observer(*info)

503

504

505class State:

506

507 """

508 State superclass. Contains a list of transitions, and transition methods.

509

510 Transition methods all have the same signature. They take 3 parameters:

511

512 - An `re` match object. ``match.string`` contains the matched input line,

513 ``match.start()`` gives the start index of the match, and

514 ``match.end()`` gives the end index.

515 - A context object, whose meaning is application-defined (initial value

516 ``None``). It can be used to store any information required by the state

517 machine, and the returned context is passed on to the next transition

518 method unchanged.

519 - The name of the next state, a string, taken from the transitions list;

520 normally it is returned unchanged, but it may be altered by the

521 transition method if necessary.

522

523 Transition methods all return a 3-tuple:

524

525 - A context object, as (potentially) modified by the transition method.

526 - The next state name (a return value of ``None`` means no state change).

527 - The processing result, a list, which is accumulated by the state

528 machine.

529

530 Transition methods may raise an `EOFError` to cut processing short.

531

532 There are two implicit transitions, and corresponding transition methods

533 are defined: `bof()` handles the beginning-of-file, and `eof()` handles

534 the end-of-file. These methods have non-standard signatures and return

535 values. `bof()` returns the initial context and results, and may be used

536 to return a header string, or do any other processing needed. `eof()`

537 should handle any remaining context and wrap things up; it returns the

538 final processing result.

539

540 Typical applications need only subclass `State` (or a subclass), set the

541 `patterns` and `initial_transitions` class attributes, and provide

542 corresponding transition methods. The default object initialization will

543 take care of constructing the list of transitions.

544 """

545

546 patterns = None

547 """

548 {Name: pattern} mapping, used by `make_transition()`. Each pattern may

549 be a string or a compiled `re` pattern. Override in subclasses.

550 """

551

552 initial_transitions = None

553 """

554 A list of transitions to initialize when a `State` is instantiated.

555 Each entry is either a transition name string, or a (transition name, next

556 state name) pair. See `make_transitions()`. Override in subclasses.

557 """

558

559 nested_sm = None

560 """

561 The `StateMachine` class for handling nested processing.

562

563 If left as ``None``, `nested_sm` defaults to the class of the state's

564 controlling state machine. Override it in subclasses to avoid the default.

565 """

566

567 nested_sm_kwargs = None

568 """

569 Keyword arguments dictionary, passed to the `nested_sm` constructor.

570

571 Two keys must have entries in the dictionary:

572

573 - Key 'state_classes' must be set to a list of `State` classes.

574 - Key 'initial_state' must be set to the name of the initial state class.

575

576 If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the

577 class of the current state, and 'initial_state' defaults to the name of

578 the class of the current state. Override in subclasses to avoid the

579 defaults.

580 """

581

582 def __init__(self, state_machine, debug=False):

583 """

584 Initialize a `State` object; make & add initial transitions.

585

586 Parameters:

587

588 - `statemachine`: the controlling `StateMachine` object.

589 - `debug`: a boolean; produce verbose output if true.

590 """

591

592 self.transition_order = []

593 """A list of transition names in search order."""

594

595 self.transitions = {}

596 """

597 A mapping of transition names to 3-tuples containing

598 (compiled_pattern, transition_method, next_state_name). Initialized as

599 an instance attribute dynamically (instead of as a class attribute)

600 because it may make forward references to patterns and methods in this

601 or other classes.

602 """

603

604 self.add_initial_transitions()

605

606 self.state_machine = state_machine

607 """A reference to the controlling `StateMachine` object."""

608

609 self.debug = debug

610 """Debugging mode on/off."""

611

612 if self.nested_sm is None:

613 self.nested_sm = self.state_machine.__class__

614 if self.nested_sm_kwargs is None:

615 self.nested_sm_kwargs = {'state_classes': [self.__class__],

616 'initial_state': self.__class__.__name__}

617

618 def runtime_init(self):

619 """

620 Initialize this `State` before running the state machine; called from

621 `self.state_machine.run()`.

622 """

623 pass

624

625 def unlink(self):

626 """Remove circular references to objects no longer required."""

627 self.state_machine = None

628

629 def add_initial_transitions(self):

630 """Make and add transitions listed in `self.initial_transitions`."""

631 if self.initial_transitions:

632 names, transitions = self.make_transitions(

633 self.initial_transitions)

634 self.add_transitions(names, transitions)

635

636 def add_transitions(self, names, transitions):

637 """

638 Add a list of transitions to the start of the transition list.

639

640 Parameters:

641

642 - `names`: a list of transition names.

643 - `transitions`: a mapping of names to transition tuples.

644

645 Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.

646 """

647 for name in names:

648 if name in self.transitions:

649 raise DuplicateTransitionError(name)

650 if name not in transitions:

651 raise UnknownTransitionError(name)

652 self.transition_order[:0] = names

653 self.transitions.update(transitions)

654

655 def add_transition(self, name, transition):

656 """

657 Add a transition to the start of the transition list.

658

659 Parameter `transition`: a ready-made transition 3-tuple.

660

661 Exception: `DuplicateTransitionError`.

662 """

663 if name in self.transitions:

664 raise DuplicateTransitionError(name)

665 self.transition_order[:0] = [name]

666 self.transitions[name] = transition

667

668 def remove_transition(self, name):

669 """

670 Remove a transition by `name`.

671

672 Exception: `UnknownTransitionError`.

673 """

674 try:

675 del self.transitions[name]

676 self.transition_order.remove(name)

677 except: # noqa catchall

678 raise UnknownTransitionError(name)

679

680 def make_transition(self, name, next_state=None):

681 """

682 Make & return a transition tuple based on `name`.

683

684 This is a convenience function to simplify transition creation.

685

686 Parameters:

687

688 - `name`: a string, the name of the transition pattern & method. This

689 `State` object must have a method called '`name`', and a dictionary

690 `self.patterns` containing a key '`name`'.

691 - `next_state`: a string, the name of the next `State` object for this

692 transition. A value of ``None`` (or absent) implies no state change

693 (i.e., continue with the same state).

694

695 Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.

696 """

697 if next_state is None:

698 next_state = self.__class__.__name__

699 try:

700 pattern = self.patterns[name]

701 if not hasattr(pattern, 'match'):

702 pattern = self.patterns[name] = re.compile(pattern)

703 except KeyError:

704 raise TransitionPatternNotFound(

705 '%s.patterns[%r]' % (self.__class__.__name__, name))

706 try:

707 method = getattr(self, name)

708 except AttributeError:

709 raise TransitionMethodNotFound(

710 '%s.%s' % (self.__class__.__name__, name))

711 return pattern, method, next_state

712

713 def make_transitions(self, name_list):

714 """

715 Return a list of transition names and a transition mapping.

716

717 Parameter `name_list`: a list, where each entry is either a transition

718 name string, or a 1- or 2-tuple (transition name, optional next state

719 name).

720 """

721 names = []

722 transitions = {}

723 for namestate in name_list:

724 if isinstance(namestate, str):

725 transitions[namestate] = self.make_transition(namestate)

726 names.append(namestate)

727 else:

728 transitions[namestate[0]] = self.make_transition(*namestate)

729 names.append(namestate[0])

730 return names, transitions

731

732 def no_match(self, context, transitions):

733 """

734 Called when there is no match from `StateMachine.check_line()`.

735

736 Return the same values returned by transition methods:

737

738 - context: unchanged;

739 - next state name: ``None``;

740 - empty result list.

741

742 Override in subclasses to catch this event.

743 """

744 return context, None, []

745

746 def bof(self, context):

747 """

748 Handle beginning-of-file. Return unchanged `context`, empty result.

749

750 Override in subclasses.

751

752 Parameter `context`: application-defined storage.

753 """

754 return context, []

755

756 def eof(self, context):

757 """

758 Handle end-of-file. Return empty result.

759

760 Override in subclasses.

761

762 Parameter `context`: application-defined storage.

763 """

764 return []

765

766 def nop(self, match, context, next_state):

767 """

768 A "do nothing" transition method.

769

770 Return unchanged `context` & `next_state`, empty result. Useful for

771 simple state changes (actionless transitions).

772 """

773 return context, next_state, []

774

775

776class StateMachineWS(StateMachine):

777

778 """

779 `StateMachine` subclass specialized for whitespace recognition.

780

781 There are three methods provided for extracting indented text blocks:

782

783 - `get_indented()`: use when the indent is unknown.

784 - `get_known_indented()`: use when the indent is known for all lines.

785 - `get_first_known_indented()`: use when only the first line's indent is

786 known.

787 """

788

789 def get_indented(self, until_blank=False, strip_indent=True):

790 """

791 Return a block of indented lines of text, and info.

792

793 Extract an indented block where the indent is unknown for all lines.

794

795 :Parameters:

796 - `until_blank`: Stop collecting at the first blank line if true.

797 - `strip_indent`: Strip common leading indent if true (default).

798

799 :Return:

800 - the indented block (a list of lines of text),

801 - its indent,

802 - its first line offset from BOF, and

803 - whether or not it finished with a blank line.

804 """

805 offset = self.abs_line_offset()

806 indented, indent, blank_finish = self.input_lines.get_indented(

807 self.line_offset, until_blank, strip_indent)

808 if indented:

809 self.next_line(len(indented) - 1) # advance to last indented line

810 while indented and not indented[0].strip():

811 indented.trim_start()

812 offset += 1

813 return indented, indent, offset, blank_finish

814

815 def get_known_indented(self, indent, until_blank=False, strip_indent=True):

816 """

817 Return an indented block and info.

818

819 Extract an indented block where the indent is known for all lines.

820 Starting with the current line, extract the entire text block with at

821 least `indent` indentation (which must be whitespace, except for the

822 first line).

823

824 :Parameters:

825 - `indent`: The number of indent columns/characters.

826 - `until_blank`: Stop collecting at the first blank line if true.

827 - `strip_indent`: Strip `indent` characters of indentation if true

828 (default).

829

830 :Return:

831 - the indented block,

832 - its first line offset from BOF, and

833 - whether or not it finished with a blank line.

834 """

835 offset = self.abs_line_offset()

836 indented, indent, blank_finish = self.input_lines.get_indented(

837 self.line_offset, until_blank, strip_indent,

838 block_indent=indent)

839 self.next_line(len(indented) - 1) # advance to last indented line

840 while indented and not indented[0].strip():

841 indented.trim_start()

842 offset += 1

843 return indented, offset, blank_finish

844

845 def get_first_known_indented(self, indent, until_blank=False,

846 strip_indent=True, strip_top=True):

847 """

848 Return an indented block and info.

849

850 Extract an indented block where the indent is known for the first line

851 and unknown for all other lines.

852

853 :Parameters:

854 - `indent`: The first line's indent (# of columns/characters).

855 - `until_blank`: Stop collecting at the first blank line if true

856 (1).

857 - `strip_indent`: Strip `indent` characters of indentation if true

858 (1, default).

859 - `strip_top`: Strip blank lines from the beginning of the block.

860

861 :Return:

862 - the indented block,

863 - its indent,

864 - its first line offset from BOF, and

865 - whether or not it finished with a blank line.

866 """

867 offset = self.abs_line_offset()

868 indented, indent, blank_finish = self.input_lines.get_indented(

869 self.line_offset, until_blank, strip_indent,

870 first_indent=indent)

871 self.next_line(len(indented) - 1) # advance to last indented line

872 if strip_top:

873 while indented and not indented[0].strip():

874 indented.trim_start()

875 offset += 1

876 return indented, indent, offset, blank_finish

877

878

879class StateWS(State):

880

881 """

882 State superclass specialized for whitespace (blank lines & indents).

883

884 Use this class with `StateMachineWS`. The transitions 'blank' (for blank

885 lines) and 'indent' (for indented text blocks) are added automatically,

886 before any other transitions. The transition method `blank()` handles

887 blank lines and `indent()` handles nested indented blocks. Indented

888 blocks trigger a new state machine to be created by `indent()` and run.

889 The class of the state machine to be created is in `indent_sm`, and the

890 constructor keyword arguments are in the dictionary `indent_sm_kwargs`.

891

892 The methods `known_indent()` and `firstknown_indent()` are provided for

893 indented blocks where the indent (all lines' and first line's only,

894 respectively) is known to the transition method, along with the attributes

895 `known_indent_sm` and `known_indent_sm_kwargs`. Neither transition method

896 is triggered automatically.

897 """

898

899 indent_sm = None

900 """

901 The `StateMachine` class handling indented text blocks.

902

903 If left as ``None``, `indent_sm` defaults to the value of

904 `State.nested_sm`. Override it in subclasses to avoid the default.

905 """

906

907 indent_sm_kwargs = None

908 """

909 Keyword arguments dictionary, passed to the `indent_sm` constructor.

910

911 If left as ``None``, `indent_sm_kwargs` defaults to the value of

912 `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.

913 """

914

915 known_indent_sm = None

916 """

917 The `StateMachine` class handling known-indented text blocks.

918

919 If left as ``None``, `known_indent_sm` defaults to the value of

920 `indent_sm`. Override it in subclasses to avoid the default.

921 """

922

923 known_indent_sm_kwargs = None

924 """

925 Keyword arguments dictionary, passed to the `known_indent_sm` constructor.

926

927 If left as ``None``, `known_indent_sm_kwargs` defaults to the value of

928 `indent_sm_kwargs`. Override it in subclasses to avoid the default.

929 """

930

931 ws_patterns = {'blank': re.compile(' *$'),

932 'indent': re.compile(' +')}

933 """Patterns for default whitespace transitions. May be overridden in

934 subclasses."""

935

936 ws_initial_transitions = ('blank', 'indent')

937 """Default initial whitespace transitions, added before those listed in

938 `State.initial_transitions`. May be overridden in subclasses."""

939

940 def __init__(self, state_machine, debug=False):

941 """

942 Initialize a `StateSM` object; extends `State.__init__()`.

943

944 Check for indent state machine attributes, set defaults if not set.

945 """

946 State.__init__(self, state_machine, debug)

947 if self.indent_sm is None:

948 self.indent_sm = self.nested_sm

949 if self.indent_sm_kwargs is None:

950 self.indent_sm_kwargs = self.nested_sm_kwargs

951 if self.known_indent_sm is None:

952 self.known_indent_sm = self.indent_sm

953 if self.known_indent_sm_kwargs is None:

954 self.known_indent_sm_kwargs = self.indent_sm_kwargs

955

956 def add_initial_transitions(self):

957 """

958 Add whitespace-specific transitions before those defined in subclass.

959

960 Extends `State.add_initial_transitions()`.

961 """

962 State.add_initial_transitions(self)

963 if self.patterns is None:

964 self.patterns = {}

965 self.patterns.update(self.ws_patterns)

966 names, transitions = self.make_transitions(

967 self.ws_initial_transitions)

968 self.add_transitions(names, transitions)

969

970 def blank(self, match, context, next_state):

971 """Handle blank lines. Does nothing. Override in subclasses."""

972 return self.nop(match, context, next_state)

973

974 def indent(self, match, context, next_state):

975 """

976 Handle an indented text block. Extend or override in subclasses.

977

978 Recursively run the registered state machine for indented blocks

979 (`self.indent_sm`).

980 """

981 (indented, indent, line_offset, blank_finish

982 ) = self.state_machine.get_indented()

983 sm = self.indent_sm(debug=self.debug, **self.indent_sm_kwargs)

984 results = sm.run(indented, input_offset=line_offset)

985 return context, next_state, results

986

987 def known_indent(self, match, context, next_state):

988 """

989 Handle a known-indent text block. Extend or override in subclasses.

990

991 Recursively run the registered state machine for known-indent indented

992 blocks (`self.known_indent_sm`). The indent is the length of the

993 match, ``match.end()``.

994 """

995 (indented, line_offset, blank_finish

996 ) = self.state_machine.get_known_indented(match.end())

997 sm = self.known_indent_sm(debug=self.debug,

998 **self.known_indent_sm_kwargs)

999 results = sm.run(indented, input_offset=line_offset)

1000 return context, next_state, results

1001

1002 def first_known_indent(self, match, context, next_state):

1003 """

1004 Handle an indented text block (first line's indent known).

1005

1006 Extend or override in subclasses.

1007

1008 Recursively run the registered state machine for known-indent indented

1009 blocks (`self.known_indent_sm`). The indent is the length of the

1010 match, ``match.end()``.

1011 """

1012 (indented, line_offset, blank_finish

1013 ) = self.state_machine.get_first_known_indented(match.end())

1014 sm = self.known_indent_sm(debug=self.debug,

1015 **self.known_indent_sm_kwargs)

1016 results = sm.run(indented, input_offset=line_offset)

1017 return context, next_state, results

1018

1019

1020class _SearchOverride:

1021

1022 """

1023 Mix-in class to override `StateMachine` regular expression behavior.

1024

1025 Changes regular expression matching, from the default `re.match()`

1026 (succeeds only if the pattern matches at the start of `self.line`) to

1027 `re.search()` (succeeds if the pattern matches anywhere in `self.line`).

1028 When subclassing a `StateMachine`, list this class **first** in the

1029 inheritance list of the class definition.

1030 """

1031

1032 def match(self, pattern):

1033 """

1034 Return the result of a regular expression search.

1035

1036 Overrides `StateMachine.match()`.

1037

1038 Parameter `pattern`: `re` compiled regular expression.

1039 """

1040 return pattern.search(self.line)

1041

1042

1043class SearchStateMachine(_SearchOverride, StateMachine):

1044 """`StateMachine` which uses `re.search()` instead of `re.match()`."""

1045 pass

1046

1047

1048class SearchStateMachineWS(_SearchOverride, StateMachineWS):

1049 """`StateMachineWS` which uses `re.search()` instead of `re.match()`."""

1050 pass

1051

1052

1053class ViewList:

1054

1055 """

1056 List with extended functionality: slices of ViewList objects are child

1057 lists, linked to their parents. Changes made to a child list also affect

1058 the parent list. A child list is effectively a "view" (in the SQL sense)

1059 of the parent list. Changes to parent lists, however, do *not* affect

1060 active child lists. If a parent list is changed, any active child lists

1061 should be recreated.

1062

1063 The start and end of the slice can be trimmed using the `trim_start()` and

1064 `trim_end()` methods, without affecting the parent list. The link between

1065 child and parent lists can be broken by calling `disconnect()` on the

1066 child list.

1067

1068 Also, ViewList objects keep track of the source & offset of each item.

1069 This information is accessible via the `source()`, `offset()`, and

1070 `info()` methods.

1071 """

1072

1073 def __init__(self, initlist=None, source=None, items=None,

1074 parent=None, parent_offset=None):

1075 self.data = []

1076 """The actual list of data, flattened from various sources."""

1077

1078 self.items = []

1079 """A list of (source, offset) pairs, same length as `self.data`: the

1080 source of each line and the offset of each line from the beginning of

1081 its source."""

1082

1083 self.parent = parent

1084 """The parent list."""

1085

1086 self.parent_offset = parent_offset

1087 """Offset of this list from the beginning of the parent list."""

1088

1089 if isinstance(initlist, ViewList):

1090 self.data = initlist.data[:]

1091 self.items = initlist.items[:]

1092 elif initlist is not None:

1093 self.data = list(initlist)

1094 if items:

1095 self.items = items

1096 else:

1097 self.items = [(source, i) for i in range(len(initlist))]

1098 assert len(self.data) == len(self.items), 'data mismatch'

1099

1100 def __str__(self):

1101 return str(self.data)

1102

1103 def __repr__(self):

1104 return f'{self.__class__.__name__}({self.data}, items={self.items})'

1105

1106 def __lt__(self, other): return self.data < self.__cast(other) # noqa

1107 def __le__(self, other): return self.data <= self.__cast(other) # noqa

1108 def __eq__(self, other): return self.data == self.__cast(other) # noqa

1109 def __ne__(self, other): return self.data != self.__cast(other) # noqa

1110 def __gt__(self, other): return self.data > self.__cast(other) # noqa

1111 def __ge__(self, other): return self.data >= self.__cast(other) # noqa

1112

1113 def __cast(self, other):

1114 if isinstance(other, ViewList):

1115 return other.data

1116 else:

1117 return other

1118

1119 def __contains__(self, item):

1120 return item in self.data

1121

1122 def __len__(self):

1123 return len(self.data)

1124

1125 # The __getitem__()/__setitem__() methods check whether the index

1126 # is a slice first, since indexing a native list with a slice object

1127 # just works.

1128

1129 def __getitem__(self, i):

1130 if isinstance(i, slice):

1131 assert i.step in (None, 1), 'cannot handle slice with stride'

1132 return self.__class__(self.data[i.start:i.stop],

1133 items=self.items[i.start:i.stop],

1134 parent=self, parent_offset=i.start or 0)

1135 else:

1136 return self.data[i]

1137

1138 def __setitem__(self, i, item):

1139 if isinstance(i, slice):

1140 assert i.step in (None, 1), 'cannot handle slice with stride'

1141 if not isinstance(item, ViewList):

1142 raise TypeError('assigning non-ViewList to ViewList slice')

1143 self.data[i.start:i.stop] = item.data

1144 self.items[i.start:i.stop] = item.items

1145 assert len(self.data) == len(self.items), 'data mismatch'

1146 if self.parent:

1147 k = (i.start or 0) + self.parent_offset

1148 n = (i.stop or len(self)) + self.parent_offset

1149 self.parent[k:n] = item

1150 else:

1151 self.data[i] = item

1152 if self.parent:

1153 self.parent[i + self.parent_offset] = item

1154

1155 def __delitem__(self, i):

1156 try:

1157 del self.data[i]

1158 del self.items[i]

1159 if self.parent:

1160 del self.parent[i + self.parent_offset]

1161 except TypeError:

1162 assert i.step is None, 'cannot handle slice with stride'

1163 del self.data[i.start:i.stop]

1164 del self.items[i.start:i.stop]

1165 if self.parent:

1166 k = (i.start or 0) + self.parent_offset

1167 n = (i.stop or len(self)) + self.parent_offset

1168 del self.parent[k:n]

1169

1170 def __add__(self, other):

1171 if isinstance(other, ViewList):

1172 return self.__class__(self.data + other.data,

1173 items=(self.items + other.items))

1174 else:

1175 raise TypeError('adding non-ViewList to a ViewList')

1176

1177 def __radd__(self, other):

1178 if isinstance(other, ViewList):

1179 return self.__class__(other.data + self.data,

1180 items=(other.items + self.items))

1181 else:

1182 raise TypeError('adding ViewList to a non-ViewList')

1183

1184 def __iadd__(self, other):

1185 if isinstance(other, ViewList):

1186 self.data += other.data

1187 else:

1188 raise TypeError('argument to += must be a ViewList')

1189 return self

1190

1191 def __mul__(self, n):

1192 return self.__class__(self.data * n, items=(self.items * n))

1193

1194 __rmul__ = __mul__

1195

1196 def __imul__(self, n):

1197 self.data *= n

1198 self.items *= n

1199 return self

1200

1201 def extend(self, other):

1202 if not isinstance(other, ViewList):

1203 raise TypeError('extending a ViewList with a non-ViewList')

1204 if self.parent:

1205 self.parent.insert(len(self.data) + self.parent_offset, other)

1206 self.data.extend(other.data)

1207 self.items.extend(other.items)

1208

1209 def append(self, item, source=None, offset=0):

1210 if source is None:

1211 self.extend(item)

1212 else:

1213 if self.parent:

1214 self.parent.insert(len(self.data) + self.parent_offset, item,

1215 source, offset)

1216 self.data.append(item)

1217 self.items.append((source, offset))

1218

1219 def insert(self, i, item, source=None, offset=0):

1220 if source is None:

1221 if not isinstance(item, ViewList):

1222 raise TypeError('inserting non-ViewList with no source given')

1223 self.data[i:i] = item.data

1224 self.items[i:i] = item.items

1225 if self.parent:

1226 index = (len(self.data) + i) % len(self.data)

1227 self.parent.insert(index + self.parent_offset, item)

1228 else:

1229 self.data.insert(i, item)

1230 self.items.insert(i, (source, offset))

1231 if self.parent:

1232 index = (len(self.data) + i) % len(self.data)

1233 self.parent.insert(index + self.parent_offset, item,

1234 source, offset)

1235

1236 def pop(self, i=-1):

1237 if self.parent:

1238 index = (len(self.data) + i) % len(self.data)

1239 self.parent.pop(index + self.parent_offset)

1240 self.items.pop(i)

1241 return self.data.pop(i)

1242

1243 def trim_start(self, n=1):

1244 """

1245 Remove items from the start of the list, without touching the parent.

1246 """

1247 if n > len(self.data):

1248 raise IndexError("Size of trim too large; can't trim %s items "

1249 "from a list of size %s." % (n, len(self.data)))

1250 elif n < 0:

1251 raise IndexError('Trim size must be >= 0.')

1252 del self.data[:n]

1253 del self.items[:n]

1254 if self.parent:

1255 self.parent_offset += n

1256

1257 def trim_end(self, n=1):

1258 """

1259 Remove items from the end of the list, without touching the parent.

1260 """

1261 if n > len(self.data):

1262 raise IndexError("Size of trim too large; can't trim %s items "

1263 "from a list of size %s." % (n, len(self.data)))

1264 elif n < 0:

1265 raise IndexError('Trim size must be >= 0.')

1266 del self.data[-n:]

1267 del self.items[-n:]

1268

1269 def remove(self, item):

1270 index = self.index(item)

1271 del self[index]

1272

1273 def count(self, item):

1274 return self.data.count(item)

1275

1276 def index(self, item):

1277 return self.data.index(item)

1278

1279 def reverse(self):

1280 self.data.reverse()

1281 self.items.reverse()

1282 self.parent = None

1283

1284 def sort(self, *args):

1285 tmp = sorted(zip(self.data, self.items), *args)

1286 self.data = [entry[0] for entry in tmp]

1287 self.items = [entry[1] for entry in tmp]

1288 self.parent = None

1289

1290 def info(self, i):

1291 """Return source & offset for index `i`."""

1292 try:

1293 return self.items[i]

1294 except IndexError:

1295 if i == len(self.data): # Just past the end

1296 return self.items[i - 1][0], None

1297 else:

1298 raise

1299

1300 def source(self, i):

1301 """Return source for index `i`."""

1302 return self.info(i)[0]

1303

1304 def offset(self, i):

1305 """Return offset for index `i`."""

1306 return self.info(i)[1]

1307

1308 def disconnect(self):

1309 """Break link between this list and parent list."""

1310 self.parent = None

1311

1312 def xitems(self):

1313 """Return iterator yielding (source, offset, value) tuples."""

1314 for (value, (source, offset)) in zip(self.data, self.items):

1315 yield source, offset, value

1316

1317 def pprint(self):

1318 """Print the list in `grep` format (`source:offset:value` lines)"""

1319 for line in self.xitems():

1320 print("%s:%d:%s" % line)

1321

1322

1323class StringList(ViewList):

1324

1325 """A `ViewList` with string-specific methods."""

1326

1327 def trim_left(self, length, start=0, end=sys.maxsize):

1328 """

1329 Trim `length` characters off the beginning of each item, in-place,

1330 from index `start` to `end`. No whitespace-checking is done on the

1331 trimmed text. Does not affect slice parent.

1332 """

1333 self.data[start:end] = [line[length:]

1334 for line in self.data[start:end]]

1335

1336 def get_text_block(self, start, flush_left=False):

1337 """

1338 Return a contiguous block of text.

1339

1340 If `flush_left` is true, raise `UnexpectedIndentationError` if an

1341 indented line is encountered before the text block ends (with a blank

1342 line).

1343 """

1344 end = start

1345 last = len(self.data)

1346 while end < last:

1347 line = self.data[end]

1348 if not line.strip():

1349 break

1350 if flush_left and (line[0] == ' '):

1351 source, offset = self.info(end)

1352 raise UnexpectedIndentationError(self[start:end], source,

1353 offset + 1)

1354 end += 1

1355 return self[start:end]

1356

1357 def get_indented(self, start=0, until_blank=False, strip_indent=True,

1358 block_indent=None, first_indent=None):

1359 """

1360 Extract and return a StringList of indented lines of text.

1361

1362 Collect all lines with indentation, determine the minimum indentation,

1363 remove the minimum indentation from all indented lines (unless

1364 `strip_indent` is false), and return them. All lines up to but not

1365 including the first unindented line will be returned.

1366

1367 :Parameters:

1368 - `start`: The index of the first line to examine.

1369 - `until_blank`: Stop collecting at the first blank line if true.

1370 - `strip_indent`: Strip common leading indent if true (default).

1371 - `block_indent`: The indent of the entire block, if known.

1372 - `first_indent`: The indent of the first line, if known.

1373

1374 :Return:

1375 - a StringList of indented lines with minimum indent removed;

1376 - the amount of the indent;

1377 - a boolean: did the indented block finish with a blank line or EOF?

1378 """

1379 indent = block_indent # start with None if unknown

1380 end = start

1381 if block_indent is not None and first_indent is None:

1382 first_indent = block_indent

1383 if first_indent is not None:

1384 end += 1

1385 last = len(self.data)

1386 while end < last:

1387 line = self.data[end]

1388 if line and (line[0] != ' '

1389 or (block_indent is not None

1390 and line[:block_indent].strip())):

1391 # Line not indented or insufficiently indented.

1392 # Block finished properly iff the last indented line blank:

1393 blank_finish = ((end > start)

1394 and not self.data[end - 1].strip())

1395 break

1396 stripped = line.lstrip()

1397 if not stripped: # blank line

1398 if until_blank:

1399 blank_finish = 1

1400 break

1401 elif block_indent is None:

1402 line_indent = len(line) - len(stripped)

1403 if indent is None:

1404 indent = line_indent

1405 else:

1406 indent = min(indent, line_indent)

1407 end += 1

1408 else:

1409 blank_finish = 1 # block ends at end of lines

1410 block = self[start:end]

1411 if first_indent is not None and block:

1412 block.data[0] = block.data[0][first_indent:]

1413 if indent and strip_indent:

1414 block.trim_left(indent, start=(first_indent is not None))

1415 return block, indent or 0, blank_finish

1416

1417 def get_2D_block(self, top, left, bottom, right, strip_indent=True):

1418 block = self[top:bottom]

1419 indent = right

1420 for i in range(len(block.data)):

1421 # get slice from line, care for combining characters

1422 ci = utils.column_indices(block.data[i])

1423 try:

1424 left = ci[left]

1425 except IndexError:

1426 left += len(block.data[i]) - len(ci)

1427 try:

1428 right = ci[right]

1429 except IndexError:

1430 right += len(block.data[i]) - len(ci)

1431 block.data[i] = line = block.data[i][left:right].rstrip()

1432 if line:

1433 indent = min(indent, len(line) - len(line.lstrip()))

1434 if strip_indent and 0 < indent < right:

1435 block.data = [line[indent:] for line in block.data]

1436 return block

1437

1438 def pad_double_width(self, pad_char):

1439 """Pad all double-width characters in `self` appending `pad_char`.

1440

1441 For East Asian language support.

1442 """

1443 for i in range(len(self.data)):

1444 line = self.data[i]

1445 if isinstance(line, str):

1446 new = []

1447 for char in line:

1448 new.append(char)

1449 if east_asian_width(char) in 'WF': # Wide & Full-width

1450 new.append(pad_char)

1451 self.data[i] = ''.join(new)

1452

1453 def replace(self, old, new):

1454 """Replace all occurrences of substring `old` with `new`."""

1455 for i in range(len(self.data)):

1456 self.data[i] = self.data[i].replace(old, new)

1457

1458

1459class StateMachineError(Exception): pass

1460class UnknownStateError(StateMachineError): pass

1461class DuplicateStateError(StateMachineError): pass

1462class UnknownTransitionError(StateMachineError): pass

1463class DuplicateTransitionError(StateMachineError): pass

1464class TransitionPatternNotFound(StateMachineError): pass

1465class TransitionMethodNotFound(StateMachineError): pass

1466class UnexpectedIndentationError(StateMachineError): pass

1467

1468

1469class TransitionCorrection(Exception):

1470

1471 """

1472 Raise from within a transition method to switch to another transition.

1473

1474 Raise with one argument, the new transition name.

1475 """

1476

1477

1478class StateCorrection(Exception):

1479

1480 """

1481 Raise from within a transition method to switch to another state.

1482

1483 Raise with one or two arguments: new state name, and an optional new

1484 transition name.

1485 """

1486

1487

1488def string2lines(astring, tab_width=8, convert_whitespace=False,

1489 whitespace=re.compile('[\v\f]')):

1490 """

1491 Return a list of one-line strings with tabs expanded, no newlines, and

1492 trailing whitespace stripped.

1493

1494 Each tab is expanded with between 1 and `tab_width` spaces, so that the

1495 next character's index becomes a multiple of `tab_width` (8 by default).

1496

1497 Parameters:

1498

1499 - `astring`: a multi-line string.

1500 - `tab_width`: the number of columns between tab stops.

1501 - `convert_whitespace`: convert form feeds and vertical tabs to spaces?

1502 - `whitespace`: pattern object with the to-be-converted

1503 whitespace characters (default [\\v\\f]).

1504 """

1505 if convert_whitespace:

1506 astring = whitespace.sub(' ', astring)

1507 return [s.expandtabs(tab_width).rstrip() for s in astring.splitlines()]

1508

1509

1510def _exception_data():

1511 """

1512 Return exception information:

1513

1514 - the exception's class name;

1515 - the exception object;

1516 - the name of the file containing the offending code;

1517 - the line number of the offending code;

1518 - the function name of the offending code.

1519 """

1520 type, value, traceback = sys.exc_info()

1521 while traceback.tb_next:

1522 traceback = traceback.tb_next

1523 code = traceback.tb_frame.f_code

1524 return (type.__name__, value, code.co_filename, traceback.tb_lineno,

1525 code.co_name)