Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/blib2to3/pgen2/grammar.py: 70%

70 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:15 +0000

1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. 

2# Licensed to PSF under a Contributor Agreement. 

3 

4"""This module defines the data structures used to represent a grammar. 

5 

6These are a bit arcane because they are derived from the data 

7structures used by Python's 'pgen' parser generator. 

8 

9There's also a table here mapping operators to their names in the 

10token module; the Python tokenize module reports all operators as the 

11fallback token code OP, but the parser needs the actual token code. 

12 

13""" 

14 

15# Python imports 

16import os 

17import pickle 

18import tempfile 

19from typing import Any, Dict, List, Optional, Text, Tuple, TypeVar, Union 

20 

21# Local imports 

22from . import token 

23 

_P = TypeVar("_P", bound="Grammar")

# A grammar label: (token or symbol number, keyword string or None).
Label = Tuple[int, Optional[Text]]
# A DFA is a list of states; each state is a list of (label, state) arcs.
DFA = List[List[Tuple[int, int]]]
# A DFA together with its "first" set (a dict used as a set).
DFAS = Tuple[DFA, Dict[int, int]]
Path = Union[str, "os.PathLike[str]"]


class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    soft_keywords -- a dict mapping soft (context-dependent) keyword
                     strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    """

    def __init__(self) -> None:
        self.symbol2number: Dict[str, int] = {}
        self.number2symbol: Dict[int, str] = {}
        self.states: List[DFA] = []
        self.dfas: Dict[int, DFAS] = {}
        # Label 0 is reserved for EMPTY (see the class docstring).
        self.labels: List[Label] = [(0, "EMPTY")]
        self.keywords: Dict[str, int] = {}
        self.soft_keywords: Dict[str, int] = {}
        self.tokens: Dict[int, int] = {}
        self.symbol2label: Dict[str, int] = {}
        self.version: Tuple[int, int] = (0, 0)
        # 256 is the first symbol number (token numbers stop at 255).
        self.start = 256
        # Python 3.7+ parses async as a keyword, not an identifier
        self.async_keywords = False

    def dump(self, filename: Path) -> None:
        """Dump the grammar tables to a pickle file.

        The pickle is written to a temporary file in the destination
        directory and then atomically renamed into place via
        os.replace(), so a concurrent reader can never observe a
        partially-written grammar pickle.  If pickling fails, the
        temporary file is removed instead of being left behind.
        """
        # mypyc generates objects that don't have a __dict__, but they
        # do have __getstate__ methods that will return an equivalent
        # dictionary
        if hasattr(self, "__dict__"):
            d = self.__dict__
        else:
            d = self.__getstate__()  # type: ignore

        f = tempfile.NamedTemporaryFile(
            dir=os.path.dirname(filename), delete=False
        )
        try:
            with f:
                pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)
        except BaseException:
            # delete=False means nothing else cleans this up; don't
            # leak a stale temp file when pickling/writing fails.
            os.unlink(f.name)
            raise
        os.replace(f.name, filename)

    def _update(self, attrs: Dict[str, Any]) -> None:
        """Set each (name, value) pair in *attrs* as an attribute."""
        for k, v in attrs.items():
            setattr(self, k, v)

    def load(self, filename: Path) -> None:
        """Load the grammar tables from a pickle file."""
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self._update(d)

    def loads(self, pkl: bytes) -> None:
        """Load the grammar tables from a pickle bytes object."""
        self._update(pickle.loads(pkl))

    def copy(self: _P) -> _P:
        """Copy the grammar.

        Dict and list attributes are copied one level deep, so the
        copy's tables can be modified independently; the items they
        contain are still shared with the original.
        """
        new = self.__class__()
        for dict_attr in (
            "symbol2number",
            "number2symbol",
            "dfas",
            "keywords",
            "soft_keywords",
            "tokens",
            "symbol2label",
        ):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        new.version = self.version
        new.async_keywords = self.async_keywords
        return new

    def report(self) -> None:
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint

        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)

168 

169 

# Map from operator to number (since tokenize doesn't do this)

opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
@= ATEQUAL
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
:= COLONEQUAL
"""

# Parse the two-column table above: each non-blank line holds an
# operator string and the name of its token constant; look the name
# up on the token module to get the actual token number.
opmap = {
    operator: getattr(token, token_name)
    for operator, token_name in (
        entry.split() for entry in opmap_raw.splitlines() if entry
    )
}