Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/blib2to3/pgen2/grammar.py: 70%

70 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:15 +0000

1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. 

2# Licensed to PSF under a Contributor Agreement. 

3 

4"""This module defines the data structures used to represent a grammar. 

5 

6These are a bit arcane because they are derived from the data 

7structures used by Python's 'pgen' parser generator. 

8 

9There's also a table here mapping operators to their names in the 

10token module; the Python tokenize module reports all operators as the 

11fallback token code OP, but the parser needs the actual token code. 

12 

13""" 

14 

15# Python imports 

16import os 

17import pickle 

18import tempfile 

19from typing import Any, Dict, List, Optional, Text, Tuple, TypeVar, Union 

20 

21# Local imports 

22from . import token 

23 

_P = TypeVar("_P", bound="Grammar")

# A grammar label: (token or symbol number, keyword string or None).
Label = Tuple[int, Optional[Text]]
# A DFA is a list of states; each state is a list of (label, state) arcs.
DFA = List[List[Tuple[int, int]]]
# A DFA together with its "first" set (a dict used as a set).
DFAS = Tuple[DFA, Dict[int, int]]
Path = Union[str, "os.PathLike[str]"]


class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    soft_keywords -- a dict mapping soft (context-dependent) keyword
                     strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    """

    def __init__(self) -> None:
        self.symbol2number: Dict[str, int] = {}
        self.number2symbol: Dict[int, str] = {}
        self.states: List[DFA] = []
        self.dfas: Dict[int, DFAS] = {}
        # Label 0 is reserved for EMPTY (see the class docstring).
        self.labels: List[Label] = [(0, "EMPTY")]
        self.keywords: Dict[str, int] = {}
        self.soft_keywords: Dict[str, int] = {}
        self.tokens: Dict[int, int] = {}
        self.symbol2label: Dict[str, int] = {}
        self.version: Tuple[int, int] = (0, 0)
        # 256 is the first symbol number (token numbers stop at 255).
        self.start = 256
        # Python 3.7+ parses async as a keyword, not an identifier
        self.async_keywords = False

    def dump(self, filename: Path) -> None:
        """Dump the grammar tables to a pickle file.

        The pickle is written to a temporary file in the destination
        directory and then atomically renamed into place via
        os.replace(), so a concurrent reader can never observe a
        partially-written grammar pickle.  If pickling fails, the
        temporary file is removed instead of being left behind.
        """
        # mypyc generates objects that don't have a __dict__, but they
        # do have __getstate__ methods that will return an equivalent
        # dictionary
        if hasattr(self, "__dict__"):
            d = self.__dict__
        else:
            d = self.__getstate__()  # type: ignore

        f = tempfile.NamedTemporaryFile(
            dir=os.path.dirname(filename), delete=False
        )
        try:
            with f:
                pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)
        except BaseException:
            # delete=False means nothing else cleans this up; don't
            # leak a stale temp file when pickling/writing fails.
            os.unlink(f.name)
            raise
        os.replace(f.name, filename)

    def _update(self, attrs: Dict[str, Any]) -> None:
        """Set each (name, value) pair in *attrs* as an attribute."""
        for k, v in attrs.items():
            setattr(self, k, v)

    def load(self, filename: Path) -> None:
        """Load the grammar tables from a pickle file."""
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self._update(d)

    def loads(self, pkl: bytes) -> None:
        """Load the grammar tables from a pickle bytes object."""
        self._update(pickle.loads(pkl))

    def copy(self: _P) -> _P:
        """Copy the grammar.

        Dict and list attributes are copied one level deep, so the
        copy's tables can be modified independently; the items they
        contain are still shared with the original.
        """
        new = self.__class__()
        for dict_attr in (
            "symbol2number",
            "number2symbol",
            "dfas",
            "keywords",
            "soft_keywords",
            "tokens",
            "symbol2label",
        ):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        new.version = self.version
        new.async_keywords = self.async_keywords
        return new

    def report(self) -> None:
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint

        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)

168 

169 

# Map from operator to number (since tokenize doesn't do this)

opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
@= ATEQUAL
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
:= COLONEQUAL
"""

# Parse the two-column table above: each non-blank line holds an
# operator string and the name of its token constant; look the name
# up on the token module to get the actual token number.
opmap = {
    operator: getattr(token, token_name)
    for operator, token_name in (
        entry.split() for entry in opmap_raw.splitlines() if entry
    )
}