Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/lark/parsers/grammar_analysis.py: 95%

114 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:30 +0000

1"Provides for superficial grammar analysis." 

2 

3from collections import Counter, defaultdict 

4 

5from ..utils import bfs, fzset, classify 

6from ..exceptions import GrammarError 

7from ..grammar import Rule, Terminal, NonTerminal 

8 

9 

class RulePtr:
    """A dotted item: a pointer into a rule's expansion.

    ``index`` counts how many symbols of ``rule.expansion`` have been
    consumed; ``repr`` renders the dot as ``*`` (e.g. ``<X : a b * c>``).
    """
    __slots__ = ('rule', 'index')

    def __init__(self, rule, index):
        assert isinstance(rule, Rule)
        # index == len(expansion) is allowed: the fully-satisfied item.
        assert index <= len(rule.expansion)
        self.rule = rule
        self.index = index

    def __repr__(self):
        before = [x.name for x in self.rule.expansion[:self.index]]
        after = [x.name for x in self.rule.expansion[self.index:]]
        return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after))

    @property
    def next(self):
        # Symbol immediately after the dot; raises IndexError if is_satisfied.
        return self.rule.expansion[self.index]

    def advance(self, sym):
        # Only the expected next symbol may be consumed.
        assert self.next == sym
        return RulePtr(self.rule, self.index+1)

    @property
    def is_satisfied(self):
        # True when the dot has reached the end of the expansion.
        return self.index == len(self.rule.expansion)

    def __eq__(self, other):
        # Fix: comparing against a non-RulePtr used to raise AttributeError;
        # return NotImplemented so Python can fall back to the other operand.
        if not isinstance(other, RulePtr):
            return NotImplemented
        return self.rule == other.rule and self.index == other.index

    def __hash__(self):
        # Consistent with __eq__: equal (rule, index) pairs hash equally.
        return hash((self.rule, self.index))

40 

41 

# state generation ensures no duplicate LR0ItemSets
class LR0ItemSet:
    """A set of LR(0) items: a kernel plus its closure.

    ``transitions`` and ``lookaheads`` start empty and are filled in later
    by the state-generation machinery.
    """
    __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads')

    def __init__(self, kernel, closure):
        self.kernel = fzset(kernel)
        self.closure = fzset(closure)
        self.transitions = {}
        self.lookaheads = defaultdict(set)

    def __repr__(self):
        kernel_part = ', '.join([repr(r) for r in self.kernel])
        closure_part = ', '.join([repr(r) for r in self.closure])
        return '{%s | %s}' % (kernel_part, closure_part)

54 

55 

def update_set(set1, set2):
    """Merge *set2* into *set1* in place; return True iff *set1* grew."""
    # Cheap exits: nothing to merge, or set1 already strictly contains set2.
    if not set2 or set1 > set2:
        return False

    size_before = len(set1)
    set1 |= set2
    # Union only ever adds elements, so a size change means growth.
    return len(set1) != size_before

63 

def calculate_sets(rules):
    """Compute the NULLABLE, FIRST and FOLLOW sets of a grammar.

    Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets

    Returns a ``(FIRST, FOLLOW, NULLABLE)`` triple: FIRST and FOLLOW map each
    grammar symbol to a set of symbols; NULLABLE is the set of symbols that
    can derive the empty string.
    """
    # foreach grammar rule X ::= Y(1) ... Y(k)
    #   if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
    #     NULLABLE = NULLABLE union {X}
    #   for i = 1 to k
    #     if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
    #       FIRST(X) = FIRST(X) union FIRST(Y(i))
    #     for j = i+1 to k
    #       if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
    #         FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
    #       if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
    #         FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
    # until none of NULLABLE,FIRST,FOLLOW changed in last iteration

    symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}

    NULLABLE = set()
    FIRST = {}
    FOLLOW = {}
    for sym in symbols:
        # A terminal's FIRST set is just itself; non-terminals start empty.
        FIRST[sym] = {sym} if sym.is_term else set()
        FOLLOW[sym] = set()

    # Fixpoint pass 1: grow NULLABLE and FIRST until stable.
    changed = True
    while changed:
        changed = False

        for rule in rules:
            # X is nullable when every symbol of its expansion is nullable
            # (vacuously true for an empty expansion).
            if set(rule.expansion) <= NULLABLE:
                changed |= update_set(NULLABLE, {rule.origin})

            for i, sym in enumerate(rule.expansion):
                # FIRST(Y(i)) feeds FIRST(X) only while the prefix before
                # Y(i) is entirely nullable; stop at the first blocker.
                if not set(rule.expansion[:i]) <= NULLABLE:
                    break
                changed |= update_set(FIRST[rule.origin], FIRST[sym])

    # Fixpoint pass 2: grow FOLLOW until stable.
    changed = True
    while changed:
        changed = False

        for rule in rules:
            k = len(rule.expansion)
            for i, sym in enumerate(rule.expansion):
                # Nullable suffix: whatever may follow X may follow Y(i).
                if i == k - 1 or set(rule.expansion[i+1:]) <= NULLABLE:
                    changed |= update_set(FOLLOW[sym], FOLLOW[rule.origin])

                # FIRST of any symbol reachable right after Y(i) (through a
                # nullable gap) follows Y(i).
                for j in range(i + 1, k):
                    if set(rule.expansion[i+1:j]) <= NULLABLE:
                        changed |= update_set(FOLLOW[sym], FIRST[rule.expansion[j]])

    return FIRST, FOLLOW, NULLABLE

124 

125 

class GrammarAnalyzer:
    """Precomputes the static grammar data the LALR machinery needs:
    rules grouped by origin, start/end states, and FIRST/FOLLOW/NULLABLE."""

    def __init__(self, parser_conf, debug=False, strict=False):
        self.debug = debug
        self.strict = strict

        # One augmented rule per start symbol: $root_<start> -> <start> $END
        root_rules = {}
        for start in parser_conf.start:
            root_rules[start] = Rule(NonTerminal('$root_' + start),
                                     [NonTerminal(start), Terminal('$END')])

        rules = parser_conf.rules + list(root_rules.values())
        self.rules_by_origin = classify(rules, lambda r: r.origin)

        if len(rules) != len(set(rules)):
            repeated = [rule for rule, count in Counter(rules).items() if count > 1]
            raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in repeated))

        # Every non-terminal referenced in an expansion must have a definition.
        for rule in rules:
            for sym in rule.expansion:
                if not (sym.is_term or sym in self.rules_by_origin):
                    raise GrammarError("Using an undefined rule: %s" % sym)

        self.start_states = {}
        self.end_states = {}
        for start, root_rule in root_rules.items():
            self.start_states[start] = self.expand_rule(root_rule.origin)
            # The end state is the fully-satisfied root item.
            self.end_states[start] = fzset({RulePtr(root_rule, len(root_rule.expansion))})

        # LR(0) variant of the augmented rules, without the $END terminal.
        lr0_root_rules = {}
        for start in parser_conf.start:
            lr0_root_rules[start] = Rule(NonTerminal('$root_' + start), [NonTerminal(start)])

        lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
        assert len(lr0_rules) == len(set(lr0_rules))

        self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

        # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
        self.lr0_start_states = {}
        for start, root_rule in lr0_root_rules.items():
            self.lr0_start_states[start] = LR0ItemSet(
                [RulePtr(root_rule, 0)],
                self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))

        self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

    def expand_rule(self, source_rule, rules_by_origin=None):
        """Return all init_ptrs (RulePtr at index 0) reachable from *source_rule* (recursive)."""
        if rules_by_origin is None:
            rules_by_origin = self.rules_by_origin

        init_ptrs = set()

        def _expand(nonterm):
            assert not nonterm.is_term, nonterm

            for r in rules_by_origin[nonterm]:
                ptr = RulePtr(r, 0)
                init_ptrs.add(ptr)

                if r.expansion:  # empty rules have no leading symbol to follow
                    first_sym = ptr.next
                    if not first_sym.is_term:
                        yield first_sym

        # Exhaust the BFS purely for its side effect of filling init_ptrs.
        for _ in bfs([source_rule], _expand):
            pass

        return fzset(init_ptrs)