Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/lark/parsers/grammar_analysis.py: 94%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

127 statements  

1"Provides for superficial grammar analysis." 

2 

3from collections import Counter, defaultdict 

4from typing import List, Dict, Iterator, FrozenSet, Set 

5 

6from ..utils import bfs, fzset, classify, OrderedSet 

7from ..exceptions import GrammarError 

8from ..grammar import Rule, Terminal, NonTerminal, Symbol 

9from ..common import ParserConf 

10 

11 

12class RulePtr: 

13 __slots__ = ('rule', 'index') 

14 rule: Rule 

15 index: int 

16 

17 def __init__(self, rule: Rule, index: int): 

18 assert isinstance(rule, Rule) 

19 assert index <= len(rule.expansion) 

20 self.rule = rule 

21 self.index = index 

22 

23 def __repr__(self): 

24 before = [x.name for x in self.rule.expansion[:self.index]] 

25 after = [x.name for x in self.rule.expansion[self.index:]] 

26 return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after)) 

27 

28 @property 

29 def next(self) -> Symbol: 

30 return self.rule.expansion[self.index] 

31 

32 def advance(self, sym: Symbol) -> 'RulePtr': 

33 assert self.next == sym 

34 return RulePtr(self.rule, self.index+1) 

35 

36 @property 

37 def is_satisfied(self) -> bool: 

38 return self.index == len(self.rule.expansion) 

39 

40 def __eq__(self, other) -> bool: 

41 if not isinstance(other, RulePtr): 

42 return NotImplemented 

43 return self.rule == other.rule and self.index == other.index 

44 

45 def __hash__(self) -> int: 

46 return hash((self.rule, self.index)) 

47 

48 

49State = FrozenSet[RulePtr] 

50 

51# state generation ensures no duplicate LR0ItemSets 

52class LR0ItemSet: 

53 __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads') 

54 

55 kernel: State 

56 closure: State 

57 transitions: Dict[Symbol, 'LR0ItemSet'] 

58 lookaheads: Dict[Symbol, Set[Rule]] 

59 

60 def __init__(self, kernel, closure): 

61 self.kernel = fzset(kernel) 

62 self.closure = fzset(closure) 

63 self.transitions = {} 

64 self.lookaheads = defaultdict(set) 

65 

66 def __repr__(self): 

67 return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure])) 

68 

69 

70def update_set(set1, set2): 

71 if not set2 or set1 > set2: 

72 return False 

73 

74 copy = set(set1) 

75 set1 |= set2 

76 return set1 != copy 

77 

78def calculate_sets(rules): 

79 """Calculate FOLLOW sets. 

80 

81 Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets""" 

82 symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules} 

83 

84 # foreach grammar rule X ::= Y(1) ... Y(k) 

85 # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then 

86 # NULLABLE = NULLABLE union {X} 

87 # for i = 1 to k 

88 # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then 

89 # FIRST(X) = FIRST(X) union FIRST(Y(i)) 

90 # for j = i+1 to k 

91 # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then 

92 # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X) 

93 # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then 

94 # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j)) 

95 # until none of NULLABLE,FIRST,FOLLOW changed in last iteration 

96 

97 NULLABLE = set() 

98 FIRST = {} 

99 FOLLOW = {} 

100 for sym in symbols: 

101 FIRST[sym]={sym} if sym.is_term else set() 

102 FOLLOW[sym]=set() 

103 

104 # Calculate NULLABLE and FIRST 

105 changed = True 

106 while changed: 

107 changed = False 

108 

109 for rule in rules: 

110 if set(rule.expansion) <= NULLABLE: 

111 if update_set(NULLABLE, {rule.origin}): 

112 changed = True 

113 

114 for i, sym in enumerate(rule.expansion): 

115 if set(rule.expansion[:i]) <= NULLABLE: 

116 if update_set(FIRST[rule.origin], FIRST[sym]): 

117 changed = True 

118 else: 

119 break 

120 

121 # Calculate FOLLOW 

122 changed = True 

123 while changed: 

124 changed = False 

125 

126 for rule in rules: 

127 for i, sym in enumerate(rule.expansion): 

128 if i==len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE: 

129 if update_set(FOLLOW[sym], FOLLOW[rule.origin]): 

130 changed = True 

131 

132 for j in range(i+1, len(rule.expansion)): 

133 if set(rule.expansion[i+1:j]) <= NULLABLE: 

134 if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]): 

135 changed = True 

136 

137 return FIRST, FOLLOW, NULLABLE 

138 

139 

140class GrammarAnalyzer: 

141 def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False): 

142 self.debug = debug 

143 self.strict = strict 

144 

145 root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')]) 

146 for start in parser_conf.start} 

147 

148 rules = parser_conf.rules + list(root_rules.values()) 

149 self.rules_by_origin: Dict[NonTerminal, List[Rule]] = classify(rules, lambda r: r.origin) 

150 

151 if len(rules) != len(set(rules)): 

152 duplicates = [item for item, count in Counter(rules).items() if count > 1] 

153 raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates)) 

154 

155 for r in rules: 

156 for sym in r.expansion: 

157 if not (sym.is_term or sym in self.rules_by_origin): 

158 raise GrammarError("Using an undefined rule: %s" % sym) 

159 

160 self.start_states = {start: self.expand_rule(root_rule.origin) 

161 for start, root_rule in root_rules.items()} 

162 

163 self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))}) 

164 for start, root_rule in root_rules.items()} 

165 

166 lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)]) 

167 for start in parser_conf.start} 

168 

169 lr0_rules = parser_conf.rules + list(lr0_root_rules.values()) 

170 assert(len(lr0_rules) == len(set(lr0_rules))) 

171 

172 self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin) 

173 

174 # cache RulePtr(r, 0) in r (no duplicate RulePtr objects) 

175 self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin)) 

176 for start, root_rule in lr0_root_rules.items()} 

177 

178 self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules) 

179 

180 def expand_rule(self, source_rule: NonTerminal, rules_by_origin=None) -> OrderedSet[RulePtr]: 

181 "Returns all init_ptrs accessible by rule (recursive)" 

182 

183 if rules_by_origin is None: 

184 rules_by_origin = self.rules_by_origin 

185 

186 init_ptrs = OrderedSet[RulePtr]() 

187 def _expand_rule(rule: NonTerminal) -> Iterator[NonTerminal]: 

188 assert not rule.is_term, rule 

189 

190 for r in rules_by_origin[rule]: 

191 init_ptr = RulePtr(r, 0) 

192 init_ptrs.add(init_ptr) 

193 

194 if r.expansion: # if not empty rule 

195 new_r = init_ptr.next 

196 if not new_r.is_term: 

197 assert isinstance(new_r, NonTerminal) 

198 yield new_r 

199 

200 for _ in bfs([source_rule], _expand_rule): 

201 pass 

202 

203 return init_ptrs