Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/lark/parsers/grammar_analysis.py: 95%
114 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:30 +0000
1"Provides for superficial grammar analysis."
3from collections import Counter, defaultdict
5from ..utils import bfs, fzset, classify
6from ..exceptions import GrammarError
7from ..grammar import Rule, Terminal, NonTerminal
class RulePtr:
    """A pointer into a rule's expansion — an LR "item".

    Pairs a ``rule`` with the ``index`` of the next symbol to be matched.
    Immutable in practice: ``advance`` returns a new instance.
    """
    __slots__ = ('rule', 'index')

    def __init__(self, rule, index):
        assert isinstance(rule, Rule)
        assert index <= len(rule.expansion)
        self.rule = rule
        self.index = index

    def __repr__(self):
        # Rendered as <origin : matched * remaining>, '*' marking the dot.
        before = [x.name for x in self.rule.expansion[:self.index]]
        after = [x.name for x in self.rule.expansion[self.index:]]
        return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after))

    @property
    def next(self):
        """The next symbol to be matched (IndexError when already satisfied)."""
        return self.rule.expansion[self.index]

    def advance(self, sym):
        """Return a new RulePtr moved one symbol forward past *sym*.

        *sym* must equal ``self.next``.
        """
        assert self.next == sym
        return RulePtr(self.rule, self.index + 1)

    @property
    def is_satisfied(self):
        """True when the dot has reached the end of the expansion."""
        return self.index == len(self.rule.expansion)

    def __eq__(self, other):
        # Fix: comparing against a non-RulePtr used to raise AttributeError
        # (unconditional access to other.rule). Returning NotImplemented lets
        # Python fall back to the reflected comparison (usually False).
        if not isinstance(other, RulePtr):
            return NotImplemented
        return self.rule == other.rule and self.index == other.index

    def __hash__(self):
        return hash((self.rule, self.index))
42# state generation ensures no duplicate LR0ItemSets
class LR0ItemSet:
    """A set of LR(0) items: a frozen kernel together with its closure.

    State generation guarantees no duplicate item sets, so the default
    identity-based equality/hashing is intentional.
    """
    __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads')

    def __init__(self, kernel, closure):
        self.kernel = fzset(kernel)
        self.closure = fzset(closure)
        self.transitions = {}
        self.lookaheads = defaultdict(set)

    def __repr__(self):
        kernel_str = ', '.join(repr(item) for item in self.kernel)
        closure_str = ', '.join(repr(item) for item in self.closure)
        return '{%s | %s}' % (kernel_str, closure_str)
def update_set(set1, set2):
    """Merge *set2* into *set1* in place.

    Returns True iff *set1* actually gained new members.
    """
    # Fast paths: nothing to add, or set1 already strictly contains set2.
    if not set2 or set1 > set2:
        return False

    size_before = len(set1)
    set1 |= set2
    return len(set1) != size_before
def calculate_sets(rules):
    """Calculate the NULLABLE, FIRST and FOLLOW sets of a grammar.

    Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets

    Parameters:
        rules: iterable of Rule-like objects exposing ``origin`` and
            ``expansion`` (symbols exposing ``is_term``).

    Returns:
        The tuple (FIRST, FOLLOW, NULLABLE).

    Performance note: the previous version rebuilt ``set(exp[:i])`` /
    ``set(exp[i+1:j])`` for every (i, j) pair on every fixed-point pass;
    this version walks nullable prefixes incrementally and precomputes
    suffix nullability once per rule per pass.
    """
    symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}

    NULLABLE = set()
    # A terminal's FIRST set is just itself; nonterminals start empty.
    FIRST = {sym: ({sym} if sym.is_term else set()) for sym in symbols}
    FOLLOW = {sym: set() for sym in symbols}

    def _absorb(dst, src):
        # In-place union; True iff dst gained members. Self-contained
        # equivalent of update_set, without the temporary copy.
        size_before = len(dst)
        dst |= src
        return len(dst) != size_before

    # Phase 1: NULLABLE and FIRST, iterated to a fixed point.
    changed = True
    while changed:
        changed = False
        for rule in rules:
            exp = rule.expansion
            # X ::= Y(1)...Y(k) with every Y nullable (or k == 0) => X nullable.
            if all(s in NULLABLE for s in exp):
                changed |= _absorb(NULLABLE, {rule.origin})
            # FIRST(X) absorbs FIRST(Y(i)) while Y(1)..Y(i-1) are all nullable;
            # stop at the first non-nullable symbol.
            for sym in exp:
                changed |= _absorb(FIRST[rule.origin], FIRST[sym])
                if sym not in NULLABLE:
                    break

    # Phase 2: FOLLOW, iterated to a fixed point.
    # NULLABLE and FIRST are final here — phase 2 only mutates FOLLOW.
    changed = True
    while changed:
        changed = False
        for rule in rules:
            exp = rule.expansion
            # suffix_nullable[i] == True iff every symbol of exp[i:] is nullable.
            suffix_nullable = [True] * (len(exp) + 1)
            for i in range(len(exp) - 1, -1, -1):
                suffix_nullable[i] = suffix_nullable[i + 1] and exp[i] in NULLABLE
            for i, sym in enumerate(exp):
                # Everything after position i can vanish => origin's FOLLOW applies.
                if suffix_nullable[i + 1]:
                    changed |= _absorb(FOLLOW[sym], FOLLOW[rule.origin])
                # FIRST of each symbol reachable across a nullable gap follows sym;
                # stop once a non-nullable symbol has been absorbed.
                for nxt in exp[i + 1:]:
                    changed |= _absorb(FOLLOW[sym], FIRST[nxt])
                    if nxt not in NULLABLE:
                        break

    return FIRST, FOLLOW, NULLABLE
class GrammarAnalyzer:
    """Performs superficial analysis of a grammar.

    Builds the augmented start/end states (LALR and LR(0) variants) and the
    FIRST/FOLLOW/NULLABLE sets, validating the rule set along the way.
    """

    def __init__(self, parser_conf, debug=False, strict=False):
        self.debug = debug
        self.strict = strict

        # Augment the grammar with  $root_<start> ::= <start> $END  per start symbol.
        root_rules = {}
        for start in parser_conf.start:
            root_rules[start] = Rule(NonTerminal('$root_' + start),
                                     [NonTerminal(start), Terminal('$END')])

        rules = parser_conf.rules + list(root_rules.values())
        self.rules_by_origin = classify(rules, lambda r: r.origin)

        if len(set(rules)) != len(rules):
            duplicates = [rule for rule, count in Counter(rules).items() if count > 1]
            raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))

        for rule in rules:
            for sym in rule.expansion:
                if not (sym.is_term or sym in self.rules_by_origin):
                    raise GrammarError("Using an undefined rule: %s" % sym)

        self.start_states = {}
        self.end_states = {}
        for start, root_rule in root_rules.items():
            self.start_states[start] = self.expand_rule(root_rule.origin)
            self.end_states[start] = fzset({RulePtr(root_rule, len(root_rule.expansion))})

        # LR(0) variant of the root rules: no explicit $END terminal.
        lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
                          for start in parser_conf.start}

        lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
        assert len(lr0_rules) == len(set(lr0_rules))

        self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

        # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
        self.lr0_start_states = {
            start: LR0ItemSet([RulePtr(root_rule, 0)],
                              self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
            for start, root_rule in lr0_root_rules.items()}

        self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

    def expand_rule(self, source_rule, rules_by_origin=None):
        """Return the frozen set of all initial RulePtrs reachable from *source_rule*.

        Follows leading nonterminals recursively (breadth-first).
        """
        if rules_by_origin is None:
            rules_by_origin = self.rules_by_origin

        init_ptrs = set()

        def _expand(nonterminal):
            assert not nonterminal.is_term, nonterminal
            for rule in rules_by_origin[nonterminal]:
                ptr = RulePtr(rule, 0)
                init_ptrs.add(ptr)
                if rule.expansion:  # empty rules have nothing to expand
                    head = ptr.next
                    if not head.is_term:
                        yield head

        for _ in bfs([source_rule], _expand):
            pass

        return fzset(init_ptrs)