Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/lark/parsers/grammar_analysis.py: 94%
126 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-14 06:19 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-14 06:19 +0000
1"Provides for superficial grammar analysis."
3from collections import Counter, defaultdict
4from typing import List, Dict, Iterator, FrozenSet, Set
6from ..utils import bfs, fzset, classify
7from ..exceptions import GrammarError
8from ..grammar import Rule, Terminal, NonTerminal, Symbol
9from ..common import ParserConf
12class RulePtr:
13 __slots__ = ('rule', 'index')
14 rule: Rule
15 index: int
17 def __init__(self, rule: Rule, index: int):
18 assert isinstance(rule, Rule)
19 assert index <= len(rule.expansion)
20 self.rule = rule
21 self.index = index
23 def __repr__(self):
24 before = [x.name for x in self.rule.expansion[:self.index]]
25 after = [x.name for x in self.rule.expansion[self.index:]]
26 return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after))
28 @property
29 def next(self) -> Symbol:
30 return self.rule.expansion[self.index]
32 def advance(self, sym: Symbol) -> 'RulePtr':
33 assert self.next == sym
34 return RulePtr(self.rule, self.index+1)
36 @property
37 def is_satisfied(self) -> bool:
38 return self.index == len(self.rule.expansion)
40 def __eq__(self, other) -> bool:
41 if not isinstance(other, RulePtr):
42 return NotImplemented
43 return self.rule == other.rule and self.index == other.index
45 def __hash__(self) -> int:
46 return hash((self.rule, self.index))
49State = FrozenSet[RulePtr]
51# state generation ensures no duplicate LR0ItemSets
52class LR0ItemSet:
53 __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads')
55 kernel: State
56 closure: State
57 transitions: Dict[Symbol, 'LR0ItemSet']
58 lookaheads: Dict[Symbol, Set[Rule]]
60 def __init__(self, kernel, closure):
61 self.kernel = fzset(kernel)
62 self.closure = fzset(closure)
63 self.transitions = {}
64 self.lookaheads = defaultdict(set)
66 def __repr__(self):
67 return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure]))
70def update_set(set1, set2):
71 if not set2 or set1 > set2:
72 return False
74 copy = set(set1)
75 set1 |= set2
76 return set1 != copy
78def calculate_sets(rules):
79 """Calculate FOLLOW sets.
81 Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
82 symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
84 # foreach grammar rule X ::= Y(1) ... Y(k)
85 # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
86 # NULLABLE = NULLABLE union {X}
87 # for i = 1 to k
88 # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
89 # FIRST(X) = FIRST(X) union FIRST(Y(i))
90 # for j = i+1 to k
91 # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
92 # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
93 # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
94 # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
95 # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
97 NULLABLE = set()
98 FIRST = {}
99 FOLLOW = {}
100 for sym in symbols:
101 FIRST[sym]={sym} if sym.is_term else set()
102 FOLLOW[sym]=set()
104 # Calculate NULLABLE and FIRST
105 changed = True
106 while changed:
107 changed = False
109 for rule in rules:
110 if set(rule.expansion) <= NULLABLE:
111 if update_set(NULLABLE, {rule.origin}):
112 changed = True
114 for i, sym in enumerate(rule.expansion):
115 if set(rule.expansion[:i]) <= NULLABLE:
116 if update_set(FIRST[rule.origin], FIRST[sym]):
117 changed = True
118 else:
119 break
121 # Calculate FOLLOW
122 changed = True
123 while changed:
124 changed = False
126 for rule in rules:
127 for i, sym in enumerate(rule.expansion):
128 if i==len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE:
129 if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
130 changed = True
132 for j in range(i+1, len(rule.expansion)):
133 if set(rule.expansion[i+1:j]) <= NULLABLE:
134 if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
135 changed = True
137 return FIRST, FOLLOW, NULLABLE
140class GrammarAnalyzer:
141 def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
142 self.debug = debug
143 self.strict = strict
145 root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
146 for start in parser_conf.start}
148 rules = parser_conf.rules + list(root_rules.values())
149 self.rules_by_origin: Dict[NonTerminal, List[Rule]] = classify(rules, lambda r: r.origin)
151 if len(rules) != len(set(rules)):
152 duplicates = [item for item, count in Counter(rules).items() if count > 1]
153 raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))
155 for r in rules:
156 for sym in r.expansion:
157 if not (sym.is_term or sym in self.rules_by_origin):
158 raise GrammarError("Using an undefined rule: %s" % sym)
160 self.start_states = {start: self.expand_rule(root_rule.origin)
161 for start, root_rule in root_rules.items()}
163 self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))})
164 for start, root_rule in root_rules.items()}
166 lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
167 for start in parser_conf.start}
169 lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
170 assert(len(lr0_rules) == len(set(lr0_rules)))
172 self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)
174 # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
175 self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
176 for start, root_rule in lr0_root_rules.items()}
178 self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
180 def expand_rule(self, source_rule: NonTerminal, rules_by_origin=None) -> State:
181 "Returns all init_ptrs accessible by rule (recursive)"
183 if rules_by_origin is None:
184 rules_by_origin = self.rules_by_origin
186 init_ptrs = set()
187 def _expand_rule(rule: NonTerminal) -> Iterator[NonTerminal]:
188 assert not rule.is_term, rule
190 for r in rules_by_origin[rule]:
191 init_ptr = RulePtr(r, 0)
192 init_ptrs.add(init_ptr)
194 if r.expansion: # if not empty rule
195 new_r = init_ptr.next
196 if not new_r.is_term:
197 assert isinstance(new_r, NonTerminal)
198 yield new_r
200 for _ in bfs([source_rule], _expand_rule):
201 pass
203 return fzset(init_ptrs)