Import Upstream version 20180207
[hcoop/debian/mlton.git] / mlyacc / src / coreutils.sml
1 (* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
2
3 functor mkCoreUtils(structure Core : CORE) : CORE_UTILS =
4 struct
5 val sub = Array.sub
6 infix 9 sub
7 val DEBUG = true
8 structure Core = Core
9 structure IntGrammar = Core.IntGrammar
10 structure Grammar = IntGrammar.Grammar
11
12 open Grammar IntGrammar Core
13
14 structure Assoc = SymbolAssoc
15
16 structure NtList = ListOrdSet
17 (struct
18 type elem = nonterm
19 val eq = eqNonterm
20 val gt = gtNonterm
21 end)
22
23 val mkFuncs = fn (GRAMMAR {rules,terms,nonterms,...}) =>
24 let val derives=Array.array(nonterms,nil : rule list)
25
26 (* sort rules by their lhs nonterminal by placing them in an array indexed
27 in their lhs nonterminal *)
28
29 val _ =
30 let val f = fn {lhs=lhs as (NT n), rhs, precedence,rulenum} =>
31 let val rule=RULE{lhs=lhs,rhs=rhs,precedence=precedence,
32 rulenum=rulenum,num=0}
33 in Array.update(derives,n,rule::(derives sub n))
34 end
35 in app f rules
36 end
37
38 (* renumber rules so that rule numbers increase monotonically with
39 the number of their lhs nonterminal, and so that rules are numbered
40 sequentially. **Functions below assume that this number is true**,
41 i.e. productions for nonterm i are numbered from j to k,
42 productions for nonterm i+1 are numbered from k+1 to m, and
43 productions for nonterm 0 start at 0 *)
44
45 val _ =
46 let val f =
47 fn (RULE{lhs,rhs,precedence,rulenum,num}, (l,i)) =>
48 (RULE{lhs=lhs,rhs=rhs, precedence=precedence,
49 rulenum=rulenum, num=i}::l,i+1)
50 fun g(i,num) =
51 if i<nonterms then
52 let val (l,n) =
53 List.foldr f ([], num) (derives sub i)
54 in Array.update(derives,i,rev l); g(i+1,n)
55 end
56 else ()
57 in g(0,0)
58 end
59
60 (* list of rules - sorted by rule number. *)
61
62 val rules =
63 let fun g i =
64 if i < nonterms then (derives sub i) @ (g (i+1))
65 else nil
66 in g 0
67 end
68
69 (* produces: set of productions with nonterminal n as the lhs. The set
70 of productions *must* be sorted by rule number, because functions
71 below assume that this list is sorted *)
72
73 val produces = fn (NT n) =>
74 if DEBUG andalso (n<0 orelse n>=nonterms) then
75 let exception Produces of int in raise (Produces n) end
76 else derives sub n
77
78 val memoize = fn f =>
79 let fun loop i = if i = nonterms then nil
80 else f (NT i) :: (loop (i+1))
81 val data = Array.fromList(loop 0)
82 in fn (NT i) => data sub i
83 end
84
85 (* compute nonterminals which must be added to a closure when a given
86 nonterminal is added, i.e all nonterminals C for each nonterminal A such
87 that A =*=> Cx *)
88
89 val nontermClosure =
90 let val collectNonterms = fn n =>
91 List.foldr (fn (r,l) =>
92 case r
93 of RULE {rhs=NONTERM n :: _,...} =>
94 NtList.insert(n,l)
95 | _ => l) NtList.empty (produces n)
96 val closureNonterm = fn n =>
97 NtList.closure(NtList.singleton n,
98 collectNonterms)
99 in memoize closureNonterm
100 end
101
102 (* ntShifts: Take the items produced by a nonterminal, and sort them
103 by their first symbol. For each first symbol, make sure the item
104 list associated with the symbol is sorted also. ** This function
105 assumes that the item list returned by produces is sorted **
106
107 Create a table of item lists keyed by symbols. Scan the list
108 of items produced by a nonterminal, and insert those with a first
109 symbol on to the beginning of the item list for that symbol, creating
110 a list if necessary. Since produces returns an item list that is
111 already in order, the list for each symbol will also end up in order.
112 *)
113
114 fun sortItems nt =
115 let fun add_item (a as RULE{rhs=symbol::rest,...},r) =
116 let val item = ITEM{rule=a,dot=1,rhsAfter=rest}
117 in Assoc.insert((symbol,case Assoc.find (symbol,r)
118 of SOME l => item::l
119 | NONE => [item]),r)
120 end
121 | add_item (_,r) = r
122 in List.foldr add_item Assoc.empty (produces nt)
123 end
124
125 val ntShifts = memoize sortItems
126
127 (* getNonterms: get the nonterminals with a . before them in a core.
128 Returns a list of nonterminals in ascending order *)
129
130 fun getNonterms l =
131 List.foldr (fn (ITEM {rhsAfter=NONTERM sym ::_, ...},r) =>
132 NtList.insert(sym,r)
133 | (_,r) => r) [] l
134
135 (* closureNonterms: compute the nonterminals that would have a . before them
136 in the closure of the core. Returns a list of nonterminals in ascending
137 order *)
138 fun closureNonterms a =
139 let val nonterms = getNonterms a
140 in List.foldr (fn (nt,r) =>
141 NtList.union(nontermClosure nt,r))
142 nonterms nonterms
143 end
144
145 (* shifts: compute the core sets that result from shift/gotoing on
146 the closure of a kernal set. The items in core sets are sorted, of
147 course.
148
149 (1) compute the core sets that result just from items added
150 through the closure operation.
151 (2) then add the shift/gotos on kernal items.
152
153 We can do (1) the following way. Keep a table which for each shift/goto
154 symbol gives the list of items that result from shifting or gotoing on the
155 symbol. Compute the nonterminals that would have dots before them in the
156 closure of the kernal set. For each of these nonterminals, we already have an
157 item list in sorted order for each possible shift symbol. Scan the nonterminal
158 list from back to front. For each nonterminal, prepend the shift/goto list
159 for each shift symbol to the list already in the table.
160
161 We end up with the list of items in correct order for each shift/goto
162 symbol. We have kept the item lists in order, scanned the nonterminals from
163 back to front (=> that the items end up in ascending order), and never had any
164 duplicate items (each item is derived from only one nonterminal). *)
165
166 fun shifts (CORE (itemList,_)) =
167 let
168
169 (* mergeShiftItems: add an item list for a shift/goto symbol to the table *)
170
171 fun mergeShiftItems (args as ((k,l),r)) =
172 case Assoc.find(k,r)
173 of NONE => Assoc.insert args
174 | SOME old => Assoc.insert ((k,l@old),r)
175
176 (* mergeItems: add all items derived from a nonterminal to the table. We've
177 kept these items sorted by their shift/goto symbol (the first symbol on
178 their rhs) *)
179
180 fun mergeItems (n,r) =
181 Assoc.fold mergeShiftItems (ntShifts n) r
182
183 (* nonterms: a list of nonterminals that are in a core after the
184 closure operation *)
185
186 val nonterms = closureNonterms itemList
187
188 (* now create a table which for each shift/goto symbol gives the sorted list
189 of closure items which would result from first taking all the closure items
190 and then sorting them by the shift/goto symbols *)
191
192 val newsets = List.foldr mergeItems Assoc.empty nonterms
193
194 (* finally prepare to insert the kernal items of a core *)
195
196 fun insertItem ((k,i),r) =
197 case (Assoc.find(k,r))
198 of NONE => Assoc.insert((k,[i]),r)
199 | SOME l => Assoc.insert((k,Core.insert(i,l)),r)
200 fun shiftCores(ITEM{rule,dot,rhsAfter=symbol::rest},r) =
201 insertItem((symbol,
202 ITEM{rule=rule,dot=dot+1,rhsAfter=rest}),r)
203 | shiftCores(_,r) = r
204
205 (* insert the kernal items of a core *)
206
207 val newsets = List.foldr shiftCores newsets itemList
208 in Assoc.make_list newsets
209 end
210
211 (* nontermEpsProds: returns a list of epsilon productions produced by a
212 nonterminal sorted by rule number. ** Depends on produces returning
213 an ordered list **. It does not alter the order in which the rules
214 were returned by produces; it only removes non-epsilon productions *)
215
216 val nontermEpsProds =
217 let val f = fn nt =>
218 List.foldr
219 (fn (rule as RULE {rhs=nil,...},results) => rule :: results
220 | (_,results) => results)
221 [] (produces nt)
222 in memoize f
223 end
224
225 (* epsProds: take a core and compute a list of epsilon productions for it
226 sorted by rule number. ** Depends on closureNonterms returning a list
227 of nonterminals sorted by nonterminal #, rule numbers increasing
228 monotonically with their lhs production #, and nontermEpsProds returning
229 an ordered item list for each production
230 *)
231
232 fun epsProds (CORE (itemList,state)) =
233 let val prods = map nontermEpsProds (closureNonterms itemList)
234 in List.concat prods
235 end
236
237 in {produces=produces,shifts=shifts,rules=rules,epsProds=epsProds}
238 end
239 end;