Import Upstream version 20180207
[hcoop/debian/mlton.git] / lib / mlyacc-lib / parser2.sml
1 (* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
2
3 (* parser.sml: This is a parser driver for LR tables with an error-recovery
4 routine added to it. The routine used is described in detail in this
5 article:
6
7 'A Practical Method for LR and LL Syntactic Error Diagnosis and
8 Recovery', by M. Burke and G. Fisher, ACM Transactions on
9 Programming Langauges and Systems, Vol. 9, No. 2, April 1987,
10 pp. 164-197.
11
12 This program is an implementation is the partial, deferred method discussed
13 in the article. The algorithm and data structures used in the program
14 are described below.
15
16 This program assumes that all semantic actions are delayed. A semantic
17 action should produce a function from unit -> value instead of producing the
18 normal value. The parser returns the semantic value on the top of the
19 stack when accept is encountered. The user can deconstruct this value
20 and apply the unit -> value function in it to get the answer.
21
22 It also assumes that the lexer is a lazy stream.
23
24 Data Structures:
25 ----------------
26
27 * The parser:
28
29 The state stack has the type
30
31 (state * (semantic value * line # * line #)) list
32
33 The parser keeps a queue of (state stack * lexer pair). A lexer pair
34 consists of a terminal * value pair and a lexer. This allows the
35 parser to reconstruct the states for terminals to the left of a
36 syntax error, and attempt to make error corrections there.
37
38 The queue consists of a pair of lists (x,y). New additions to
39 the queue are cons'ed onto y. The first element of x is the top
40 of the queue. If x is nil, then y is reversed and used
41 in place of x.
42
43 Algorithm:
44 ----------
45
46 * The steady-state parser:
47
48 This parser keeps the length of the queue of state stacks at
49 a steady state by always removing an element from the front when
50 another element is placed on the end.
51
52 It has these arguments:
53
54 stack: current stack
55 queue: value of the queue
56 lexPair ((terminal,value),lex stream)
57
58 When SHIFT is encountered, the state to shift to and the value are
59 are pushed onto the state stack. The state stack and lexPair are
60 placed on the queue. The front element of the queue is removed.
61
62 When REDUCTION is encountered, the rule is applied to the current
63 stack to yield a triple (nonterm,value,new stack). A new
64 stack is formed by adding (goto(top state of stack,nonterm),value)
65 to the stack.
66
67 When ACCEPT is encountered, the top value from the stack and the
68 lexer are returned.
69
70 When an ERROR is encountered, fixError is called. FixError
71 takes the arguments to the parser, fixes the error if possible and
72 returns a new set of arguments.
73
74 * The distance-parser:
75
76 This parser includes an additional argument distance. It pushes
77 elements on the queue until it has parsed distance tokens, or an
78 ACCEPT or ERROR occurs. It returns a stack, lexer, the number of
79 tokens left unparsed, a queue, and an action option.
80 *)
81
82 signature FIFO =
83 sig type 'a queue
84 val empty : 'a queue
85 exception Empty
86 val get : 'a queue -> 'a * 'a queue
87 val put : 'a * 'a queue -> 'a queue
88 end
89
90 (* drt (12/15/89) -- the functor should be used in development work, but
91 it wastes space in the release version.
92
93 functor ParserGen(structure LrTable : LR_TABLE
94 structure Stream : STREAM) : LR_PARSER =
95 *)
96
97 structure LrParser :> LR_PARSER =
98 struct
99 structure LrTable = LrTable
100 structure Stream = Stream
101
102 fun eqT (LrTable.T i, LrTable.T i') = i = i'
103
104 structure Token : TOKEN =
105 struct
106 structure LrTable = LrTable
107 datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b)
108 val sameToken = fn (TOKEN(t,_),TOKEN(t',_)) => eqT (t,t')
109 end
110
111 open LrTable
112 open Token
113
114 val DEBUG1 = false
115 val DEBUG2 = false
116 exception ParseError
117 exception ParseImpossible of int
118
119 structure Fifo :> FIFO =
120 struct
121 type 'a queue = ('a list * 'a list)
122 val empty = (nil,nil)
123 exception Empty
124 fun get(a::x, y) = (a, (x,y))
125 | get(nil, nil) = raise Empty
126 | get(nil, y) = get(rev y, nil)
127 fun put(a,(x,y)) = (x,a::y)
128 end
129
130 type ('a,'b) elem = (state * ('a * 'b * 'b))
131 type ('a,'b) stack = ('a,'b) elem list
132 type ('a,'b) lexv = ('a,'b) token
133 type ('a,'b) lexpair = ('a,'b) lexv * (('a,'b) lexv Stream.stream)
134 type ('a,'b) distanceParse =
135 ('a,'b) lexpair *
136 ('a,'b) stack *
137 (('a,'b) stack * ('a,'b) lexpair) Fifo.queue *
138 int ->
139 ('a,'b) lexpair *
140 ('a,'b) stack *
141 (('a,'b) stack * ('a,'b) lexpair) Fifo.queue *
142 int *
143 action option
144
145 type ('a,'b) ecRecord =
146 {is_keyword : term -> bool,
147 preferred_change : (term list * term list) list,
148 error : string * 'b * 'b -> unit,
149 errtermvalue : term -> 'a,
150 terms : term list,
151 showTerminal : term -> string,
152 noShift : term -> bool}
153
154 local
155 val print = fn s => TextIO.output(TextIO.stdOut,s)
156 val println = fn s => (print s; print "\n")
157 val showState = fn (STATE s) => "STATE " ^ (Int.toString s)
158 in
159 fun printStack(stack: ('a,'b) stack, n: int) =
160 case stack
161 of (state,_) :: rest =>
162 (print("\t" ^ Int.toString n ^ ": ");
163 println(showState state);
164 printStack(rest, n+1))
165 | nil => ()
166
167 fun prAction showTerminal
168 (stack as (state,_) :: _, next as (TOKEN (term,_),_), action) =
169 (println "Parse: state stack:";
170 printStack(stack, 0);
171 print(" state="
172 ^ showState state
173 ^ " next="
174 ^ showTerminal term
175 ^ " action="
176 );
177 case action
178 of SHIFT state => println ("SHIFT " ^ (showState state))
179 | REDUCE i => println ("REDUCE " ^ (Int.toString i))
180 | ERROR => println "ERROR"
181 | ACCEPT => println "ACCEPT")
182 | prAction _ (_,_,action) = ()
183 end
184
185 (* ssParse: parser which maintains the queue of (state * lexvalues) in a
186 steady-state. It takes a table, showTerminal function, saction
187 function, and fixError function. It parses until an ACCEPT is
188 encountered, or an exception is raised. When an error is encountered,
189 fixError is called with the arguments of parseStep (lexv,stack,and
190 queue). It returns the lexv, and a new stack and queue adjusted so
191 that the lexv can be parsed *)
192
193 val ssParse =
194 fn (table,showTerminal,saction,fixError,arg) =>
195 let val prAction = prAction showTerminal
196 val action = LrTable.action table
197 val goto = LrTable.goto table
198 fun parseStep(args as
199 (lexPair as (TOKEN (terminal, value as (_,leftPos,_)),
200 lexer
201 ),
202 stack as (state,_) :: _,
203 queue)) =
204 let val nextAction = action (state,terminal)
205 val _ = if DEBUG1 then prAction(stack,lexPair,nextAction)
206 else ()
207 in case nextAction
208 of SHIFT s =>
209 let val newStack = (s,value) :: stack
210 val newLexPair = Stream.get lexer
211 val (_,newQueue) =Fifo.get(Fifo.put((newStack,newLexPair),
212 queue))
213 in parseStep(newLexPair,(s,value)::stack,newQueue)
214 end
215 | REDUCE i =>
216 (case saction(i,leftPos,stack,arg)
217 of (nonterm,value,stack as (state,_) :: _) =>
218 parseStep(lexPair,(goto(state,nonterm),value)::stack,
219 queue)
220 | _ => raise (ParseImpossible 197))
221 | ERROR => parseStep(fixError args)
222 | ACCEPT =>
223 (case stack
224 of (_,(topvalue,_,_)) :: _ =>
225 let val (token,restLexer) = lexPair
226 in (topvalue,Stream.cons(token,restLexer))
227 end
228 | _ => raise (ParseImpossible 202))
229 end
230 | parseStep _ = raise (ParseImpossible 204)
231 in parseStep
232 end
233
234 (* distanceParse: parse until n tokens are shifted, or accept or
235 error are encountered. Takes a table, showTerminal function, and
236 semantic action function. Returns a parser which takes a lexPair
237 (lex result * lexer), a state stack, a queue, and a distance
238 (must be > 0) to parse. The parser returns a new lex-value, a stack
239 with the nth token shifted on top, a queue, a distance, and action
240 option. *)
241
242 val distanceParse =
243 fn (table,showTerminal,saction,arg) =>
244 let val prAction = prAction showTerminal
245 val action = LrTable.action table
246 val goto = LrTable.goto table
247 fun parseStep(lexPair,stack,queue,0) = (lexPair,stack,queue,0,NONE)
248 | parseStep(lexPair as (TOKEN (terminal, value as (_,leftPos,_)),
249 lexer
250 ),
251 stack as (state,_) :: _,
252 queue,distance) =
253 let val nextAction = action(state,terminal)
254 val _ = if DEBUG1 then prAction(stack,lexPair,nextAction)
255 else ()
256 in case nextAction
257 of SHIFT s =>
258 let val newStack = (s,value) :: stack
259 val newLexPair = Stream.get lexer
260 in parseStep(newLexPair,(s,value)::stack,
261 Fifo.put((newStack,newLexPair),queue),distance-1)
262 end
263 | REDUCE i =>
264 (case saction(i,leftPos,stack,arg)
265 of (nonterm,value,stack as (state,_) :: _) =>
266 parseStep(lexPair,(goto(state,nonterm),value)::stack,
267 queue,distance)
268 | _ => raise (ParseImpossible 240))
269 | ERROR => (lexPair,stack,queue,distance,SOME nextAction)
270 | ACCEPT => (lexPair,stack,queue,distance,SOME nextAction)
271 end
272 | parseStep _ = raise (ParseImpossible 242)
273 in parseStep : ('_a,'_b) distanceParse
274 end
275
276 (* mkFixError: function to create fixError function which adjusts parser state
277 so that parse may continue in the presence of an error *)
278
279 fun mkFixError({is_keyword,terms,errtermvalue,
280 preferred_change,noShift,
281 showTerminal,error,...} : ('_a,'_b) ecRecord,
282 distanceParse : ('_a,'_b) distanceParse,
283 minAdvance,maxAdvance)
284
285 (lexv as (TOKEN (term,value as (_,leftPos,rightPos)),_),stack,queue) =
286 let val _ = if DEBUG2 then
287 error("syntax error found at " ^ (showTerminal term),
288 leftPos,rightPos)
289 else ()
290
291 fun tokAt(t,p) = TOKEN(t,(errtermvalue t,p,p))
292
293 val minDelta = 3
294
295 (* pull all the state * lexv elements from the queue *)
296
297 val stateList =
298 let fun f q = let val (elem,newQueue) = Fifo.get q
299 in elem :: (f newQueue)
300 end handle Fifo.Empty => nil
301 in f queue
302 end
303
304 (* now number elements of stateList, giving distance from
305 error token *)
306
307 val (_, numStateList) =
308 List.foldr (fn (a,(num,r)) => (num+1,(a,num)::r)) (0, []) stateList
309
310 (* Represent the set of potential changes as a linked list.
311
312 Values of datatype Change hold information about a potential change.
313
314 oper = oper to be applied
315 pos = the # of the element in stateList that would be altered.
316 distance = the number of tokens beyond the error token which the
317 change allows us to parse.
318 new = new terminal * value pair at that point
319 orig = original terminal * value pair at the point being changed.
320 *)
321
322 datatype ('a,'b) change = CHANGE of
323 {pos : int, distance : int, leftPos: 'b, rightPos: 'b,
324 new : ('a,'b) lexv list, orig : ('a,'b) lexv list}
325
326
327 val showTerms = concat o map (fn TOKEN(t,_) => " " ^ showTerminal t)
328
329 val printChange = fn c =>
330 let val CHANGE {distance,new,orig,pos,...} = c
331 in (print ("{distance= " ^ (Int.toString distance));
332 print (",orig ="); print(showTerms orig);
333 print (",new ="); print(showTerms new);
334 print (",pos= " ^ (Int.toString pos));
335 print "}\n")
336 end
337
338 val printChangeList = app printChange
339
340 (* parse: given a lexPair, a stack, and the distance from the error
341 token, return the distance past the error token that we are able to parse.*)
342
343 fun parse (lexPair,stack,queuePos : int) =
344 case distanceParse(lexPair,stack,Fifo.empty,queuePos+maxAdvance+1)
345 of (_,_,_,distance,SOME ACCEPT) =>
346 if maxAdvance-distance-1 >= 0
347 then maxAdvance
348 else maxAdvance-distance-1
349 | (_,_,_,distance,_) => maxAdvance - distance - 1
350
351 (* catList: concatenate results of scanning list *)
352
353 fun catList l f = List.foldr (fn(a,r)=> f a @ r) [] l
354
355 fun keywordsDelta new = if List.exists (fn(TOKEN(t,_))=>is_keyword t) new
356 then minDelta else 0
357
358 fun tryChange{lex,stack,pos,leftPos,rightPos,orig,new} =
359 let val lex' = List.foldr (fn (t',p)=>(t',Stream.cons p)) lex new
360 val distance = parse(lex',stack,pos+length new-length orig)
361 in if distance >= minAdvance + keywordsDelta new
362 then [CHANGE{pos=pos,leftPos=leftPos,rightPos=rightPos,
363 distance=distance,orig=orig,new=new}]
364 else []
365 end
366
367
368 (* tryDelete: Try to delete n terminals.
369 Return single-element [success] or nil.
370 Do not delete unshiftable terminals. *)
371
372
373 fun tryDelete n ((stack,lexPair as (TOKEN(term,(_,l,r)),_)),qPos) =
374 let fun del(0,accum,left,right,lexPair) =
375 tryChange{lex=lexPair,stack=stack,
376 pos=qPos,leftPos=left,rightPos=right,
377 orig=rev accum, new=[]}
378 | del(n,accum,left,right,(tok as TOKEN(term,(_,_,r)),lexer)) =
379 if noShift term then []
380 else del(n-1,tok::accum,left,r,Stream.get lexer)
381 in del(n,[],l,r,lexPair)
382 end
383
384 (* tryInsert: try to insert tokens before the current terminal;
385 return a list of the successes *)
386
387 fun tryInsert((stack,lexPair as (TOKEN(_,(_,l,_)),_)),queuePos) =
388 catList terms (fn t =>
389 tryChange{lex=lexPair,stack=stack,
390 pos=queuePos,orig=[],new=[tokAt(t,l)],
391 leftPos=l,rightPos=l})
392
393 (* trySubst: try to substitute tokens for the current terminal;
394 return a list of the successes *)
395
396 fun trySubst ((stack,lexPair as (orig as TOKEN (term,(_,l,r)),lexer)),
397 queuePos) =
398 if noShift term then []
399 else
400 catList terms (fn t =>
401 tryChange{lex=Stream.get lexer,stack=stack,
402 pos=queuePos,
403 leftPos=l,rightPos=r,orig=[orig],
404 new=[tokAt(t,r)]})
405
406 (* do_delete(toks,lexPair) tries to delete tokens "toks" from "lexPair".
407 If it succeeds, returns SOME(toks',l,r,lp), where
408 toks' is the actual tokens (with positions and values) deleted,
409 (l,r) are the (leftmost,rightmost) position of toks',
410 lp is what remains of the stream after deletion
411 *)
412 fun do_delete(nil,lp as (TOKEN(_,(_,l,_)),_)) = SOME(nil,l,l,lp)
413 | do_delete([t],(tok as TOKEN(t',(_,l,r)),lp')) =
414 if eqT (t, t')
415 then SOME([tok],l,r,Stream.get lp')
416 else NONE
417 | do_delete(t::rest,(tok as TOKEN(t',(_,l,r)),lp')) =
418 if eqT (t,t')
419 then case do_delete(rest,Stream.get lp')
420 of SOME(deleted,l',r',lp'') =>
421 SOME(tok::deleted,l,r',lp'')
422 | NONE => NONE
423 else NONE
424
425 fun tryPreferred((stack,lexPair),queuePos) =
426 catList preferred_change (fn (delete,insert) =>
427 if List.exists noShift delete then [] (* should give warning at
428 parser-generation time *)
429 else case do_delete(delete,lexPair)
430 of SOME(deleted,l,r,lp) =>
431 tryChange{lex=lp,stack=stack,pos=queuePos,
432 leftPos=l,rightPos=r,orig=deleted,
433 new=map (fn t=>(tokAt(t,r))) insert}
434 | NONE => [])
435
436 val changes = catList numStateList tryPreferred @
437 catList numStateList tryInsert @
438 catList numStateList trySubst @
439 catList numStateList (tryDelete 1) @
440 catList numStateList (tryDelete 2) @
441 catList numStateList (tryDelete 3)
442
443 val findMaxDist = fn l =>
444 foldr (fn (CHANGE {distance,...},high) => Int.max(distance,high)) 0 l
445
446 (* maxDist: max distance past error taken that we could parse *)
447
448 val maxDist = findMaxDist changes
449
450 (* remove changes which did not parse maxDist tokens past the error token *)
451
452 val changes = catList changes
453 (fn(c as CHANGE{distance,...}) =>
454 if distance=maxDist then [c] else [])
455
456 in case changes
457 of (l as change :: _) =>
458 let fun print_msg (CHANGE {new,orig,leftPos,rightPos,...}) =
459 let val s =
460 case (orig,new)
461 of (_::_,[]) => "deleting " ^ (showTerms orig)
462 | ([],_::_) => "inserting " ^ (showTerms new)
463 | _ => "replacing " ^ (showTerms orig) ^
464 " with " ^ (showTerms new)
465 in error ("syntax error: " ^ s,leftPos,rightPos)
466 end
467
468 val _ =
469 (if length l > 1 andalso DEBUG2 then
470 (print "multiple fixes possible; could fix it by:\n";
471 app print_msg l;
472 print "chosen correction:\n")
473 else ();
474 print_msg change)
475
476 (* findNth: find nth queue entry from the error
477 entry. Returns the Nth queue entry and the portion of
478 the queue from the beginning to the nth-1 entry. The
479 error entry is at the end of the queue.
480
481 Examples:
482
483 queue = a b c d e
484 findNth 0 = (e,a b c d)
485 findNth 1 = (d,a b c)
486 *)
487
488 val findNth = fn n =>
489 let fun f (h::t,0) = (h,rev t)
490 | f (h::t,n) = f(t,n-1)
491 | f (nil,_) = let exception FindNth
492 in raise FindNth
493 end
494 in f (rev stateList,n)
495 end
496
497 val CHANGE {pos,orig,new,...} = change
498 val (last,queueFront) = findNth pos
499 val (stack,lexPair) = last
500
501 val lp1 = foldl(fn (_,(_,r)) => Stream.get r) lexPair orig
502 val lp2 = foldr(fn(t,r)=>(t,Stream.cons r)) lp1 new
503
504 val restQueue =
505 Fifo.put((stack,lp2),
506 foldl Fifo.put Fifo.empty queueFront)
507
508 val (lexPair,stack,queue,_,_) =
509 distanceParse(lp2,stack,restQueue,pos)
510
511 in (lexPair,stack,queue)
512 end
513 | nil => (error("syntax error found at " ^ (showTerminal term),
514 leftPos,rightPos); raise ParseError)
515 end
516
517 val parse = fn {arg,table,lexer,saction,void,lookahead,
518 ec=ec as {showTerminal,...} : ('_a,'_b) ecRecord} =>
519 let val distance = 15 (* defer distance tokens *)
520 val minAdvance = 1 (* must parse at least 1 token past error *)
521 val maxAdvance = Int.max(lookahead,0)(* max distance for parse check *)
522 val lexPair = Stream.get lexer
523 val (TOKEN (_,(_,leftPos,_)),_) = lexPair
524 val startStack = [(initialState table,(void,leftPos,leftPos))]
525 val startQueue = Fifo.put((startStack,lexPair),Fifo.empty)
526 val distanceParse = distanceParse(table,showTerminal,saction,arg)
527 val fixError = mkFixError(ec,distanceParse,minAdvance,maxAdvance)
528 val ssParse = ssParse(table,showTerminal,saction,fixError,arg)
529 fun loop (lexPair,stack,queue,_,SOME ACCEPT) =
530 ssParse(lexPair,stack,queue)
531 | loop (lexPair,stack,queue,0,_) = ssParse(lexPair,stack,queue)
532 | loop (lexPair,stack,queue,distance,SOME ERROR) =
533 let val (lexPair,stack,queue) = fixError(lexPair,stack,queue)
534 in loop (distanceParse(lexPair,stack,queue,distance))
535 end
536 | loop _ = let exception ParseInternal
537 in raise ParseInternal
538 end
539 in loop (distanceParse(lexPair,startStack,startQueue,distance))
540 end
541 end;