1 (* ML
-Yacc Parser
Generator (c
) 1989 Andrew W
. Appel
, David R
. Tarditi
*)
3 (* parser
.sml
: This is a parser driver for LR tables
with an error
-recovery
4 routine added to it
. The routine used is described
in detail
in this
7 'A Practical Method for LR
and LL Syntactic Error Diagnosis
and
8 Recovery
', by M
. Burke
and G
. Fisher
, ACM Transactions on
9 Programming Langauges
and Systems
, Vol
. 9, No
. 2, April
1987,
12 This program is an implementation is the partial
, deferred method discussed
13 in the article
. The algorithm
and data structures used
in the program
16 This program assumes that all semantic actions are delayed
. A semantic
17 action should produce a function from unit
-> value instead
of producing the
18 normal value
. The parser returns the semantic value on the top
of the
19 stack when accept is encountered
. The user can deconstruct this value
20 and apply the unit
-> value function
in it to get the answer
.
22 It also assumes that the lexer is a lazy stream
.
29 The state stack has the
type
31 (state
* (semantic value
* line #
* line #
)) list
33 The parser keeps a queue
of (state stack
* lexer pair
). A lexer pair
34 consists
of a terminal
* value pair
and a lexer
. This allows the
35 parser to reconstruct the states for terminals to the left
of a
36 syntax error
, and attempt to make error corrections there
.
38 The queue consists
of a pair
of lists (x
,y
). New additions to
39 the queue are cons
'ed onto y
. The first element
of x is the top
40 of the queue
. If x is nil
, then y is reversed
and used
46 * The steady
-state parser
:
48 This parser keeps the length
of the queue
of state stacks at
49 a steady state by always removing an element from the front when
50 another element is placed on the
end.
52 It has these arguments
:
55 queue
: value
of the queue
56 lexPair ((terminal
,value
),lex stream
)
58 When SHIFT is encountered
, the state to shift to
and the value are
59 are pushed onto the state stack
. The state stack
and lexPair are
60 placed on the queue
. The front element
of the queue is removed
.
62 When REDUCTION is encountered
, the rule is applied to the current
63 stack to yield a
triple (nonterm
,value
,new stack
). A new
64 stack is formed by
adding (goto(top state
of stack
,nonterm
),value
)
67 When ACCEPT is encountered
, the top value from the stack
and the
70 When an ERROR is encountered
, fixError is called
. FixError
71 takes the arguments to the parser
, fixes the error
if possible
and
72 returns a new set
of arguments
.
74 * The distance
-parser
:
76 This parser includes an additional argument distance
. It pushes
77 elements on the queue until it has parsed distance tokens
, or an
78 ACCEPT or ERROR occurs
. It returns a stack
, lexer
, the number
of
79 tokens left unparsed
, a queue
, and an action option
.
86 val get
: 'a queue
-> 'a
* 'a queue
87 val put
: 'a
* 'a queue
-> 'a queue
90 (* drt (12/15/89) -- the
functor should be used
in development work
, but
91 it wastes space
in the release version
.
93 functor ParserGen(structure LrTable
: LR_TABLE
94 structure Stream
: STREAM
) : LR_PARSER
=
97 structure LrParser
:> LR_PARSER
=
99 structure LrTable
= LrTable
100 structure Stream
= Stream
102 fun eqT (LrTable
.T i
, LrTable
.T i
') = i
= i
'
104 structure Token
: TOKEN
=
106 structure LrTable
= LrTable
107 datatype ('a
,'b
) token
= TOKEN
of LrTable
.term
* ('a
* 'b
* 'b
)
108 val sameToken
= fn (TOKEN(t
,_
),TOKEN(t
',_
)) => eqT (t
,t
')
117 exception ParseImpossible
of int
119 structure Fifo
:> FIFO
=
121 type 'a queue
= ('a list
* 'a list
)
122 val empty
= (nil
,nil
)
124 fun get(a
::x
, y
) = (a
, (x
,y
))
125 |
get(nil
, nil
) = raise Empty
126 |
get(nil
, y
) = get(rev y
, nil
)
127 fun put(a
,(x
,y
)) = (x
,a
::y
)
130 type ('a
,'b
) elem
= (state
* ('a
* 'b
* 'b
))
131 type ('a
,'b
) stack
= ('a
,'b
) elem list
132 type ('a
,'b
) lexv
= ('a
,'b
) token
133 type ('a
,'b
) lexpair
= ('a
,'b
) lexv
* (('a
,'b
) lexv Stream
.stream
)
134 type ('a
,'b
) distanceParse
=
137 (('a
,'b
) stack
* ('a
,'b
) lexpair
) Fifo
.queue
*
141 (('a
,'b
) stack
* ('a
,'b
) lexpair
) Fifo
.queue
*
145 type ('a
,'b
) ecRecord
=
146 {is_keyword
: term
-> bool,
147 preferred_change
: (term list
* term list
) list
,
148 error
: string * 'b
* 'b
-> unit
,
149 errtermvalue
: term
-> 'a
,
151 showTerminal
: term
-> string,
152 noShift
: term
-> bool}
155 val print
= fn s
=> TextIO.output(TextIO.stdOut
,s
)
156 val println
= fn s
=> (print s
; print
"\n")
157 val showState
= fn (STATE s
) => "STATE " ^
(Int.toString s
)
159 fun printStack(stack
: ('a
,'b
) stack
, n
: int) =
161 of (state
,_
) :: rest
=>
162 (print("\t" ^
Int.toString n ^
": ");
163 println(showState state
);
164 printStack(rest
, n
+1))
167 fun prAction showTerminal
168 (stack
as (state
,_
) :: _
, next
as (TOKEN (term
,_
),_
), action
) =
169 (println
"Parse: state stack:";
170 printStack(stack
, 0);
178 of SHIFT state
=> println ("SHIFT " ^
(showState state
))
179 | REDUCE i
=> println ("REDUCE " ^
(Int.toString i
))
180 | ERROR
=> println
"ERROR"
181 | ACCEPT
=> println
"ACCEPT")
182 | prAction
_ (_
,_
,action
) = ()
185 (* ssParse
: parser which maintains the queue
of (state
* lexvalues
) in a
186 steady
-state
. It takes a table
, showTerminal function
, saction
187 function
, and fixError function
. It parses until an ACCEPT is
188 encountered
, or an
exception is raised
. When an error is encountered
,
189 fixError is called
with the arguments
of parseStep (lexv
,stack
,and
190 queue
). It returns the lexv
, and a new stack
and queue adjusted so
191 that the lexv can be parsed
*)
194 fn (table
,showTerminal
,saction
,fixError
,arg
) =>
195 let val prAction
= prAction showTerminal
196 val action
= LrTable
.action table
197 val goto
= LrTable
.goto table
198 fun parseStep(args
as
199 (lexPair
as (TOKEN (terminal
, value
as (_
,leftPos
,_
)),
202 stack
as (state
,_
) :: _
,
204 let val nextAction
= action (state
,terminal
)
205 val _
= if DEBUG1
then prAction(stack
,lexPair
,nextAction
)
209 let val newStack
= (s
,value
) :: stack
210 val newLexPair
= Stream
.get lexer
211 val (_
,newQueue
) =Fifo
.get(Fifo
.put((newStack
,newLexPair
),
213 in parseStep(newLexPair
,(s
,value
)::stack
,newQueue
)
216 (case saction(i
,leftPos
,stack
,arg
)
217 of (nonterm
,value
,stack
as (state
,_
) :: _
) =>
218 parseStep(lexPair
,(goto(state
,nonterm
),value
)::stack
,
220 | _
=> raise (ParseImpossible
197))
221 | ERROR
=> parseStep(fixError args
)
224 of (_
,(topvalue
,_
,_
)) :: _
=>
225 let val (token
,restLexer
) = lexPair
226 in (topvalue
,Stream
.cons(token
,restLexer
))
228 | _
=> raise (ParseImpossible
202))
230 | parseStep _
= raise (ParseImpossible
204)
234 (* distanceParse
: parse until n tokens are shifted
, or accept or
235 error are encountered
. Takes a table
, showTerminal function
, and
236 semantic action function
. Returns a parser which takes a lexPair
237 (lex result
* lexer
), a state stack
, a queue
, and a distance
238 (must be
> 0) to parse
. The parser returns a new lex
-value
, a stack
239 with the nth token shifted on top
, a queue
, a distance
, and action
243 fn (table
,showTerminal
,saction
,arg
) =>
244 let val prAction
= prAction showTerminal
245 val action
= LrTable
.action table
246 val goto
= LrTable
.goto table
247 fun parseStep(lexPair
,stack
,queue
,0) = (lexPair
,stack
,queue
,0,NONE
)
248 |
parseStep(lexPair
as (TOKEN (terminal
, value
as (_
,leftPos
,_
)),
251 stack
as (state
,_
) :: _
,
253 let val nextAction
= action(state
,terminal
)
254 val _
= if DEBUG1
then prAction(stack
,lexPair
,nextAction
)
258 let val newStack
= (s
,value
) :: stack
259 val newLexPair
= Stream
.get lexer
260 in parseStep(newLexPair
,(s
,value
)::stack
,
261 Fifo
.put((newStack
,newLexPair
),queue
),distance
-1)
264 (case saction(i
,leftPos
,stack
,arg
)
265 of (nonterm
,value
,stack
as (state
,_
) :: _
) =>
266 parseStep(lexPair
,(goto(state
,nonterm
),value
)::stack
,
268 | _
=> raise (ParseImpossible
240))
269 | ERROR
=> (lexPair
,stack
,queue
,distance
,SOME nextAction
)
270 | ACCEPT
=> (lexPair
,stack
,queue
,distance
,SOME nextAction
)
272 | parseStep _
= raise (ParseImpossible
242)
273 in parseStep
: ('_a
,'_b
) distanceParse
276 (* mkFixError
: function to create fixError function which adjusts parser state
277 so that parse may continue
in the presence
of an error
*)
279 fun mkFixError({is_keyword
,terms
,errtermvalue
,
280 preferred_change
,noShift
,
281 showTerminal
,error
,...} : ('_a
,'_b
) ecRecord
,
282 distanceParse
: ('_a
,'_b
) distanceParse
,
283 minAdvance
,maxAdvance
)
285 (lexv
as (TOKEN (term
,value
as (_
,leftPos
,rightPos
)),_
),stack
,queue
) =
286 let val _
= if DEBUG2
then
287 error("syntax error found at " ^
(showTerminal term
),
291 fun tokAt(t
,p
) = TOKEN(t
,(errtermvalue t
,p
,p
))
295 (* pull all the state
* lexv elements from the queue
*)
298 let fun f q
= let val (elem
,newQueue
) = Fifo
.get q
299 in elem
:: (f newQueue
)
300 end handle Fifo
.Empty
=> nil
304 (* now number elements
of stateList
, giving distance from
307 val (_
, numStateList
) =
308 List.foldr (fn (a
,(num
,r
)) => (num
+1,(a
,num
)::r
)) (0, []) stateList
310 (* Represent the set
of potential changes
as a linked list
.
312 Values
of datatype Change hold information about a potential change
.
314 oper
= oper to be applied
315 pos
= the #
of the element
in stateList that would be altered
.
316 distance
= the number
of tokens beyond the error token which the
317 change allows us to parse
.
318 new
= new terminal
* value pair at that point
319 orig
= original terminal
* value pair at the point being changed
.
322 datatype ('a
,'b
) change
= CHANGE
of
323 {pos
: int, distance
: int, leftPos
: 'b
, rightPos
: 'b
,
324 new
: ('a
,'b
) lexv list
, orig
: ('a
,'b
) lexv list
}
327 val showTerms
= concat
o map (fn TOKEN(t
,_
) => " " ^ showTerminal t
)
329 val printChange
= fn c
=>
330 let val CHANGE
{distance
,new
,orig
,pos
,...} = c
331 in (print ("{distance= " ^
(Int.toString distance
));
332 print (",orig ="); print(showTerms orig
);
333 print (",new ="); print(showTerms new
);
334 print (",pos= " ^
(Int.toString pos
));
338 val printChangeList
= app printChange
340 (* parse
: given a lexPair
, a stack
, and the distance from the error
341 token
, return the distance past the error token that we are able to parse
.*)
343 fun parse (lexPair
,stack
,queuePos
: int) =
344 case distanceParse(lexPair
,stack
,Fifo
.empty
,queuePos
+maxAdvance
+1)
345 of (_
,_
,_
,distance
,SOME ACCEPT
) =>
346 if maxAdvance
-distance
-1 >= 0
348 else maxAdvance
-distance
-1
349 |
(_
,_
,_
,distance
,_
) => maxAdvance
- distance
- 1
351 (* catList
: concatenate results
of scanning list
*)
353 fun catList l f
= List.foldr (fn(a
,r
)=> f a @ r
) [] l
355 fun keywordsDelta new
= if List.exists (fn(TOKEN(t
,_
))=>is_keyword t
) new
358 fun tryChange
{lex
,stack
,pos
,leftPos
,rightPos
,orig
,new
} =
359 let val lex
' = List.foldr (fn (t
',p
)=>(t
',Stream
.cons p
)) lex new
360 val distance
= parse(lex
',stack
,pos
+length new
-length orig
)
361 in if distance
>= minAdvance
+ keywordsDelta new
362 then [CHANGE
{pos
=pos
,leftPos
=leftPos
,rightPos
=rightPos
,
363 distance
=distance
,orig
=orig
,new
=new
}]
368 (* tryDelete
: Try to delete n terminals
.
369 Return single
-element
[success
] or nil
.
370 Do not delete unshiftable terminals
. *)
373 fun tryDelete
n ((stack
,lexPair
as (TOKEN(term
,(_
,l
,r
)),_
)),qPos
) =
374 let fun del(0,accum
,left
,right
,lexPair
) =
375 tryChange
{lex
=lexPair
,stack
=stack
,
376 pos
=qPos
,leftPos
=left
,rightPos
=right
,
377 orig
=rev accum
, new
=[]}
378 |
del(n
,accum
,left
,right
,(tok
as TOKEN(term
,(_
,_
,r
)),lexer
)) =
379 if noShift term
then []
380 else del(n
-1,tok
::accum
,left
,r
,Stream
.get lexer
)
381 in del(n
,[],l
,r
,lexPair
)
384 (* tryInsert
: try to insert tokens
before the current terminal
;
385 return a list
of the successes
*)
387 fun tryInsert((stack
,lexPair
as (TOKEN(_
,(_
,l
,_
)),_
)),queuePos
) =
388 catList
terms (fn t
=>
389 tryChange
{lex
=lexPair
,stack
=stack
,
390 pos
=queuePos
,orig
=[],new
=[tokAt(t
,l
)],
391 leftPos
=l
,rightPos
=l
})
393 (* trySubst
: try to substitute tokens for the current terminal
;
394 return a list
of the successes
*)
396 fun trySubst ((stack
,lexPair
as (orig
as TOKEN (term
,(_
,l
,r
)),lexer
)),
398 if noShift term
then []
400 catList
terms (fn t
=>
401 tryChange
{lex
=Stream
.get lexer
,stack
=stack
,
403 leftPos
=l
,rightPos
=r
,orig
=[orig
],
406 (* do_delete(toks
,lexPair
) tries to delete tokens
"toks" from
"lexPair".
407 If it succeeds
, returns
SOME(toks
',l
,r
,lp
), where
408 toks
' is the actual
tokens (with positions
and values
) deleted
,
409 (l
,r
) are
the (leftmost
,rightmost
) position
of toks
',
410 lp is what remains
of the stream after deletion
412 fun do_delete(nil
,lp
as (TOKEN(_
,(_
,l
,_
)),_
)) = SOME(nil
,l
,l
,lp
)
413 |
do_delete([t
],(tok
as TOKEN(t
',(_
,l
,r
)),lp
')) =
415 then SOME([tok
],l
,r
,Stream
.get lp
')
417 |
do_delete(t
::rest
,(tok
as TOKEN(t
',(_
,l
,r
)),lp
')) =
419 then case do_delete(rest
,Stream
.get lp
')
420 of SOME(deleted
,l
',r
',lp
'') =>
421 SOME(tok
::deleted
,l
,r
',lp
'')
425 fun tryPreferred((stack
,lexPair
),queuePos
) =
426 catList
preferred_change (fn (delete
,insert
) =>
427 if List.exists noShift delete
then [] (* should give warning at
428 parser
-generation time
*)
429 else case do_delete(delete
,lexPair
)
430 of SOME(deleted
,l
,r
,lp
) =>
431 tryChange
{lex
=lp
,stack
=stack
,pos
=queuePos
,
432 leftPos
=l
,rightPos
=r
,orig
=deleted
,
433 new
=map (fn t
=>(tokAt(t
,r
))) insert
}
436 val changes
= catList numStateList tryPreferred @
437 catList numStateList tryInsert @
438 catList numStateList trySubst @
439 catList
numStateList (tryDelete
1) @
440 catList
numStateList (tryDelete
2) @
441 catList
numStateList (tryDelete
3)
443 val findMaxDist
= fn l
=>
444 foldr (fn (CHANGE
{distance
,...},high
) => Int.max(distance
,high
)) 0 l
446 (* maxDist
: max distance past error taken that we could parse
*)
448 val maxDist
= findMaxDist changes
450 (* remove changes which did not parse maxDist tokens past the error token
*)
452 val changes
= catList changes
453 (fn(c
as CHANGE
{distance
,...}) =>
454 if distance
=maxDist
then [c
] else [])
457 of (l
as change
:: _
) =>
458 let fun print_msg (CHANGE
{new
,orig
,leftPos
,rightPos
,...}) =
461 of (_
::_
,[]) => "deleting " ^
(showTerms orig
)
462 |
([],_
::_
) => "inserting " ^
(showTerms new
)
463 | _
=> "replacing " ^
(showTerms orig
) ^
464 " with " ^
(showTerms new
)
465 in error ("syntax error: " ^ s
,leftPos
,rightPos
)
469 (if length l
> 1 andalso DEBUG2
then
470 (print
"multiple fixes possible; could fix it by:\n";
472 print
"chosen correction:\n")
476 (* findNth
: find nth queue entry from the error
477 entry
. Returns the Nth queue entry
and the portion
of
478 the queue from the beginning to the nth
-1 entry
. The
479 error entry is at the
end of the queue
.
484 findNth
0 = (e
,a b c d
)
485 findNth
1 = (d
,a b c
)
488 val findNth
= fn n
=>
489 let fun f (h
::t
,0) = (h
,rev t
)
490 |
f (h
::t
,n
) = f(t
,n
-1)
491 |
f (nil
,_
) = let exception FindNth
494 in f (rev stateList
,n
)
497 val CHANGE
{pos
,orig
,new
,...} = change
498 val (last
,queueFront
) = findNth pos
499 val (stack
,lexPair
) = last
501 val lp1
= foldl(fn (_
,(_
,r
)) => Stream
.get r
) lexPair orig
502 val lp2
= foldr(fn(t
,r
)=>(t
,Stream
.cons r
)) lp1 new
505 Fifo
.put((stack
,lp2
),
506 foldl Fifo
.put Fifo
.empty queueFront
)
508 val (lexPair
,stack
,queue
,_
,_
) =
509 distanceParse(lp2
,stack
,restQueue
,pos
)
511 in (lexPair
,stack
,queue
)
513 | nil
=> (error("syntax error found at " ^
(showTerminal term
),
514 leftPos
,rightPos
); raise ParseError
)
517 val parse
= fn {arg
,table
,lexer
,saction
,void
,lookahead
,
518 ec
=ec
as {showTerminal
,...} : ('_a
,'_b
) ecRecord
} =>
519 let val distance
= 15 (* defer distance tokens
*)
520 val minAdvance
= 1 (* must parse at least
1 token past error
*)
521 val maxAdvance
= Int.max(lookahead
,0)(* max distance for parse check
*)
522 val lexPair
= Stream
.get lexer
523 val (TOKEN (_
,(_
,leftPos
,_
)),_
) = lexPair
524 val startStack
= [(initialState table
,(void
,leftPos
,leftPos
))]
525 val startQueue
= Fifo
.put((startStack
,lexPair
),Fifo
.empty
)
526 val distanceParse
= distanceParse(table
,showTerminal
,saction
,arg
)
527 val fixError
= mkFixError(ec
,distanceParse
,minAdvance
,maxAdvance
)
528 val ssParse
= ssParse(table
,showTerminal
,saction
,fixError
,arg
)
529 fun loop (lexPair
,stack
,queue
,_
,SOME ACCEPT
) =
530 ssParse(lexPair
,stack
,queue
)
531 |
loop (lexPair
,stack
,queue
,0,_
) = ssParse(lexPair
,stack
,queue
)
532 |
loop (lexPair
,stack
,queue
,distance
,SOME ERROR
) =
533 let val (lexPair
,stack
,queue
) = fixError(lexPair
,stack
,queue
)
534 in loop (distanceParse(lexPair
,stack
,queue
,distance
))
536 | loop _
= let exception ParseInternal
537 in raise ParseInternal
539 in loop (distanceParse(lexPair
,startStack
,startQueue
,distance
))