1 (**************************************************************************)
5 (* François Pottier, INRIA Rocquencourt *)
6 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
8 (* Copyright 2005-2008 Institut National de Recherche en Informatique *)
9 (* et en Automatique. All rights reserved. This file is distributed *)
10 (* under the terms of the Q Public License version 1.0, with the change *)
11 (* described in file LICENSE. *)
13 (**************************************************************************)
17 open UnparameterizedSyntax
21 (* Inference for non terminals. *)
23 (* Unification variables convey [variable_info] to describe
24 the multi-equation they take part of. *)
27 mutable structure
: nt_type
option;
28 mutable name
: string option;
32 (* [UnionFind] is used to improve the union and the equality test
33 between multi-equations. *)
34 and variable
= variable_info
UnionFind.point
36 (* Types are simple types.
37 [star] denotes the type of ground symbol (non terminal or terminal).
38 [Arrow] describes the type of a parameterized non terminal. *)
40 Arrow
of variable list
45 (* [var_name] is a name generator for unification variables. *)
47 let name_counter = ref (-1) in
50 String.make
1 (char_of_int
(97 + !name_counter mod 26))
51 ^
let d = !name_counter / 26 in if d = 0 then "" else string_of_int
d
54 let repr = UnionFind.find v
in
56 None
-> let name = next_name () in repr.name <- Some
name; name
59 (* [string_of_nt_type] is a simple pretty printer for types (they can be
62 (* 2011/04/05: types can no longer be recursive, but I won't touch the printer -fpottier *)
64 let string_of paren_fun ?paren ?colors t
: string =
67 None
-> (Mark.fresh
(), Mark.fresh
())
70 let s, p
= paren_fun
colors t
in
71 if paren
<> None
&& p
= true then
75 let rec paren_nt_type ((white
, black
) as colors) = function
81 let args = separated_list_to_string
82 (string_of paren_var ~paren
:true ~
colors) ", " ins
85 if List.length ins
> 1 then
92 and paren_var
(white
, black
) x
=
93 let descr = UnionFind.find x
in
94 if Mark.same
descr.mark white
then begin
100 let s, p
= match descr.structure
with
101 None
-> var_name x
, false
102 | Some t
-> paren_nt_type (white
, black
) t
104 if Mark.same
descr.mark black
then
105 (var_name x ^
" = " ^
s, true)
110 let string_of_nt_type ?paren ?
colors t
=
111 string_of ?
colors paren_nt_type t
113 let string_of_var ?paren ?
colors v
=
114 string_of ?
colors paren_var v
116 (* [print_env env] returns a string description of the typing environment. *)
118 List.iter
(fun (k
, (_
, v
)) ->
119 Printf.eprintf
"%s: %s\n" k
(string_of_var v
))
121 (* [occurs_check x y] checks that [x] does not occur within [y]. *)
125 let black = Mark.fresh
() in
127 let rec visit_var x
=
128 let descr = UnionFind.find x
in
129 if not
(Mark.same
descr.mark
black) then begin
132 match descr.structure
with
139 and visit_term
(Arrow ins
) =
140 List.iter
visit_var ins
145 exception OccursError
of variable
* variable
147 let occurs_check x y
=
148 dfs (fun z
-> if UnionFind.equivalent x z
then raise
(OccursError
(x
, y
))) y
150 (* First order unification. *)
152 (* 2011/04/05: perform an eager occurs check and prevent the construction
155 let fresh_flexible_variable () =
156 UnionFind.fresh
{ structure
= None
; name = None
; mark
= Mark.none
}
158 let fresh_structured_variable t
=
159 UnionFind.fresh
{ structure
= Some t
; name = None
; mark
= Mark.none
}
162 fresh_structured_variable star
164 exception UnificationError
of nt_type
* nt_type
165 exception BadArityError
of int * int
167 let rec unify_var toplevel x y
=
168 if not
(UnionFind.equivalent x y
) then
169 let reprx, repry
= UnionFind.find x
, UnionFind.find y
in
170 match reprx.structure
, repry
.structure
with
171 None
, Some t
-> occurs_check x y
; UnionFind.union x y
172 | Some t
, None
-> occurs_check y x
; UnionFind.union y x
173 | None
, None
-> UnionFind.union x y
174 | Some t
, Some t'
-> unify toplevel t t'
; UnionFind.union x y
176 and unify toplevel t1 t2
=
179 | Arrow ins
, Arrow ins'
->
180 let n1, n2
= List.length ins
, List.length ins'
in
182 if n1 = 0 || n2
= 0 || not toplevel
then
183 raise
(UnificationError
(t1
, t2
))
185 (* the flag [toplevel] is used only here and influences which
186 exception is raised; BadArityError is raised only at toplevel *)
187 raise
(BadArityError
(n1, n2
));
188 List.iter2
(unify_var false) ins ins'
193 (* Typing environment. *)
195 (string * (Positions.t list
* variable
)) list
197 (* [lookup x env] returns the type related to [x] in the typing environment
199 By convention, identifiers that are not in [env] are terminals. They are
200 given the type [Star]. *)
201 let lookup x
(env
: environment
) =
203 snd
(List.assoc x env
)
204 with Not_found
-> star_variable
206 (* This function checks that the symbol [k] has the type [expected_type]. *)
207 let check positions env k expected_type
=
208 let inference_var = lookup k env
in
209 let checking_var = fresh_structured_variable expected_type
in
211 unify_var inference_var checking_var
213 UnificationError
(t1
, t2
) ->
217 "How is this symbol parameterized?\n\
218 It is used at sorts %s and %s.\n\
219 The sort %s is not compatible with the sort %s."
220 (string_of_var inference_var) (string_of_var checking_var)
221 (string_of_nt_type t1
) (string_of_nt_type t2
))
223 | BadArityError
(n1, n2
) ->
227 "does this symbol expect %d or %d arguments?"
228 (min
n1 n2
) (max
n1 n2
))
230 | OccursError
(x
, y
) ->
234 "How is this symbol parameterized?\n\
235 It is used at sorts %s and %s.\n\
236 The sort %s cannot be unified with the sort %s."
237 (string_of_var inference_var) (string_of_var checking_var)
238 (string_of_var x
) (string_of_var y
))
242 (* An identifier can be used either in a total application or as a
243 higher-order non terminal (no partial application is allowed). *)
244 let rec parameter_type env
= function
248 | ParameterApp
(x
, args) ->
251 (* [x] is applied, it must be to the exact number
253 Arrow
(List.map
(parameter_type env
) args)
255 (* Check the well-formedness of the application. *)
256 check [x
.position
] env x
.value expected_type;
258 (* Similarly, if it was a total application the result is
259 [Star] otherwise it is the flexible variable. *)
262 let check_grammar p_grammar
=
263 (* [n] is the grammar size. *)
264 let n = StringMap.cardinal p_grammar
.p_rules
in
266 (* The successors of the non terminal [N] are its producers. It
267 induce a graph over the non terminals and its successor function
268 is implemented by [successors]. Non terminals are indexed using
271 let nt, conv
, iconv
= index_map p_grammar
.p_rules
in
272 let parameters, name, branches
, positions
=
273 (fun n -> (nt n).pr_parameters
), (fun n -> (nt n).pr_nt
),
274 (fun n -> (nt n).pr_branches
), (fun n -> (nt n).pr_positions
)
277 (* The successors function is implemented as an array using the
278 indexing previously created. *)
280 Array.init
n (fun node
->
281 (* We only are interested by parameterized non terminals. *)
282 if parameters node
<> [] then
283 List.fold_left
(fun succs
{ pr_producers
= symbols
} ->
284 List.fold_left
(fun succs
-> function (_
, p
) ->
285 let symbol, _
= Parameters.unapp p
in
287 let symbol_node = conv
symbol.value in
288 (* [symbol] is a parameterized non terminal, we add it
289 to the successors set. *)
290 if parameters symbol_node <> [] then
291 IntSet.add
symbol_node succs
295 (* [symbol] is a token, it is not interesting for type inference
299 ) IntSet.empty
(branches node
)
305 (* The successors function and the indexing induce the following graph
307 let module RulesGraph
=
317 let successors f node
=
318 IntSet.iter f
successors.(node
)
321 for i
= 0 to n - 1 do
327 let module ConnectedComponents
= Tarjan.Run
(RulesGraph
) in
329 - all the parameterized definitions of a particular component
330 have the same number of parameters.
331 - every parameterized non terminal definition always uses
332 parameterized definitions of the same component with its
335 Components are marked during the traversal:
337 n with n > 0 is the number of parameters of the clique.
340 let marked_components = Array.create
n unseen in
342 let flexible_arrow args =
343 let ty = Arrow
(List.map
(fun _
-> fresh_flexible_variable ()) args) in
344 fresh_structured_variable ty
347 (* [nt_type i] is the type of the i-th non terminal. *)
349 match parameters i
with
357 (* [actual_parameters_as_formal] is the well-formedness checker for
358 parameterized non terminal application. *)
359 let actual_parameters_as_formal actual_parameters formal_parameters
=
360 List.for_all2
(fun y
-> (function ParameterVar x
-> x
.value = y
362 formal_parameters actual_parameters
365 (* The environment is initialized. *)
366 let env : environment
= StringMap.fold
368 (k
, (r
.pr_positions
, nt_type (conv k
)))
373 (* We traverse the graph checking each parameterized non terminal
374 definition is well-formed. *)
377 let params = parameters i
379 and repr = ConnectedComponents.representative i
380 and positions
= positions i
383 (* The environment is augmented with the parameters whose types are
386 (fun k
-> (k
, (positions
, fresh_flexible_variable ()))) params
388 let env = env'
@ env in
390 (* The type of the parameterized non terminal is constrained to be
393 check positions
env iname
(Arrow
(List.map
(fun (_
, (_
, t
)) -> t
) env'
))
396 (* We check the number of parameters. *)
397 let check_parameters () =
398 let parameters_len = List.length
params in
399 (* The component is visited for the first time. *)
400 if marked_components.(repr) = unseen then
401 marked_components.(repr) <- parameters_len
402 else (* Otherwise, we check that the arity is homogeneous
404 if marked_components.(repr) <> parameters_len then
405 Error.error positions
407 "Mutually recursive definitions must have the same parameters.\n\
408 This is not the case for %s and %s."
412 (* In each production rule, the parameterized non terminal
413 of the same component must be instantiated with the same
415 let check_producers () =
417 (fun { pr_producers
= symbols
} -> List.iter
419 let symbol, actuals
= Parameters.unapp p
in
420 (* We take the use of each symbol into account. *)
421 check [ symbol.position
] env symbol.value
422 (if actuals
= [] then star else
423 Arrow
(List.map
(parameter_type env) actuals
));
424 (* If it is in the same component, check in addition that
425 the arguments are the formal arguments. *)
427 let idx = conv
symbol.value in
428 if ConnectedComponents.representative
idx = repr then
429 if not
(actual_parameters_as_formal actuals
params)
431 Error.error
[ symbol.position
]
433 "Mutually recursive definitions must have the same \
435 This is not the case for %s."
436 (let name1, name2
= (name idx), (name i
) in
437 if name1 <> name2
then name1 ^
" and "^ name2
440 symbols
) (branches i
)
447 let rec subst_parameter subst
= function
450 List.assoc x
.value subst
454 | ParameterApp
(x
, ps
) ->
456 match List.assoc x
.value subst
with
458 ParameterApp
(y
, List.map
(subst_parameter subst
) ps
)
461 (* Type-checking ensures that we cannot do partial
462 application. Consequently, if an higher-order non terminal
463 is an actual argument, it cannot be the result of a
464 partial application. *)
468 ParameterApp
(x
, List.map
(subst_parameter subst
) ps
))
470 let subst_parameters subst
=
471 List.map
(subst_parameter subst
)
473 let names_of_p_grammar p_grammar
=
474 StringMap.fold
(fun tok _ acu
-> StringSet.add tok acu
)
475 p_grammar
.p_tokens
StringSet.empty
476 $$
(StringMap.fold
(fun nt _ acu
-> StringSet.add
nt acu
)
479 let expand p_grammar
=
480 (* Check that it is safe to expand this parameterized grammar. *)
481 check_grammar p_grammar
;
483 (* Set up a mechanism that ensures that names are unique -- and, in
484 fact, ensures the stronger condition that normalized names are
488 ref (StringSet.empty
)
490 let ensure_fresh name =
491 let normalized_name = Misc.normalize
name in
492 if StringSet.mem
normalized_name !names then
494 (Printf.sprintf
"internal name clash over %s" normalized_name);
495 names := StringSet.add
normalized_name !names;
501 let module InstanceTable
=
502 Hashtbl.Make
(Parameters
)
505 InstanceTable.create
13
508 (* [mangle p] chooses a name for the new nonterminal symbol that corresponds
509 to the parameter [p]. *)
511 let rec mangle = function
513 | ParameterApp
(x
, []) ->
515 | ParameterApp
(x
, ps
) ->
517 (* We include parentheses and commas in the names that we
518 assign to expanded nonterminals, because that is more
519 readable and acceptable in many situations. We replace them
520 with underscores in situations where these characters are
523 Printf.sprintf
"%s(%s)"
525 (separated_list_to_string
mangle "," ps
)
528 let name_of symbol parameters =
529 let param = ParameterApp
(symbol, parameters) in
531 InstanceTable.find
rule_names param
533 let name = ensure_fresh (mangle param) in
534 InstanceTable.add
rule_names param name;
537 (* Given the substitution [subst] from parameters to non terminal, we
538 instantiate the parameterized branch. *)
539 let rec expand_branch subst pbranch
=
540 let new_producers = List.map
541 (function (ido
, p
) ->
542 let sym, actual_parameters
=
543 Parameters.unapp p
in
544 let sym, actual_parameters
=
546 match List.assoc
sym.value subst
with
548 x
, subst_parameters subst actual_parameters
550 | ParameterApp
(x
, ps
) ->
551 assert (actual_parameters
= []);
555 sym, subst_parameters subst actual_parameters
557 (* Instantiate the definition of the producer. *)
558 (expand_branches subst
sym actual_parameters
, Option.map
Positions.value ido
))
562 branch_position
= pbranch
.pr_branch_position
;
563 producers
= new_producers;
564 action
= pbranch
.pr_action
;
565 branch_shift_precedence
= pbranch
.pr_branch_shift_precedence
;
566 branch_reduce_precedence
= pbranch
.pr_branch_reduce_precedence
;
569 (* Instantiate the branches of sym for a particular set of actual
571 and expand_branches subst
sym actual_parameters
=
572 let nsym = name_of sym actual_parameters
in
574 if not
(Hashtbl.mem
expanded_rules nsym) then begin
575 let prule = StringMap.find
(Positions.value sym) p_grammar
.p_rules
in
577 (* Type checking ensures that parameterized non terminal
578 instantiations are well defined. *)
579 assert (List.length
prule.pr_parameters
580 = List.length actual_parameters
);
581 List.combine
prule.pr_parameters actual_parameters
@ subst in
582 Hashtbl.add
expanded_rules nsym
583 { branches
= []; positions
= []; inline_flag
= false };
584 let rules = List.map
(expand_branch subst) prule.pr_branches
in
585 Hashtbl.replace
expanded_rules nsym
588 positions
= prule.pr_positions
;
589 inline_flag
= prule.pr_inline_flag
;
593 (* If [sym] is a terminal, then it is not in [p_grammar.p_rules].
594 Expansion is not needed. *)
595 with Not_found
-> Positions.value sym
597 let rec types_from_list = function
598 | [] -> StringMap.empty
600 let accu = types_from_list q
in
601 let mangled = mangle nt in
602 if StringMap.mem
mangled accu then
603 Error.error
[Positions.position
(Parameters.with_pos
nt)]
605 "There are multiple %%type definitions for nonterminal %s."
607 StringMap.add
mangled (Positions.value ty) accu
610 let start_symbols = StringMap.domain
(p_grammar
.p_start_symbols
) in
612 preludes
= p_grammar
.p_preludes
;
613 postludes
= p_grammar
.p_postludes
;
614 parameters = p_grammar
.p_parameters
;
615 start_symbols = start_symbols;
616 types
= types_from_list p_grammar
.p_types
;
617 tokens
= p_grammar
.p_tokens
;
619 let closed_rules = StringMap.fold
620 (fun k
prule rules ->
621 (* If [k] is a start symbol then it cannot be parameterized. *)
622 if prule.pr_parameters
<> [] && StringSet.mem k
start_symbols then
624 (Printf.sprintf
"The start symbol `%s' cannot be parameterized."
627 (* Entry points are the closed non terminals. *)
628 if prule.pr_parameters
= [] then
630 branches
= List.map
(expand_branch []) prule.pr_branches
;
631 positions
= prule.pr_positions
;
632 inline_flag
= prule.pr_inline_flag
;
638 Hashtbl.fold
StringMap.add
expanded_rules closed_rules