02be1b5d5edf15e81ab0e0acd547ad469ff41f5e
[bpt/coccinelle.git] / bundles / menhirLib / menhir-20120123 / src / parameterizedGrammar.ml
1 (**************************************************************************)
2 (* *)
3 (* Menhir *)
4 (* *)
5 (* François Pottier, INRIA Rocquencourt *)
6 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
7 (* *)
8 (* Copyright 2005-2008 Institut National de Recherche en Informatique *)
9 (* et en Automatique. All rights reserved. This file is distributed *)
10 (* under the terms of the Q Public License version 1.0, with the change *)
11 (* described in file LICENSE. *)
12 (* *)
13 (**************************************************************************)
14
15 open Positions
16 open Syntax
17 open UnparameterizedSyntax
18 open InternalSyntax
19 open Misc
20
21 (* Inference for non terminals. *)
22
23 (* Unification variables convey [variable_info] to describe
24 the multi-equation they take part of. *)
25 type variable_info =
26 {
27 mutable structure : nt_type option;
28 mutable name : string option;
29 mutable mark : Mark.t
30 }
31
32 (* [UnionFind] is used to improve the union and the equality test
33 between multi-equations. *)
34 and variable = variable_info UnionFind.point
35
36 (* Types are simple types.
37 [star] denotes the type of ground symbol (non terminal or terminal).
38 [Arrow] describes the type of a parameterized non terminal. *)
39 and nt_type =
40 Arrow of variable list
41
42 let star =
43 Arrow []
44
45 (* [var_name] is a name generator for unification variables. *)
46 let var_name =
47 let name_counter = ref (-1) in
48 let next_name () =
49 incr name_counter;
50 String.make 1 (char_of_int (97 + !name_counter mod 26))
51 ^ let d = !name_counter / 26 in if d = 0 then "" else string_of_int d
52 in
53 fun v ->
54 let repr = UnionFind.find v in
55 match repr.name with
56 None -> let name = next_name () in repr.name <- Some name; name
57 | Some x -> x
58
59 (* [string_of_nt_type] is a simple pretty printer for types (they can be
60 recursive). *)
61
62 (* 2011/04/05: types can no longer be recursive, but I won't touch the printer -fpottier *)
63
64 let string_of paren_fun ?paren ?colors t : string =
65 let colors =
66 match colors with
67 None -> (Mark.fresh (), Mark.fresh ())
68 | Some cs -> cs
69 in
70 let s, p = paren_fun colors t in
71 if paren <> None && p = true then
72 "("^ s ^")"
73 else s
74
75 let rec paren_nt_type ((white, black) as colors) = function
76
77 Arrow [] ->
78 "*", false
79
80 | Arrow ins ->
81 let args = separated_list_to_string
82 (string_of paren_var ~paren:true ~colors) ", " ins
83 in
84 let args =
85 if List.length ins > 1 then
86 "("^ args ^ ")"
87 else
88 args
89 in
90 args^" -> *", true
91
92 and paren_var (white, black) x =
93 let descr = UnionFind.find x in
94 if Mark.same descr.mark white then begin
95 descr.mark <- black;
96 var_name x, false
97 end
98 else begin
99 descr.mark <- white;
100 let s, p = match descr.structure with
101 None -> var_name x, false
102 | Some t -> paren_nt_type (white, black) t
103 in
104 if Mark.same descr.mark black then
105 (var_name x ^ " = " ^ s, true)
106 else
107 (s, p)
108 end
109
110 let string_of_nt_type ?paren ?colors t =
111 string_of ?colors paren_nt_type t
112
113 let string_of_var ?paren ?colors v =
114 string_of ?colors paren_var v
115
116 (* [print_env env] returns a string description of the typing environment. *)
117 let print_env =
118 List.iter (fun (k, (_, v)) ->
119 Printf.eprintf "%s: %s\n" k (string_of_var v))
120
121 (* [occurs_check x y] checks that [x] does not occur within [y]. *)
122
123 let dfs action x =
124
125 let black = Mark.fresh () in
126
127 let rec visit_var x =
128 let descr = UnionFind.find x in
129 if not (Mark.same descr.mark black) then begin
130 descr.mark <- black;
131 action x;
132 match descr.structure with
133 | None ->
134 ()
135 | Some t ->
136 visit_term t
137 end
138
139 and visit_term (Arrow ins) =
140 List.iter visit_var ins
141
142 in
143 visit_var x
144
145 exception OccursError of variable * variable
146
147 let occurs_check x y =
148 dfs (fun z -> if UnionFind.equivalent x z then raise (OccursError (x, y))) y
149
150 (* First order unification. *)
151
152 (* 2011/04/05: perform an eager occurs check and prevent the construction
153 of any cycles. *)
154
155 let fresh_flexible_variable () =
156 UnionFind.fresh { structure = None; name = None; mark = Mark.none }
157
158 let fresh_structured_variable t =
159 UnionFind.fresh { structure = Some t; name = None; mark = Mark.none }
160
161 let star_variable =
162 fresh_structured_variable star
163
164 exception UnificationError of nt_type * nt_type
165 exception BadArityError of int * int
166
167 let rec unify_var toplevel x y =
168 if not (UnionFind.equivalent x y) then
169 let reprx, repry = UnionFind.find x, UnionFind.find y in
170 match reprx.structure, repry.structure with
171 None, Some t -> occurs_check x y; UnionFind.union x y
172 | Some t, None -> occurs_check y x; UnionFind.union y x
173 | None, None -> UnionFind.union x y
174 | Some t, Some t' -> unify toplevel t t'; UnionFind.union x y
175
176 and unify toplevel t1 t2 =
177 match t1, t2 with
178
179 | Arrow ins, Arrow ins' ->
180 let n1, n2 = List.length ins, List.length ins' in
181 if n1 <> n2 then
182 if n1 = 0 || n2 = 0 || not toplevel then
183 raise (UnificationError (t1, t2))
184 else
185 (* the flag [toplevel] is used only here and influences which
186 exception is raised; BadArityError is raised only at toplevel *)
187 raise (BadArityError (n1, n2));
188 List.iter2 (unify_var false) ins ins'
189
190 let unify_var x y =
191 unify_var true x y
192
193 (* Typing environment. *)
194 type environment =
195 (string * (Positions.t list * variable)) list
196
197 (* [lookup x env] returns the type related to [x] in the typing environment
198 [env].
199 By convention, identifiers that are not in [env] are terminals. They are
200 given the type [Star]. *)
201 let lookup x (env: environment) =
202 try
203 snd (List.assoc x env)
204 with Not_found -> star_variable
205
206 (* This function checks that the symbol [k] has the type [expected_type]. *)
207 let check positions env k expected_type =
208 let inference_var = lookup k env in
209 let checking_var = fresh_structured_variable expected_type in
210 try
211 unify_var inference_var checking_var
212 with
213 UnificationError (t1, t2) ->
214 Error.error
215 positions
216 (Printf.sprintf
217 "How is this symbol parameterized?\n\
218 It is used at sorts %s and %s.\n\
219 The sort %s is not compatible with the sort %s."
220 (string_of_var inference_var) (string_of_var checking_var)
221 (string_of_nt_type t1) (string_of_nt_type t2))
222
223 | BadArityError (n1, n2) ->
224 Error.error
225 positions
226 (Printf.sprintf
227 "does this symbol expect %d or %d arguments?"
228 (min n1 n2) (max n1 n2))
229
230 | OccursError (x, y) ->
231 Error.error
232 positions
233 (Printf.sprintf
234 "How is this symbol parameterized?\n\
235 It is used at sorts %s and %s.\n\
236 The sort %s cannot be unified with the sort %s."
237 (string_of_var inference_var) (string_of_var checking_var)
238 (string_of_var x) (string_of_var y))
239
240
241
242 (* An identifier can be used either in a total application or as a
243 higher-order non terminal (no partial application is allowed). *)
244 let rec parameter_type env = function
245 | ParameterVar x ->
246 lookup x.value env
247
248 | ParameterApp (x, args) ->
249 assert (args <> []);
250 let expected_type =
251 (* [x] is applied, it must be to the exact number
252 of arguments. *)
253 Arrow (List.map (parameter_type env) args)
254 in
255 (* Check the well-formedness of the application. *)
256 check [x.position] env x.value expected_type;
257
258 (* Similarly, if it was a total application the result is
259 [Star] otherwise it is the flexible variable. *)
260 star_variable
261
262 let check_grammar p_grammar =
263 (* [n] is the grammar size. *)
264 let n = StringMap.cardinal p_grammar.p_rules in
265
266 (* The successors of the non terminal [N] are its producers. It
267 induce a graph over the non terminals and its successor function
268 is implemented by [successors]. Non terminals are indexed using
269 [nt].
270 *)
271 let nt, conv, iconv = index_map p_grammar.p_rules in
272 let parameters, name, branches, positions =
273 (fun n -> (nt n).pr_parameters), (fun n -> (nt n).pr_nt),
274 (fun n -> (nt n).pr_branches), (fun n -> (nt n).pr_positions)
275 in
276
277 (* The successors function is implemented as an array using the
278 indexing previously created. *)
279 let successors =
280 Array.init n (fun node ->
281 (* We only are interested by parameterized non terminals. *)
282 if parameters node <> [] then
283 List.fold_left (fun succs { pr_producers = symbols } ->
284 List.fold_left (fun succs -> function (_, p) ->
285 let symbol, _ = Parameters.unapp p in
286 try
287 let symbol_node = conv symbol.value in
288 (* [symbol] is a parameterized non terminal, we add it
289 to the successors set. *)
290 if parameters symbol_node <> [] then
291 IntSet.add symbol_node succs
292 else
293 succs
294 with Not_found ->
295 (* [symbol] is a token, it is not interesting for type inference
296 purpose. *)
297 succs
298 ) succs symbols
299 ) IntSet.empty (branches node)
300 else
301 Misc.IntSet.empty
302 )
303 in
304
305 (* The successors function and the indexing induce the following graph
306 module. *)
307 let module RulesGraph =
308 struct
309
310 type node = int
311
312 let n = n
313
314 let index node =
315 node
316
317 let successors f node =
318 IntSet.iter f successors.(node)
319
320 let iter f =
321 for i = 0 to n - 1 do
322 f i
323 done
324
325 end
326 in
327 let module ConnectedComponents = Tarjan.Run (RulesGraph) in
328 (* We check that:
329 - all the parameterized definitions of a particular component
330 have the same number of parameters.
331 - every parameterized non terminal definition always uses
332 parameterized definitions of the same component with its
333 formal parameters.
334
335 Components are marked during the traversal:
336 -1 means unvisited
337 n with n > 0 is the number of parameters of the clique.
338 *)
339 let unseen = -1 in
340 let marked_components = Array.create n unseen in
341
342 let flexible_arrow args =
343 let ty = Arrow (List.map (fun _ -> fresh_flexible_variable ()) args) in
344 fresh_structured_variable ty
345 in
346
347 (* [nt_type i] is the type of the i-th non terminal. *)
348 let nt_type i =
349 match parameters i with
350 | [] ->
351 star_variable
352
353 | x ->
354 flexible_arrow x
355 in
356
357 (* [actual_parameters_as_formal] is the well-formedness checker for
358 parameterized non terminal application. *)
359 let actual_parameters_as_formal actual_parameters formal_parameters =
360 List.for_all2 (fun y -> (function ParameterVar x -> x.value = y
361 | _ -> false))
362 formal_parameters actual_parameters
363 in
364
365 (* The environment is initialized. *)
366 let env : environment = StringMap.fold
367 (fun k r acu ->
368 (k, (r.pr_positions, nt_type (conv k)))
369 :: acu)
370 p_grammar.p_rules []
371 in
372
373 (* We traverse the graph checking each parameterized non terminal
374 definition is well-formed. *)
375 RulesGraph.iter
376 (fun i ->
377 let params = parameters i
378 and iname = name i
379 and repr = ConnectedComponents.representative i
380 and positions = positions i
381 in
382
383 (* The environment is augmented with the parameters whose types are
384 unknown. *)
385 let env' = List.map
386 (fun k -> (k, (positions, fresh_flexible_variable ()))) params
387 in
388 let env = env' @ env in
389
390 (* The type of the parameterized non terminal is constrained to be
391 [expected_ty]. *)
392 let check_type () =
393 check positions env iname (Arrow (List.map (fun (_, (_, t)) -> t) env'))
394 in
395
396 (* We check the number of parameters. *)
397 let check_parameters () =
398 let parameters_len = List.length params in
399 (* The component is visited for the first time. *)
400 if marked_components.(repr) = unseen then
401 marked_components.(repr) <- parameters_len
402 else (* Otherwise, we check that the arity is homogeneous
403 in the component. *)
404 if marked_components.(repr) <> parameters_len then
405 Error.error positions
406 (Printf.sprintf
407 "Mutually recursive definitions must have the same parameters.\n\
408 This is not the case for %s and %s."
409 (name repr) iname)
410 in
411
412 (* In each production rule, the parameterized non terminal
413 of the same component must be instantiated with the same
414 formal arguments. *)
415 let check_producers () =
416 List.iter
417 (fun { pr_producers = symbols } -> List.iter
418 (function (_, p) ->
419 let symbol, actuals = Parameters.unapp p in
420 (* We take the use of each symbol into account. *)
421 check [ symbol.position ] env symbol.value
422 (if actuals = [] then star else
423 Arrow (List.map (parameter_type env) actuals));
424 (* If it is in the same component, check in addition that
425 the arguments are the formal arguments. *)
426 try
427 let idx = conv symbol.value in
428 if ConnectedComponents.representative idx = repr then
429 if not (actual_parameters_as_formal actuals params)
430 then
431 Error.error [ symbol.position ]
432 (Printf.sprintf
433 "Mutually recursive definitions must have the same \
434 parameters.\n\
435 This is not the case for %s."
436 (let name1, name2 = (name idx), (name i) in
437 if name1 <> name2 then name1 ^ " and "^ name2
438 else name1))
439 with _ -> ())
440 symbols) (branches i)
441 in
442 check_type ();
443 check_parameters ();
444 check_producers ())
445
446
447 let rec subst_parameter subst = function
448 | ParameterVar x ->
449 (try
450 List.assoc x.value subst
451 with Not_found ->
452 ParameterVar x)
453
454 | ParameterApp (x, ps) ->
455 (try
456 match List.assoc x.value subst with
457 | ParameterVar y ->
458 ParameterApp (y, List.map (subst_parameter subst) ps)
459
460 | ParameterApp _ ->
461 (* Type-checking ensures that we cannot do partial
462 application. Consequently, if an higher-order non terminal
463 is an actual argument, it cannot be the result of a
464 partial application. *)
465 assert false
466
467 with Not_found ->
468 ParameterApp (x, List.map (subst_parameter subst) ps))
469
470 let subst_parameters subst =
471 List.map (subst_parameter subst)
472
473 let names_of_p_grammar p_grammar =
474 StringMap.fold (fun tok _ acu -> StringSet.add tok acu)
475 p_grammar.p_tokens StringSet.empty
476 $$ (StringMap.fold (fun nt _ acu -> StringSet.add nt acu)
477 p_grammar.p_rules)
478
479 let expand p_grammar =
480 (* Check that it is safe to expand this parameterized grammar. *)
481 check_grammar p_grammar;
482
483 (* Set up a mechanism that ensures that names are unique -- and, in
484 fact, ensures the stronger condition that normalized names are
485 unique. *)
486
487 let names =
488 ref (StringSet.empty)
489 in
490 let ensure_fresh name =
491 let normalized_name = Misc.normalize name in
492 if StringSet.mem normalized_name !names then
493 Error.error []
494 (Printf.sprintf "internal name clash over %s" normalized_name);
495 names := StringSet.add normalized_name !names;
496 name
497 in
498 let expanded_rules =
499 Hashtbl.create 13
500 in
501 let module InstanceTable =
502 Hashtbl.Make (Parameters)
503 in
504 let rule_names =
505 InstanceTable.create 13
506 in
507
508 (* [mangle p] chooses a name for the new nonterminal symbol that corresponds
509 to the parameter [p]. *)
510
511 let rec mangle = function
512 | ParameterVar x
513 | ParameterApp (x, []) ->
514 Positions.value x
515 | ParameterApp (x, ps) ->
516
517 (* We include parentheses and commas in the names that we
518 assign to expanded nonterminals, because that is more
519 readable and acceptable in many situations. We replace them
520 with underscores in situations where these characters are
521 not valid. *)
522
523 Printf.sprintf "%s(%s)"
524 (Positions.value x)
525 (separated_list_to_string mangle "," ps)
526
527 in
528 let name_of symbol parameters =
529 let param = ParameterApp (symbol, parameters) in
530 try
531 InstanceTable.find rule_names param
532 with Not_found ->
533 let name = ensure_fresh (mangle param) in
534 InstanceTable.add rule_names param name;
535 name
536 in
537 (* Given the substitution [subst] from parameters to non terminal, we
538 instantiate the parameterized branch. *)
539 let rec expand_branch subst pbranch =
540 let new_producers = List.map
541 (function (ido, p) ->
542 let sym, actual_parameters =
543 Parameters.unapp p in
544 let sym, actual_parameters =
545 try
546 match List.assoc sym.value subst with
547 | ParameterVar x ->
548 x, subst_parameters subst actual_parameters
549
550 | ParameterApp (x, ps) ->
551 assert (actual_parameters = []);
552 x, ps
553
554 with Not_found ->
555 sym, subst_parameters subst actual_parameters
556 in
557 (* Instantiate the definition of the producer. *)
558 (expand_branches subst sym actual_parameters, Option.map Positions.value ido))
559 pbranch.pr_producers
560 in
561 {
562 branch_position = pbranch.pr_branch_position;
563 producers = new_producers;
564 action = pbranch.pr_action;
565 branch_shift_precedence = pbranch.pr_branch_shift_precedence;
566 branch_reduce_precedence = pbranch.pr_branch_reduce_precedence;
567 }
568
569 (* Instantiate the branches of sym for a particular set of actual
570 parameters. *)
571 and expand_branches subst sym actual_parameters =
572 let nsym = name_of sym actual_parameters in
573 try
574 if not (Hashtbl.mem expanded_rules nsym) then begin
575 let prule = StringMap.find (Positions.value sym) p_grammar.p_rules in
576 let subst =
577 (* Type checking ensures that parameterized non terminal
578 instantiations are well defined. *)
579 assert (List.length prule.pr_parameters
580 = List.length actual_parameters);
581 List.combine prule.pr_parameters actual_parameters @ subst in
582 Hashtbl.add expanded_rules nsym
583 { branches = []; positions = []; inline_flag = false };
584 let rules = List.map (expand_branch subst) prule.pr_branches in
585 Hashtbl.replace expanded_rules nsym
586 {
587 branches = rules;
588 positions = prule.pr_positions;
589 inline_flag = prule.pr_inline_flag;
590 }
591 end;
592 nsym
593 (* If [sym] is a terminal, then it is not in [p_grammar.p_rules].
594 Expansion is not needed. *)
595 with Not_found -> Positions.value sym
596 in
597 let rec types_from_list = function
598 | [] -> StringMap.empty
599 | (nt, ty)::q ->
600 let accu = types_from_list q in
601 let mangled = mangle nt in
602 if StringMap.mem mangled accu then
603 Error.error [Positions.position (Parameters.with_pos nt)]
604 (Printf.sprintf
605 "There are multiple %%type definitions for nonterminal %s."
606 mangled);
607 StringMap.add mangled (Positions.value ty) accu
608 in
609
610 let start_symbols = StringMap.domain (p_grammar.p_start_symbols) in
611 {
612 preludes = p_grammar.p_preludes;
613 postludes = p_grammar.p_postludes;
614 parameters = p_grammar.p_parameters;
615 start_symbols = start_symbols;
616 types = types_from_list p_grammar.p_types;
617 tokens = p_grammar.p_tokens;
618 rules =
619 let closed_rules = StringMap.fold
620 (fun k prule rules ->
621 (* If [k] is a start symbol then it cannot be parameterized. *)
622 if prule.pr_parameters <> [] && StringSet.mem k start_symbols then
623 Error.error []
624 (Printf.sprintf "The start symbol `%s' cannot be parameterized."
625 k);
626
627 (* Entry points are the closed non terminals. *)
628 if prule.pr_parameters = [] then
629 StringMap.add k {
630 branches = List.map (expand_branch []) prule.pr_branches;
631 positions = prule.pr_positions;
632 inline_flag = prule.pr_inline_flag;
633 } rules
634 else rules)
635 p_grammar.p_rules
636 StringMap.empty
637 in
638 Hashtbl.fold StringMap.add expanded_rules closed_rules
639 }