Commit | Line | Data |
---|---|---|
0708f913 C |
1 | (* Yoann Padioleau |
2 | * | |
3 | * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes | |
34e49164 C |
4 | * |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU General Public License (GPL) | |
7 | * version 2 as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * file license.txt for more details. | |
13 | *) | |
14 | ||
15 | open Common | |
16 | ||
17 | module TH = Token_helpers | |
18 | module LP = Lexer_parser | |
19 | ||
485bce71 C |
20 | module Stat = Parsing_stat |
21 | ||
34e49164 C |
22 | (*****************************************************************************) |
23 | (* Wrappers *) | |
24 | (*****************************************************************************) | |
(* Module-local diagnostic printers, built by Common.mk_pr2_wrappers from
 * the verbose_parsing flag. *)
let (pr2_err, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
34e49164 C |
26 | |
27 | (*****************************************************************************) | |
28 | (* Helpers *) | |
29 | (*****************************************************************************) | |
30 | ||
(* Pair the lexeme currently matched in [lexbuf] with its start offset. *)
let lexbuf_to_strpos lexbuf =
  let lexeme = Lexing.lexeme lexbuf in
  let start = Lexing.lexeme_start lexbuf in
  (lexeme, start)
33 | ||
(* Same shape as lexbuf_to_strpos, but taken from an already lexed token:
 * (TH.str_of_tok tok, TH.pos_of_tok tok). *)
let token_to_strpos tok =
  let str = TH.str_of_tok tok in
  let pos = TH.pos_of_tok tok in
  (str, pos)
36 | ||
37 | ||
34e49164 C |
(* Build the (text, tokens) pair attached to a program element.
 * The text is the concatenation of the strings of the tokens whose parse
 * info is an OriginTok; other tokens contribute nothing.  An
 * AbstractLineTok is not expected here and raises Impossible.
 * [filename] is currently unused
 * (old implementation: get_slice_file filename (line1, line2)). *)
let mk_info_item2 filename toks =
  let buf = Buffer.create 100 in
  let append_origin tok =
    match TH.pinfo_of_tok tok with
    | Ast_c.AbstractLineTok _ -> raise Impossible
    | Ast_c.OriginTok _ -> Buffer.add_string buf (TH.str_of_tok tok)
    | _ -> ()
  in
  List.iter append_origin toks;
  (Buffer.contents buf, toks)
55 | ||
(* Profiled entry point for mk_info_item2. *)
let mk_info_item a b =
  let run () = mk_info_item2 a b in
  Common.profile_code "C parsing.mk_info_item" run
59 | ||
60 | ||
(* Keep only the infos of [xs] whose line (Ast_c.line_of_info) is [line]. *)
let info_same_line line xs =
  let on_line info = Ast_c.line_of_info info =|= line in
  List.filter on_line xs
34e49164 | 63 | |
34e49164 | 64 | |
978fd7e5 C |
65 | (* move in cpp_token_c ? *) |
(* [passed] is the reversed list of tokens skipped by error recovery.
 * Returns true when, comments aside, those tokens start with a TDefine
 * and end with a TDefEOL.  Fewer than 2 non-comment tokens is reported
 * as weird and yields false. *)
let is_define_passed passed =
  let significant = List.filter TH.is_not_comment (List.rev passed) in
  if List.length significant < 2
  then begin
    pr2_err "WEIRD: length list of error recovery tokens < 2 ";
    false
  end
  else
    match Common.head_middle_tail significant with
    | (Parser_c.TDefine _, _, Parser_c.TDefEOL _) -> true
    | _ -> false
79 | ||
80 | ||
81 | (*****************************************************************************) | |
82 | (* Error diagnostic *) | |
83 | (*****************************************************************************) | |
84 | ||
(* Error message for the position of [tok].  The detailed message from
 * Common.error_message is only produced when verbose_parsing is set;
 * otherwise a short hint naming the file is returned. *)
let error_msg_tok tok =
  let file = TH.file_of_tok tok in
  if not !Flag_parsing_c.verbose_parsing
  then "error in " ^ file ^ "; set verbose_parsing for more info"
  else Common.error_message file (token_to_strpos tok)
90 | ||
91 | ||
(* Print the lines start_line..end_line (both included) of [filelines],
 * flagging the line equal to [line_error] with a louder prefix.
 * The "badcount" header reports end_line - start_line. *)
let print_bad line_error (start_line, end_line) filelines =
  pr2 ("badcount: " ^ i_to_s (end_line - start_line));
  for i = start_line to end_line do
    let prefix = if i =|= line_error then "BAD:!!!!!" else "bad:" in
    pr2 (prefix ^ " " ^ filelines.(i))
  done
104 | ||
105 | ||
34e49164 C |
106 | (*****************************************************************************) |
107 | (* Stats on what was passed/commentized *) | |
108 | (*****************************************************************************) | |
109 | ||
(* Select, among the tokens [xs], those that were turned into comments and
 * are worth reporting; return their parse infos.
 *
 * - TCommentMisc and TAction are always reported.
 * - A TCommentCpp is reported unless it is "legally passed" at the current
 *   Flag_parsing_c.filter_passed_level: its cpp kind is in the list allowed
 *   for that level, or its string matches "__.*".  At level 0 nothing is
 *   legally passed.  A level outside 0..5 is a failure.
 *
 * old (when the kind was Ast_c.CppOther): strings matching "KERN_.*" or
 * "__.*" were dropped, everything else reported.
 *)
let commentized xs =
  (* None: level 0; Some kinds: the cpp kinds passed silently at this level *)
  let silently_passed_kinds () =
    match !Flag_parsing_c.filter_passed_level with
    | 0 -> None
    | 1 -> Some [Token_c.CppAttr]
    | 2 -> Some [Token_c.CppAttr;Token_c.CppPassingNormal]
    | 3 ->
        Some [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective]
    | 4 ->
        Some [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
    | 5 ->
        Some [Token_c.CppAttr;Token_c.CppPassingNormal;
              Token_c.CppDirective;Token_c.CppMacro]
    | _ -> failwith "not valid level passing number"
  in
  let pinfo_if_reported = function
    | Parser_c.TCommentCpp (cppkind, ii) ->
        let s = Ast_c.str_of_info ii in
        let legal_passing =
          match silently_passed_kinds () with
          | None -> false
          | Some kinds -> List.mem cppkind kinds || (s =~ "__.*")
        in
        if legal_passing then None else Some (ii.Ast_c.pinfo)
    | Parser_c.TCommentMisc ii
    | Parser_c.TAction ii ->
        Some (ii.Ast_c.pinfo)
    | _ ->
        None
  in
  Common.map_filter pinfo_if_reported xs
164 | ||
(* Count the line changes among the commentized tokens of [xs]: walking
 * the OriginTok/ExpandedTok infos in order, the counter is bumped each
 * time the line differs from the previously seen one (initially -1). *)
let count_lines_commentized xs =
  let step (last_line, count) pinfo_kind =
    match pinfo_kind with
    | Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_, (pinfo, _)) ->
        let newline = pinfo.Common.line in
        if newline <> last_line
        then (newline, count + 1)
        else (last_line, count)
    | _ -> (last_line, count)
  in
  snd (List.fold_left step (-1, 0) (commentized xs))
182 | ||
183 | ||
184 | ||
(* Print on stderr the text of the commentized tokens of [xs], one
 * "passed:" group per source line; newlines inside a token are removed.
 * A final newline is emitted only when there was something to print. *)
let print_commentized xs =
  let ys = commentized xs in
  let last_line = ref (-1) in
  let newline_re = Str.regexp "\n" in
  let show = function
    | Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_, (pinfo, _)) ->
        let cur = pinfo.Common.line in
        let text = Str.global_replace newline_re "" pinfo.Common.str in
        if cur =|= !last_line
        then prerr_string (text ^ " ")
        else begin
          (* first group has no leading newline *)
          pr2_no_nl (if !last_line =|= (-1) then "passed:" else "\npassed:");
          last_line := cur;
          pr2_no_nl (text ^ " ")
        end
    | _ -> ()
  in
  List.iter show ys;
  if not (null ys) then pr2 ""
210 | ||
211 | ||
212 | ||
213 | ||
214 | (*****************************************************************************) | |
215 | (* Lexing only *) | |
216 | (*****************************************************************************) | |
217 | ||
218 | (* called by parse_print_error_heuristic *) | |
(* Lex the whole [file] with Lexer_c.token and return all its tokens, the
 * EOF token included as last element.  Each token's parse info is
 * completed (line and column) via Common.complete_parse_info_large.
 * A Lexer_c.Lexical error is turned into a Failure carrying the error
 * position; any other exception propagates unchanged. *)
let tokens2 file =
  let table = Common.full_charpos_to_pos_large file in

  (* fill in the line and col information *)
  let complete_pinfo ii =
    (* could assert pinfo.filename = file ? *)
    let pinfo =
      match Ast_c.pinfo_of_info ii with
      | Ast_c.OriginTok pi ->
          Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
      | Ast_c.ExpandedTok (pi, vpi) ->
          Ast_c.ExpandedTok
            ((Common.complete_parse_info_large file table pi), vpi)
      | Ast_c.FakeTok (s, vpi) -> Ast_c.FakeTok (s, vpi)
      | Ast_c.AbstractLineTok _ -> failwith "should not occur"
    in
    { ii with Ast_c.pinfo = pinfo }
  in

  Common.with_open_infile file (fun chan ->
    let lexbuf = Lexing.from_channel chan in
    let rec collect acc =
      let tok =
        TH.visitor_info_of_tok complete_pinfo (Lexer_c.token lexbuf) in
      if TH.is_eof tok
      then List.rev (tok :: acc)
      else collect (tok :: acc)
    in
    try collect []
    with Lexer_c.Lexical s ->
      failwith ("lexical error " ^ s ^ "\n =" ^
                (Common.error_message file (lexbuf_to_strpos lexbuf)))
  )
252 | ||
485bce71 C |
(* Run tokens2 on [a]; unless ~profile:false is given, the call is
 * accounted under the exclusive "LEXING" profiling label. *)
let time_lexing ?(profile=true) a =
  let lex () = tokens2 a in
  if not profile
  then lex ()
  else Common.profile_code_exclusif "LEXING" lex
485bce71 C |
(* Public lexing entry point: time_lexing wrapped in the
 * "C parsing.tokens" profiling label. *)
let tokens ?profile a =
  let timed () = time_lexing ?profile a in
  Common.profile_code "C parsing.tokens" timed
34e49164 C |
259 | |
260 | ||
(* Lex [string] with Lexer_c.token and return all its tokens, the EOF
 * token included as last element.  No position completion is done here
 * (contrary to tokens2).
 *
 * The tokens are accumulated tail-recursively, as in tokens2, so that a
 * long input cannot exhaust the stack.
 *
 * Raises Failure on a Lexer_c.Lexical error; other exceptions propagate. *)
let tokens_of_string string =
  let lexbuf = Lexing.from_string string in
  let rec collect acc =
    let tok = Lexer_c.token lexbuf in
    if TH.is_eof tok
    then List.rev (tok :: acc)
    else collect (tok :: acc)
  in
  try collect []
  with Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
274 | ||
275 | ||
276 | (*****************************************************************************) | |
277 | (* Parsing, but very basic, no more used *) | |
278 | (*****************************************************************************) | |
279 | ||
280 | (* | |
281 | * !!! These functions use refs and are not reentrant !!! So take care. | |
282 | * They use globals defined in Lexer_parser. | |
283 | * | |
284 | * update: because now lexer return comments tokens, those functions | |
285 | * may not work anymore. | |
286 | *) | |
287 | ||
(* Parse [file] by feeding Lexer_c.token directly to Parser_c.main.
 *
 * The input channel is now managed by Common.with_open_infile (the helper
 * tokens2 already uses) instead of a bare open_in that was never closed.
 * NOTE(review): confirm with_open_infile also closes on exceptions. *)
let parse file =
  Common.with_open_infile file (fun chan ->
    let lexbuf = Lexing.from_channel chan in
    Parser_c.main Lexer_c.token lexbuf
  )
292 | ||
293 | ||
(* Like [parse], but lexical, parse and semantic errors are converted into
 * a Failure whose message includes the position reached in the lexbuf.
 * Any other exception propagates unchanged.
 *
 * The input channel is now managed by Common.with_open_infile (the helper
 * tokens2 already uses) instead of a bare open_in that was never closed.
 * NOTE(review): confirm with_open_infile also closes on exceptions. *)
let parse_print_error file =
  Common.with_open_infile file (fun chan ->
    let lexbuf = Lexing.from_channel chan in
    let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
    try
      Parser_c.main Lexer_c.token lexbuf
    with
    | Lexer_c.Lexical s ->
        failwith ("lexical error " ^s^ "\n =" ^ error_msg ())
    | Parsing.Parse_error ->
        failwith ("parse error \n = " ^ error_msg ())
    | Semantic_c.Semantic (s, _) ->
        failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
  )
309 | ||
310 | ||
311 | ||
312 | ||
313 | (*****************************************************************************) | |
314 | (* Parsing subelements, useful to debug parser *) | |
315 | (*****************************************************************************) | |
316 | ||
317 | (* | |
318 | * !!! These functions use refs and are not reentrant !!! So take care. | |
319 | * They use globals defined in Lexer_parser. | |
320 | *) | |
321 | ||
322 | ||
323 | (* old: | |
324 | * let parse_gen parsefunc s = | |
325 | * let lexbuf = Lexing.from_string s in | |
326 | * let result = parsefunc Lexer_c.token lexbuf in | |
327 | * result | |
328 | *) | |
329 | ||
(* Run the parser entry point [parsefunc] on the string [s].
 *
 * Why not simply hand Lexer_c.token to the parser?  Because the lexer now
 * keeps comments.  So the string is lexed up front, comments are filtered
 * out, and the parser is served from that list through a "current token"
 * cell.  Once the current token is EOF it is returned again and again
 * (with a diagnostic).  The lexbuf given to the parser is a fake one that
 * must never be read.
 *)
let parse_gen parsefunc s =
  let remaining = ref (List.filter TH.is_not_comment (tokens_of_string s)) in
  let current = ref (List.hd !remaining) in

  let next_token _lexbuf =
    if TH.is_eof !current
    then pr2_err "LEXER: ALREADY AT END"
    else current := Common.pop2 remaining;
    !current
  in
  let lexbuf_fake = Lexing.from_function (fun _ _ -> raise Impossible) in
  parsefunc next_token lexbuf_fake
355 | ||
356 | ||
(* Debugging helpers: parse a string as a type name, a statement or an
 * expression, using the corresponding Parser_c entry point. *)
let type_of_string s = parse_gen Parser_c.type_name s
let statement_of_string s = parse_gen Parser_c.statement s
let expression_of_string s = parse_gen Parser_c.expr s
360 | ||
361 | (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *) | |
362 | ||
363 | ||
364 | ||
365 | ||
366 | ||
34e49164 | 367 | (*****************************************************************************) |
485bce71 | 368 | (* Parsing default define macros, usually in a standard.h file *) |
34e49164 C |
369 | (*****************************************************************************) |
370 | ||
(* Lex [file] (without profiling), run Parsing_hacks.fix_tokens_define on
 * the tokens and extract the macro definitions with
 * Cpp_token_c.extract_macros.  verbose_lexing is switched off inside
 * Common.save_excursion, which is presumably what restores the flag
 * afterwards. *)
let extract_macros2 file =
  let quiet_extract () =
    Flag_parsing_c.verbose_lexing := false;
    tokens ~profile:false file
    +> Parsing_hacks.fix_tokens_define
    +> Cpp_token_c.extract_macros
  in
  Common.save_excursion Flag_parsing_c.verbose_lexing quiet_extract
34e49164 | 378 | |
978fd7e5 C |
(* extract_macros2 accounted under the exclusive "HACK" profiling label. *)
let extract_macros a =
  let run () = extract_macros2 a in
  Common.profile_code_exclusif "HACK" run
485bce71 | 381 | |
34e49164 C |
382 | |
383 | (*****************************************************************************) | |
978fd7e5 | 384 | (* Helper for main entry point *) |
34e49164 C |
385 | (*****************************************************************************) |
386 | ||
34e49164 C |
387 | |
388 | (* The use of local refs (remaining_tokens, passed_tokens, ...) makes | |
389 | * possible error recovery. Indeed, they allow to skip some tokens and | |
390 | * still be able to call again the ocamlyacc parser. It is ugly code | |
391 | * because we cant modify ocamllex and ocamlyacc. As we want some | |
392 | * extended lexing tricks, we have to use such refs. | |
393 | * | |
394 | * Those refs are now also used for my lalr(k) technique. Indeed, they | |
395 | * store the future and previous tokens that were parsed, and so | |
396 | * provide enough context information for powerful lexing tricks. | |
397 | * | |
398 | * - passed_tokens_last_ckp stores the passed tokens since last | |
485bce71 | 399 | * checkpoint. Used for NotParsedCorrectly and also to build the |
34e49164 C |
400 | * info_item attached to each program_element. |
401 | * - passed_tokens_clean is used for lookahead, in fact for lookback. | |
402 | * - remaining_tokens_clean is used for lookahead. Now remaining_tokens | |
403 | * contain some comments and so would make pattern matching difficult | |
404 | * in lookahead. Hence this variable. We would like also to get rid | |
405 | * of cpp instruction because sometimes a cpp instruction is between | |
406 | * two tokens and makes a pattern matching fail. But lookahead also | |
407 | * transform some cpp instruction (in comment) so can't remove them. | |
408 | * | |
409 | * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens, | |
410 | * whereas passed_tokens_clean and remaining_tokens_clean does not contain | |
411 | * comment-tokens. | |
412 | * | |
413 | * Normally we have: | |
414 | * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens | |
415 | * after the call to pop2. | |
416 | * toks = (reverse passed_tok) ++ remaining_tokens | |
417 | * at the end of the lexer_function call. | |
418 | * At the very beginning, cur_tok and remaining_tokens overlap, but not after. | |
419 | * At the end of lexer_function call, cur_tok overlap with passed_tok. | |
420 | * | |
421 | * convention: I use "tr" for "tokens refs" | |
485bce71 C |
422 | * |
423 | * I now also need this lexing trick because the lexer return comment | |
424 | * tokens. | |
34e49164 C |
425 | *) |
426 | ||
(* Mutable token cursor shared by lexer_function and get_one_elem.
 * rest / passed hold every token, comments included; rest_clean is built
 * from rest by filtering with TH.is_not_comment (see mk_tokens_state) and
 * passed_clean only receives tokens handed to the parser.
 * The passed lists are built by consing, hence most recent token first. *)
427 | type tokens_state = { | |
428 | mutable rest : Parser_c.token list; | |
429 | mutable rest_clean : Parser_c.token list; | |
430 | mutable current : Parser_c.token; | |
431 | (* it's passed since last "checkpoint", not passed from the beginning *) | |
432 | mutable passed : Parser_c.token list; | |
433 | mutable passed_clean : Parser_c.token list; | |
434 | } | |
708f4980 C |
435 | |
(* Fresh cursor positioned before the first token of [toks]: nothing is
 * passed yet and [current] is the head of [toks] (so [toks] must not be
 * empty, otherwise List.hd fails). *)
let mk_tokens_state toks =
  let first = List.hd toks in
  let without_comments = List.filter TH.is_not_comment toks in
  { rest = toks;
    rest_clean = without_comments;
    current = first;
    passed = [];
    passed_clean = [];
  }
444 | ||
445 | ||
446 | ||
(* Return a new record holding the same field values as [tr]; later
 * mutations of one record do not affect the other. *)
let clone_tokens_state tr =
  { tr with rest = tr.rest }
(* Overwrite every field of [dst] with the corresponding field of [src]. *)
let copy_tokens_state ~src ~dst =
  dst.current <- src.current;
  dst.rest <- src.rest;
  dst.rest_clean <- src.rest_clean;
  dst.passed <- src.passed;
  dst.passed_clean <- src.passed_clean
461 | ||
708f4980 | 462 | (* todo? agglomerate the x##b ? *) |
485bce71 C |
(* Drop the TMacroAttr tokens found among the first [n] elements of [xs];
 * the elements after the first [n] are returned untouched. *)
let rec filter_noise n xs =
  if n =|= 0
  then xs
  else
    match xs with
    | [] -> []
    | Parser_c.TMacroAttr _ :: rest -> filter_noise (n-1) rest
    | x :: rest -> x :: filter_noise (n-1) rest
474 | ||
(* Prepare a token list for Parsing_hacks.lookahead: the first token is
 * always kept, and filter_noise is applied to the 10 tokens after it. *)
let clean_for_lookahead xs =
  match xs with
  | [] -> []
  | first :: rest -> first :: filter_noise 10 rest
481 | ||
34e49164 | 482 | |
485bce71 C |
483 | |
484 | (* Hacked lex. This function use refs passed by parse_print_error_heuristic | |
485 | * tr means token refs. | |
486 | *) | |
(* Token supplier given to the yacc parser in place of a real lexer; the
 * [lexbuf] argument is only threaded through the recursive calls.
 *
 * Each call pops the head of tr.rest, makes it tr.current and records it
 * in tr.passed.  Comment tokens, and tokens turned into comments here or
 * by Parsing_hacks.lookahead, are skipped: the function calls itself to
 * fetch the next one.  Tokens actually returned to the parser are also
 * recorded in tr.passed_clean.  When tr.rest is empty, tr.current is
 * returned again.
 *
 * [pass] is the number of the current parsing attempt.
 *)
let rec lexer_function ~pass tr = fun lexbuf ->
  (* fetch the following token instead of the one just popped *)
  let skip () = lexer_function ~pass tr lexbuf in

  (* cpp directives found outside the toplevel context are commentized
   * when cpp_directive_passing is set or from the second pass on *)
  let directive_is_passed () =
    not (LP.current_context () =*= LP.InTopLevel) &&
    (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
  in

  match tr.rest with
  | [] ->
      pr2_err "ALREADY AT END";
      tr.current
  | tok :: remaining ->
      tr.rest <- remaining;
      tr.current <- tok;

      if !Flag_parsing_c.debug_lexer then Common.pr2_gen tok;

      if TH.is_comment tok
      then begin
        tr.passed <- tok :: tr.passed;
        skip ()
      end
      else begin
        (* rest_clean is rest minus the comments: its head must be tok *)
        let clean_head = List.hd tr.rest_clean in
        tr.rest_clean <- List.tl tr.rest_clean;
        assert (clean_head =*= tok);

        match tok with

        (* fix_define1.
         *
         * Why not do this in the parsing_hacks lookahead, with passing
         * as for some ifdef directives?  Because here some tokens must
         * sometimes be generated too, which needs access to tr.passed,
         * tr.rest, etc.
         *)
        | Parser_c.TDefine _ ->
            if directive_is_passed ()
            then begin
              incr Stat.nDefinePassing;
              pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
              let as_comment =
                Parser_c.TCommentCpp
                  (Token_c.CppDirective, TH.info_of_tok tok)
              in
              tr.passed <- as_comment :: tr.passed;
              tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
              tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
              skip ()
            end
            else begin
              tr.passed <- tok :: tr.passed;
              tr.passed_clean <- tok :: tr.passed_clean;
              tok
            end

        | Parser_c.TInclude (includes, filename, inifdef, info) ->
            if directive_is_passed ()
            then begin
              incr Stat.nIncludePassing;
              pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
              let as_comment =
                Parser_c.TCommentCpp (Token_c.CppDirective, info) in
              tr.passed <- as_comment :: tr.passed;
              skip ()
            end
            else begin
              (* the include is replaced by a new token, followed by extra
               * tokens pushed in front of the remaining input *)
              let (incl_tok, new_tokens) =
                Parsing_hacks.tokens_include
                  (info, includes, filename, inifdef) in
              let new_tokens_clean =
                List.filter TH.is_not_comment new_tokens in

              tr.passed <- incl_tok :: tr.passed;
              tr.passed_clean <- incl_tok :: tr.passed_clean;
              tr.rest <- new_tokens ++ tr.rest;
              tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
              incl_tok
            end

        | _ ->
            (* typedef_fix1 *)
            let tok =
              match tok with
              | Parser_c.TIdent (s, ii)
                when LP.is_typedef s &&
                     not (!Flag_parsing_c.disable_add_typedef) &&
                     pass =|= 1 ->
                  Parser_c.TypedefIdent (s, ii)
              | other -> other
            in

            let tok =
              Parsing_hacks.lookahead ~pass
                (clean_for_lookahead (tok :: tr.rest_clean))
                tr.passed_clean
            in

            tr.passed <- tok :: tr.passed;

            (* the lookahead may have changed the status of the token and
             * consider it as a comment, for instance some #include are
             * turned into comments, hence this code. *)
            (match tok with
             | Parser_c.TCommentCpp _ -> skip ()
             | _ ->
                 tr.passed_clean <- tok :: tr.passed_clean;
                 tok
            )
      end
587 | ||
588 | ||
708f4980 C |
(* NOTE(review): presumably the highest pass number tried by the multi-pass
 * error recovery; it is not referenced in the part of the file seen here,
 * so confirm against its uses. *)
589 | let max_pass = 4 |
590 | ||
34e49164 | 591 | |
485bce71 C |
(* Parse one toplevel element (Parser_c.celem) from the cursor [tr], with
 * lexer_function ~pass as token supplier.
 *
 * Returns [Left elem] on success.  On any exception the typedef state is
 * restored, the cursor is moved to the next synchronisation point and
 * [Right (infos of the skipped tokens, line of the failing token,
 *         tokens passed after recovery, tokens passed before the error,
 *         failing token, exception)] is returned.
 *
 * The (file, filelines) argument is not used by this function.
 *)
let get_one_elem ~pass tr (file, filelines) =

  if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
  then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";

  (* normally have to do that only when come from an exception in which
   * case the dt() may not have been done
   * TODO but if was in scoped scope ? have to let only the last scope
   * so need do a LP.lexer_reset_typedef ();
   *)
  LP.enable_typedef();
  LP._lexer_hint := (LP.default_hint ());
  LP.save_typedef_state();

  tr.passed <- [];

  let recover exn =
    LP.restore_typedef_state();

    (* must be read here, before the code that adjusts the tr fields *)
    let failing_tok = tr.current in
    let line_error = TH.line_of_tok failing_tok in
    let passed_before_error = tr.passed in

    (* error recovery, go to next synchro point *)
    let (skipped, remaining) =
      Parsing_recovery_c.find_next_synchro tr.rest tr.passed in
    tr.rest <- remaining;
    tr.passed <- skipped;

    tr.current <- List.hd skipped;
    tr.passed_clean <- []; (* enough ? *)
    (* with error recovery, rest and rest_clean may not be in sync *)
    tr.rest_clean <- List.filter TH.is_not_comment remaining;

    let info_of_bads = Common.map_eff_rev TH.info_of_tok skipped in
    Right (info_of_bads, line_error,
           skipped, passed_before_error,
           failing_tok, exn)
  in

  (* the parser must only get tokens through lexer_function *)
  let lexbuf_fake = Lexing.from_function (fun _ _ -> raise Impossible) in

  try
    (* -------------------------------------------------- *)
    (* Call parser *)
    (* -------------------------------------------------- *)
    Common.profile_code_exclusif "YACC" (fun () ->
      Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
    )
  with exn -> recover exn
643 | ||
644 | ||
645 | ||
978fd7e5 C |
646 | (* Macro problem recovery *) |
647 | (* used by the multi-pass error recovery expand-on-demand *) | |
648 | (* | |
649 | val candidate_macros_in_passed: | |
650 | defs: (string, define_def) Hashtbl.t -> | |
651 | Parser_c.token list -> (string * define_def) list | |
652 | *) | |
653 | ||
(* Among the identifier-like tokens of [passed] (TIdent, and also
 * TMacroIterator / TypedefIdent since inferred things may have to be
 * undone), collect those whose name is defined in the table [defs],
 * together with their definition.
 * Names matching Parsing_hacks.regexp_macro take priority: the other
 * names are returned only when no such name was found. *)
let candidate_macros_in_passed2 ~defs passed =
  let macro_like = ref [] in
  let others = ref [] in

  let record s =
    match Common.hfind_option s defs with
    | None -> ()
    | Some def ->
        let bucket =
          if s ==~ Parsing_hacks.regexp_macro then macro_like else others in
        Common.push2 (s, def) bucket
  in
  let visit = function
    | Parser_c.TIdent (s, _)
    | Parser_c.TMacroIterator (s, _)
    | Parser_c.TypedefIdent (s, _) -> record s
    | _ -> ()
  in
  List.iter visit passed;

  if null !macro_like then !others else !macro_like
680 | ||
(* Profiled entry point for candidate_macros_in_passed2. *)
let candidate_macros_in_passed ~defs b =
  let run () = candidate_macros_in_passed2 ~defs b in
  Common.profile_code "MACRO managment" run
684 | ||
685 | ||
686 | ||
687 | ||
688 | ||
(* Re-run Parsing_hacks.fix_tokens_cpp on [toks] with the candidate macros
 * [defs] (an association list, turned into a hash table here).
 * Beforehand, TMacroIterator and TypedefIdent tokens whose name is one of
 * the candidates are turned back into plain TIdent (special cases to
 * undo). *)
let find_optional_macro_to_expand2 ~defs toks =
  let macro_table = Common.hash_of_list defs in

  let undo_inference tok =
    match tok with
    | Parser_c.TMacroIterator (s, ii)
    | Parser_c.TypedefIdent (s, ii) when Hashtbl.mem macro_table s ->
        Parser_c.TIdent (s, ii)
    | _ -> tok
  in
  Parsing_hacks.fix_tokens_cpp ~macro_defs:macro_table
    (Common.map undo_inference toks)
711 | ||
712 | (* just calling apply_macro_defs and having a specialized version | |
713 | * of the code in fix_tokens_cpp is not enough as some work such | |
714 | * as the passing of the body of attribute in Parsing_hacks.find_macro_paren | |
715 | * will not get the chance to be run on the new expanded tokens. | |
716 | * Hence even if it's expensive, it's currently better to | |
717 | * just call directly fix_tokens_cpp again here. | |
718 | ||
719 | let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in | |
720 | let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in | |
721 | let paren_grouped = TV.mk_parenthised cleaner in | |
722 | Cpp_token_c.apply_macro_defs | |
723 | ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s)) | |
724 | ~msg_apply_known_macro_hint:(fun s -> pr2 "hint") | |
725 | defs paren_grouped; | |
726 | (* because the before field is used by apply_macro_defs *) | |
727 | tokens2 := TV.rebuild_tokens_extented !tokens2; | |
728 | Parsing_hacks.insert_virtual_positions | |
729 | (!tokens2 +> Common.acc_map (fun x -> x.TV.tok)) | |
730 | *) | |
(* Profiled entry point for find_optional_macro_to_expand2. *)
let find_optional_macro_to_expand ~defs a =
  let run () = find_optional_macro_to_expand2 ~defs a in
  Common.profile_code "MACRO managment" run
734 | ||
735 | ||
736 | ||
737 | ||
738 | ||
739 | (*****************************************************************************) | |
740 | (* Main entry points *) | |
741 | (*****************************************************************************) | |
742 | ||
(* Global macro tables, empty until init_defs_macros / init_defs_builtins
 * replace them with the macros extracted from a header file. *)
let _defs : (string, Cpp_token_c.define_def) Hashtbl.t ref =
  ref (Hashtbl.create 101)

let _defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref =
  ref (Hashtbl.create 101)
748 | ||
749 | ||
750 | (* can not be put in parsing_hack, cos then mutually recursive problem as | |
751 | * we also want to parse the standard.h file. | |
752 | *) | |
(* Load the default macros from [std_h] into _defs.  A missing file only
 * triggers a warning and leaves _defs untouched. *)
let init_defs_macros std_h =
  if Common.lfile_exists std_h
  then begin
    pr2 ("init_defs: " ^ std_h);
    _defs := Common.hash_of_list (extract_macros std_h)
  end
  else pr2 ("warning: Can't find default macro file: " ^ std_h)
760 | ||
(* Load the builtin macros from [file_h] into _defs_builtins.  A missing
 * file only triggers a warning and leaves _defs_builtins untouched. *)
let init_defs_builtins file_h =
  if Common.lfile_exists file_h
  then begin
    pr2 ("init_defs_builtins: " ^ file_h);
    _defs_builtins := Common.hash_of_list (extract_macros file_h)
  end
  else pr2 ("warning: Can't find macro file: " ^ file_h)
769 | ||
770 | ||
771 | ||
(* info_item has the shape of what mk_info_item2 returns: the concatenated
 * text of the origin tokens of an element, and the tokens themselves. *)
772 | type info_item = string * Parser_c.token list | |
773 | ||
(* A program2 is a list of toplevel elements, each paired with its
 * info_item. *)
774 | type program2 = toplevel2 list | |
775 | and toplevel2 = Ast_c.toplevel * info_item | |
776 | ||
(* Forget the info_items: keep only the first component of each pair. *)
let program_of_program2 xs =
  List.map fst xs
779 | ||
(* Apply [f] to the list of first components of [program2] and zip its
 * result back with the original second components. *)
let with_program2 f program2 =
  let (program, infos) = Common.unzip program2 in
  let program' = f program in
  Common.zip program' infos
787 | ||
788 | ||
789 | ||
790 | ||
791 | ||
485bce71 | 792 | |
34e49164 | 793 | (* note: as we now go in 2 passes, all the error messages of the lexer |
485bce71 | 794 | * come first, followed by the errors of the parser. They are no longer |
34e49164 C |
795 | * intertwined. |
796 | * | |
797 | * !!!This function use refs, and is not reentrant !!! so take care. | |
798 | * It use globals defined in Lexer_parser and also the _defs global | |
485bce71 C |
799 | * in parsing_hack.ml. |
800 | * | |
801 | * This function internally uses some semi-globals in the | |
802 | * tokens_state record and parsing_stat record. | |
34e49164 C |
803 | *) |
804 | ||
805 | let parse_print_error_heuristic2 file = | |
806 | ||
91eba41f | 807 | let filelines = Common.cat_array file in |
485bce71 C |
808 | let stat = Parsing_stat.default_stat file in | |
809 | ||
34e49164 C |
810 | (* -------------------------------------------------- *) | |
811 | (* call lexer and get all the tokens *) | |
812 | (* -------------------------------------------------- *) | |
813 | LP.lexer_reset_typedef(); | |
485bce71 | 814 | Parsing_hacks.ifdef_paren_cnt := 0; |
708f4980 | 815 | |
485bce71 | 816 | let toks_orig = tokens file in |
978fd7e5 | 817 | let toks = Parsing_hacks.fix_tokens_define toks_orig in |
708f4980 | 818 | let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in |
34e49164 | 819 | |
708f4980 C |
820 | (* expand-macros-on-demand trick, preparation phase: build the macro table used by the retry passes below (a copy of !_defs, overridden by the builtins, then by the macros extracted from [file]) *) | |
821 | let macros = | |
822 | Common.profile_code "MACRO mgmt prep 1" (fun () -> | |
823 | let macros = Hashtbl.copy !_defs in | |
824 | (* also include the builtins, as some macros may generate some builtins too, | |
825 | * like __decl_spec or __stdcall | |
826 | *) | |
827 | !_defs_builtins +> Hashtbl.iter (fun s def -> | |
828 | Hashtbl.replace macros s def; | |
829 | ); | |
830 | macros | |
831 | ) | |
832 | in | |
833 | Common.profile_code "MACRO mgmt prep 2" (fun () -> | |
978fd7e5 | 834 | let local_macros = extract_macros file in |
708f4980 C |
835 | local_macros +> List.iter (fun (s, def) -> | |
836 | Hashtbl.replace macros s def; | |
837 | ); | |
838 | ); | |
34e49164 | 839 | |
708f4980 | 840 | let tr = mk_tokens_state toks in |
485bce71 C |
841 | ||
842 | let rec loop tr = | |
34e49164 C |
843 | ||
844 | (* todo?: I am not sure that it represents current_line, because maybe | |
845 | * tr.current participated in the previous parsing phase, so maybe tr.current | |
846 | * is not the first token of the next parsing phase. Same with checkpoint2. | |
847 | * It would be better to record when we have a } or ; in parser.mly, | |
848 | * because we know that they are the last symbols of external_declaration2. | |
485bce71 C |
849 | * | |
850 | * bugfix: may not be equal to 'file' as after macro expansions we can | |
851 | * start to parse a new entity from the body of a macro, for instance | |
852 | * when parsing a define_machine() body, cf standard.h | |
34e49164 C |
853 | *) | |
854 | let checkpoint = TH.line_of_tok tr.current in | |
485bce71 | 855 | let checkpoint_file = TH.file_of_tok tr.current in |
34e49164 | 856 | |
485bce71 | 857 | (* call the parser: pass 1 first; on failure, unless Flag_parsing_c.disable_multi_pass is set, retry with passes 2, 3 and 4, each time rebuilding the token state from the passed tokens plus tr.rest *) |
34e49164 | 858 | let elem = |
708f4980 C |
859 | let pass1 = | |
860 | Common.profile_code "Parsing: 1st pass" (fun () -> | |
861 | get_one_elem ~pass:1 tr (file, filelines) | |
862 | ) in | |
485bce71 C |
863 | match pass1 with | |
864 | | Left e -> Left e | |
708f4980 C |
865 | | Right (info,line_err, passed, passed_before_error, cur, exn) -> | |
866 | if !Flag_parsing_c.disable_multi_pass | |
867 | then pass1 | |
485bce71 | 868 | else begin |
708f4980 C |
869 | Common.profile_code "Parsing: multi pass" (fun () -> | |
870 | ||
871 | pr2_err "parsing pass2: try again"; | |
872 | let toks = List.rev passed ++ tr.rest in | |
873 | let new_tr = mk_tokens_state toks in | |
874 | copy_tokens_state ~src:new_tr ~dst:tr; | |
875 | let passx = get_one_elem ~pass:2 tr (file, filelines) in | |
876 | ||
877 | (match passx with | |
878 | | Left e -> passx | |
879 | | Right (info,line_err,passed,passed_before_error,cur,exn) -> | |
880 | let candidates = | |
978fd7e5 | 881 | candidate_macros_in_passed ~defs:macros passed |
708f4980 | 882 | in |
978fd7e5 C |
883 | ||
884 | ||
708f4980 C |
885 | if is_define_passed passed || null candidates | |
886 | then passx | |
887 | else begin | |
888 | (* todo: factorize this code with the near-identical pass 4 block below *) | |
889 | ||
890 | pr2_err "parsing pass3: try again"; | |
891 | let toks = List.rev passed ++ tr.rest in | |
892 | let toks' = | |
893 | find_optional_macro_to_expand ~defs:candidates toks in | |
894 | let new_tr = mk_tokens_state toks' in | |
895 | copy_tokens_state ~src:new_tr ~dst:tr; | |
896 | let passx = get_one_elem ~pass:3 tr (file, filelines) in | |
897 | ||
898 | (match passx with | |
899 | | Left e -> passx | |
900 | | Right (info,line_err,passed,passed_before_error,cur,exn) -> | |
901 | pr2_err "parsing pass4: try again"; | |
902 | ||
903 | let candidates = | |
978fd7e5 C |
904 | candidate_macros_in_passed | |
905 | ~defs:macros passed | |
906 | in | |
708f4980 C |
907 | ||
908 | let toks = List.rev passed ++ tr.rest in | |
909 | let toks' = | |
910 | find_optional_macro_to_expand ~defs:candidates toks in | |
911 | let new_tr = mk_tokens_state toks' in | |
912 | copy_tokens_state ~src:new_tr ~dst:tr; | |
913 | let passx = get_one_elem ~pass:4 tr (file, filelines) in | |
914 | passx | |
915 | ) | |
916 | end | |
917 | ) | |
918 | ) | |
485bce71 C |
919 | end | |
920 | in | |
921 | ||
922 | ||
923 | (* again, not sure if checkpoint2 corresponds to the end of the bad region *) | |
924 | let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *) | |
925 | let checkpoint2_file = TH.file_of_tok tr.current in | |
926 | ||
485bce71 | 927 | let diffline = |
b1b2de81 | 928 | if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file) |
485bce71 C |
929 | then (checkpoint2 - checkpoint) | |
930 | else 0 | |
931 | (* TODO? what if the error comes in the middle of something, where the | |
932 | * start token was from the original file but the synchro point was found in | |
933 | * the body of a macro? Then we can have a wrong number of lines in the stat. | |
934 | * Maybe simpler just to look at tr.passed and count | |
935 | * the lines in the tokens from the correct file ? | |
936 | *) | |
34e49164 | 937 | in |
34e49164 C |
938 | let info = mk_info_item file (List.rev tr.passed) in | |
939 | ||
485bce71 C |
940 | (* some stat updates: commentized lines here, correct/bad line counts in the match below *) | |
941 | stat.Stat.commentized <- | |
942 | stat.Stat.commentized + count_lines_commentized (snd info); | |
943 | ||
944 | let elem = | |
945 | match elem with | |
91eba41f C |
946 | | Left e -> | |
947 | stat.Stat.correct <- stat.Stat.correct + diffline; | |
948 | e | |
708f4980 C |
949 | | Right (info_of_bads, line_error, toks_of_bads, | |
950 | _passed_before_error, cur, exn) -> | |
951 | ||
952 | let was_define = is_define_passed tr.passed in | |
953 | ||
954 | if was_define && !Flag_parsing_c.filter_msg_define_error | |
955 | then () | |
956 | else begin | |
957 | ||
958 | (match exn with | |
959 | | Lexer_c.Lexical _ | |
960 | | Parsing.Parse_error | |
961 | | Semantic_c.Semantic _ -> () | |
962 | | e -> raise e | |
963 | ); | |
964 | ||
965 | if !Flag_parsing_c.show_parsing_error | |
966 | then begin | |
967 | (match exn with | |
968 | (* Lexical is no longer raised, I think *) | |
969 | | Lexer_c.Lexical s -> | |
970 | pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur) | |
971 | | Parsing.Parse_error -> | |
972 | pr2 ("parse error \n = " ^ error_msg_tok cur) | |
973 | | Semantic_c.Semantic (s, i) -> | |
974 | pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur) | |
975 | | e -> raise Impossible | |
976 | ); | |
977 | (* bugfix: only print the bad region when both checkpoints come from the original file *) | |
978 | if (checkpoint_file =$= checkpoint2_file) && | |
979 | checkpoint_file =$= file | |
980 | then print_bad line_error (checkpoint, checkpoint2) filelines | |
981 | else pr2 "PB: bad: but on tokens not from original file" | |
982 | end; | |
983 | ||
984 | ||
985 | let pbline = | |
986 | toks_of_bads | |
987 | +> Common.filter (TH.is_same_line_or_close line_error) | |
988 | +> Common.filter TH.is_ident_like | |
989 | in | |
990 | let error_info = | |
991 | (pbline +> List.map TH.str_of_tok), line_error | |
992 | in | |
993 | stat.Stat.problematic_lines <- | |
994 | error_info::stat.Stat.problematic_lines; | |
995 | ||
996 | end; | |
997 | ||
91eba41f C |
998 | if was_define && !Flag_parsing_c.filter_define_error | |
999 | then stat.Stat.correct <- stat.Stat.correct + diffline | |
1000 | else stat.Stat.bad <- stat.Stat.bad + diffline; | |
1001 | ||
485bce71 C |
1002 | Ast_c.NotParsedCorrectly info_of_bads | |
1003 | in | |
34e49164 C |
1004 | ||
1005 | (match elem with | |
1006 | | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)] | |
485bce71 | 1007 | | xs -> (xs, info):: loop tr (* recurse until a FinalDef element is produced *) |
34e49164 C |
1008 | ) | |
1009 | in | |
485bce71 | 1010 | let v = loop tr in |
978fd7e5 | 1011 | let v = with_program2 Parsing_consistency_c.consistency_checking v in |
34e49164 C |
1012 | (v, stat) | |
1013 | ||
1014 | ||
485bce71 C |
1015 | let time_total_parsing a = (* runs parse_print_error_heuristic2 on [a] under the TOTAL profiling label *) | |
1016 | Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a) | |
1017 | ||
34e49164 | 1018 | let parse_print_error_heuristic a = (* runs time_total_parsing on [a] under the C parsing profiling label; returns its result unchanged *) |
485bce71 C |
1019 | Common.profile_code "C parsing" (fun () -> time_total_parsing a) | |
1020 | ||
34e49164 C |
1021 | |
1022 | (* alias: parse_c_and_cpp simply forwards its argument to parse_print_error_heuristic *) | |
1023 | let parse_c_and_cpp a = parse_print_error_heuristic a | |
1024 | ||
1025 | (*****************************************************************************) | |
1026 | (* Same, but faster because it memoizes the result *) | |
1027 | (*****************************************************************************) | |
1028 | let parse_cache file = (* parses [file] directly when Flag_parsing_c.use_cache is unset; otherwise goes through Common.cache_computation_robust with the .ast_raw and .depend_raw extensions *) | |
1029 | if not !Flag_parsing_c.use_cache then parse_print_error_heuristic file | |
1030 | else | |
485bce71 | 1031 | let _ = pr2 "TOFIX" in |
34e49164 C |
1032 | let need_no_changed_files = | |
1033 | (* should use Sys.argv.(0), would be safer. NOTE: the list below is currently empty because its entries are commented out. *) | |
485bce71 C |
1034 | ||
1035 | [ | |
1036 | (* TOFIX | |
1037 | Config.path ^ "/parsing_c/c_parser.cma"; | |
1038 | (* we may also depend now on the semantic patch because | |
1039 | the SP may use macro and so we will disable some of the | |
1040 | macro expansions from standard.h. | |
1041 | *) | |
1042 | !Config.std_h; | |
1043 | *) | |
34e49164 C |
1044 | ] | |
1045 | in | |
1046 | let need_no_changed_variables = | |
1047 | (* could add some of the flags of flag_parsing_c.ml; currently empty *) | |
1048 | [] | |
1049 | in | |
1050 | Common.cache_computation_robust | |
1051 | file ".ast_raw" | |
1052 | (need_no_changed_files, need_no_changed_variables) ".depend_raw" | |
1053 | (fun () -> parse_print_error_heuristic file) | |
1054 | ||
1055 | ||
1056 | ||
1057 | (*****************************************************************************) | |
485bce71 | 1058 | (* Some special cases *) |
34e49164 C |
1059 | (*****************************************************************************) |
1060 | ||
485bce71 | 1061 | let (cstatement_of_string: string -> Ast_c.statement) = fun s -> (* wraps [s] in a void main body written to a temp file, parses that file, and returns the statement of a definition whose body is exactly one statement; behaviour when nothing matches depends on Common.find_some - TODO confirm *) |
708f4980 C |
1062 | let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in | |
1063 | Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}"); | |
1064 | let program = parse_c_and_cpp tmpfile +> fst in | |
485bce71 C |
1065 | program +> Common.find_some (fun (e,_) -> | |
1066 | match e with | |
1067 | | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st | |
1068 | | _ -> None | |
1069 | ) | |
1070 | ||
1071 | let (cexpression_of_string: string -> Ast_c.expression) = fun s -> (* wraps [s] followed by a semicolon in a void main body written to a temp file, parses that file, and returns the expression of a definition whose body is a single ExprStatement; behaviour when nothing matches depends on Common.find_some - TODO confirm *) | |
708f4980 C |
1072 | let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in | |
1073 | Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}"); | |
1074 | let program = parse_c_and_cpp tmpfile +> fst in | |
485bce71 C |
1075 | program +> Common.find_some (fun (e,_) -> | |
1076 | match e with | |
1077 | | Ast_c.Definition ({Ast_c.f_body = compound},_) -> | |
1078 | (match compound with | |
708f4980 C |
1079 | | [Ast_c.StmtElem st] -> | |
1080 | (match Ast_c.unwrap_st st with | |
1081 | | Ast_c.ExprStatement (Some e) -> Some e | |
1082 | | _ -> None | |
1083 | ) | |
485bce71 C |
1084 | | _ -> None | |
1085 | ) | |
1086 | | _ -> None | |
1087 | ) | |