Commit | Line | Data |
---|---|---|
0708f913 | 1 | (* Yoann Padioleau |
ae4735db C |
2 | * |
3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. | |
0708f913 | 4 | * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes |
34e49164 C |
5 | * |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License (GPL) | |
8 | * version 2 as published by the Free Software Foundation. | |
ae4735db | 9 | * |
34e49164 C |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * file license.txt for more details. | |
14 | *) | |
15 | ||
16 | open Common | |
17 | ||
ae4735db | 18 | module TH = Token_helpers |
34e49164 C |
19 | module LP = Lexer_parser |
20 | ||
485bce71 C |
21 | module Stat = Parsing_stat |
22 | ||
34e49164 C |
23 | (*****************************************************************************) |
24 | (* Wrappers *) | |
25 | (*****************************************************************************) | |
ae4735db C |
26 | let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing |
27 | ||
34e49164 C |
28 | (*****************************************************************************) |
29 | (* Helpers *) | |
30 | (*****************************************************************************) | |
31 | ||
ae4735db C |
32 | let lexbuf_to_strpos lexbuf = |
33 | (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf) | |
34e49164 | 34 | |
ae4735db | 35 | let token_to_strpos tok = |
34e49164 C |
36 | (TH.str_of_tok tok, TH.pos_of_tok tok) |
37 | ||
38 | ||
ae4735db | 39 | let mk_info_item2 filename toks = |
34e49164 | 40 | let buf = Buffer.create 100 in |
ae4735db | 41 | let s = |
34e49164 C |
42 | (* old: get_slice_file filename (line1, line2) *) |
43 | begin | |
ae4735db | 44 | toks +> List.iter (fun tok -> |
34e49164 | 45 | match TH.pinfo_of_tok tok with |
ae4735db | 46 | | Ast_c.OriginTok _ -> |
91eba41f | 47 | Buffer.add_string buf (TH.str_of_tok tok) |
ae4735db | 48 | | Ast_c.AbstractLineTok _ -> |
91eba41f | 49 | raise Impossible |
34e49164 C |
50 | | _ -> () |
51 | ); | |
52 | Buffer.contents buf | |
53 | end | |
54 | in | |
ae4735db | 55 | (s, toks) |
34e49164 | 56 | |
ae4735db C |
57 | let mk_info_item a b = |
58 | Common.profile_code "C parsing.mk_info_item" | |
34e49164 C |
59 | (fun () -> mk_info_item2 a b) |
60 | ||
61 | ||
ae4735db | 62 | let info_same_line line xs = |
b1b2de81 | 63 | xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line) |
34e49164 | 64 | |
34e49164 | 65 | |
978fd7e5 C |
66 | (* move in cpp_token_c ? *) |
67 | let is_define_passed passed = | |
68 | let xs = passed +> List.rev +> List.filter TH.is_not_comment in | |
ae4735db C |
69 | if List.length xs >= 2 |
70 | then | |
978fd7e5 | 71 | (match Common.head_middle_tail xs with |
ae4735db | 72 | | Parser_c.TDefine _, _, Parser_c.TDefEOL _ -> |
978fd7e5 C |
73 | true |
74 | | _ -> false | |
75 | ) | |
76 | else begin | |
77 | pr2_err "WEIRD: length list of error recovery tokens < 2 "; | |
ae4735db | 78 | false |
978fd7e5 C |
79 | end |
80 | ||
81 | ||
82 | (*****************************************************************************) | |
83 | (* Error diagnostic *) | |
84 | (*****************************************************************************) | |
85 | ||
ae4735db | 86 | let error_msg_tok tok = |
978fd7e5 C |
87 | let file = TH.file_of_tok tok in |
88 | if !Flag_parsing_c.verbose_parsing | |
ae4735db | 89 | then Common.error_message file (token_to_strpos tok) |
978fd7e5 C |
90 | else ("error in " ^ file ^ "; set verbose_parsing for more info") |
91 | ||
92 | ||
ae4735db | 93 | let print_bad line_error (start_line, end_line) filelines = |
978fd7e5 C |
94 | begin |
95 | pr2 ("badcount: " ^ i_to_s (end_line - start_line)); | |
96 | ||
ae4735db C |
97 | for i = start_line to end_line do |
98 | let line = filelines.(i) in | |
978fd7e5 | 99 | |
ae4735db C |
100 | if i =|= line_error |
101 | then pr2 ("BAD:!!!!!" ^ " " ^ line) | |
102 | else pr2 ("bad:" ^ " " ^ line) | |
978fd7e5 C |
103 | done |
104 | end | |
105 | ||
106 | ||
34e49164 C |
107 | (*****************************************************************************) |
108 | (* Stats on what was passed/commentized *) | |
109 | (*****************************************************************************) | |
110 | ||
c491d8ee | 111 | let commentized xs = xs +> Common.tail_map_filter (function |
ae4735db | 112 | | Parser_c.TCommentCpp (cppkind, ii) -> |
485bce71 | 113 | let s = Ast_c.str_of_info ii in |
ae4735db C |
114 | let legal_passing = |
115 | match !Flag_parsing_c.filter_passed_level with | |
485bce71 | 116 | | 0 -> false |
ae4735db | 117 | | 1 -> |
0708f913 | 118 | List.mem cppkind [Token_c.CppAttr] |
ae4735db | 119 | || |
485bce71 | 120 | (s =~ "__.*") |
ae4735db | 121 | | 2 -> |
0708f913 | 122 | List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal] |
ae4735db | 123 | || |
485bce71 | 124 | (s =~ "__.*") |
ae4735db | 125 | | 3 -> |
feec80c3 C |
126 | (match cppkind with |
127 | Token_c.CppAttr | Token_c.CppPassingNormal | |
128 | | Token_c.CppDirective | Token_c.CppIfDirective _ -> true | |
129 | | _ -> false) | |
ae4735db | 130 | || |
485bce71 | 131 | (s =~ "__.*") |
ae4735db | 132 | | 4 -> |
feec80c3 C |
133 | List.mem cppkind |
134 | [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro] | |
ae4735db | 135 | || |
485bce71 C |
136 | (s =~ "__.*") |
137 | ||
138 | ||
ae4735db | 139 | | 5 -> |
feec80c3 C |
140 | (match cppkind with |
141 | Token_c.CppAttr | Token_c.CppPassingNormal | |
142 | | Token_c.CppDirective | Token_c.CppIfDirective _ | |
143 | | Token_c.CppMacro -> true | |
144 | | _ -> false) | |
ae4735db | 145 | || |
485bce71 C |
146 | (s =~ "__.*") |
147 | ||
148 | ||
149 | ||
150 | ||
151 | | _ -> failwith "not valid level passing number" | |
152 | in | |
153 | if legal_passing then None else Some (ii.Ast_c.pinfo) | |
154 | ||
155 | (* | |
ae4735db | 156 | | Ast_c.CppOther -> |
34e49164 C |
157 | (match s with |
158 | | s when s =~ "KERN_.*" -> None | |
159 | | s when s =~ "__.*" -> None | |
ae4735db | 160 | | _ -> |
485bce71 | 161 | Some (ii.Ast_c.pinfo) |
34e49164 | 162 | ) |
485bce71 C |
163 | *) |
164 | ||
ae4735db | 165 | |
34e49164 | 166 | | Parser_c.TCommentMisc ii |
ae4735db | 167 | | Parser_c.TAction ii |
34e49164 C |
168 | -> |
169 | Some (ii.Ast_c.pinfo) | |
ae4735db | 170 | | _ -> |
34e49164 C |
171 | None |
172 | ) | |
ae4735db C |
173 | |
174 | let count_lines_commentized xs = | |
34e49164 C |
175 | let line = ref (-1) in |
176 | let count = ref 0 in | |
177 | begin | |
178 | commentized xs +> | |
179 | List.iter | |
180 | (function | |
ae4735db | 181 | Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) -> |
34e49164 C |
182 | let newline = pinfo.Common.line in |
183 | if newline <> !line | |
184 | then begin | |
185 | line := newline; | |
186 | incr count | |
187 | end | |
188 | | _ -> ()); | |
189 | !count | |
190 | end | |
191 | ||
192 | ||
193 | ||
ae4735db | 194 | let print_commentized xs = |
34e49164 C |
195 | let line = ref (-1) in |
196 | begin | |
197 | let ys = commentized xs in | |
198 | ys +> | |
199 | List.iter | |
200 | (function | |
ae4735db | 201 | Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) -> |
34e49164 C |
202 | let newline = pinfo.Common.line in |
203 | let s = pinfo.Common.str in | |
ae4735db C |
204 | let s = Str.global_substitute |
205 | (Str.regexp "\n") (fun s -> "") s | |
34e49164 | 206 | in |
b1b2de81 | 207 | if newline =|= !line |
34e49164 C |
208 | then prerr_string (s ^ " ") |
209 | else begin | |
ae4735db C |
210 | if !line =|= -1 |
211 | then pr2_no_nl "passed:" | |
34e49164 C |
212 | else pr2_no_nl "\npassed:"; |
213 | line := newline; | |
214 | pr2_no_nl (s ^ " "); | |
215 | end | |
216 | | _ -> ()); | |
217 | if not (null ys) then pr2 ""; | |
218 | end | |
ae4735db | 219 | |
34e49164 C |
220 | |
221 | ||
222 | ||
223 | (*****************************************************************************) | |
224 | (* Lexing only *) | |
225 | (*****************************************************************************) | |
226 | ||
227 | (* called by parse_print_error_heuristic *) | |
ae4735db | 228 | let tokens2 file = |
708f4980 | 229 | let table = Common.full_charpos_to_pos_large file in |
34e49164 | 230 | |
ae4735db | 231 | Common.with_open_infile file (fun chan -> |
34e49164 | 232 | let lexbuf = Lexing.from_channel chan in |
ae4735db C |
233 | try |
234 | let rec tokens_aux acc = | |
34e49164 C |
235 | let tok = Lexer_c.token lexbuf in |
236 | (* fill in the line and col information *) | |
ae4735db | 237 | let tok = tok +> TH.visitor_info_of_tok (fun ii -> |
34e49164 C |
238 | { ii with Ast_c.pinfo= |
239 | (* could assert pinfo.filename = file ? *) | |
240 | match Ast_c.pinfo_of_info ii with | |
241 | Ast_c.OriginTok pi -> | |
708f4980 | 242 | Ast_c.OriginTok (Common.complete_parse_info_large file table pi) |
34e49164 | 243 | | Ast_c.ExpandedTok (pi,vpi) -> |
708f4980 | 244 | Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi) |
34e49164 C |
245 | | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi) |
246 | | Ast_c.AbstractLineTok pi -> failwith "should not occur" | |
247 | }) | |
248 | in | |
249 | ||
250 | if TH.is_eof tok | |
251 | then List.rev (tok::acc) | |
252 | else tokens_aux (tok::acc) | |
253 | in | |
254 | tokens_aux [] | |
255 | with | |
ae4735db C |
256 | | Lexer_c.Lexical s -> |
257 | failwith ("lexical error " ^ s ^ "\n =" ^ | |
34e49164 C |
258 | (Common.error_message file (lexbuf_to_strpos lexbuf))) |
259 | | e -> raise e | |
260 | ) | |
261 | ||
ae4735db C |
262 | let time_lexing ?(profile=true) a = |
263 | if profile | |
485bce71 | 264 | then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a) |
951c7801 | 265 | else tokens2 a |
ae4735db | 266 | let tokens ?profile a = |
485bce71 | 267 | Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a) |
34e49164 C |
268 | |
269 | ||
ae4735db | 270 | let tokens_of_string string = |
34e49164 | 271 | let lexbuf = Lexing.from_string string in |
ae4735db C |
272 | try |
273 | let rec tokens_s_aux () = | |
34e49164 C |
274 | let tok = Lexer_c.token lexbuf in |
275 | if TH.is_eof tok | |
276 | then [tok] | |
277 | else tok::(tokens_s_aux ()) | |
278 | in | |
279 | tokens_s_aux () | |
280 | with | |
281 | | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" ) | |
282 | | e -> raise e | |
283 | ||
284 | ||
285 | (*****************************************************************************) | |
286 | (* Parsing, but very basic, no more used *) | |
287 | (*****************************************************************************) | |
288 | ||
289 | (* | |
290 | * !!!Those function use refs, and are not reentrant !!! so take care. | |
291 | * It use globals defined in Lexer_parser. | |
ae4735db | 292 | * |
34e49164 C |
293 | * update: because now lexer return comments tokens, those functions |
294 | * may not work anymore. | |
295 | *) | |
296 | ||
ae4735db | 297 | let parse file = |
34e49164 C |
298 | let lexbuf = Lexing.from_channel (open_in file) in |
299 | let result = Parser_c.main Lexer_c.token lexbuf in | |
300 | result | |
301 | ||
302 | ||
ae4735db | 303 | let parse_print_error file = |
34e49164 C |
304 | let chan = (open_in file) in |
305 | let lexbuf = Lexing.from_channel chan in | |
306 | ||
307 | let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in | |
ae4735db | 308 | try |
34e49164 | 309 | lexbuf +> Parser_c.main Lexer_c.token |
ae4735db C |
310 | with |
311 | | Lexer_c.Lexical s -> | |
34e49164 | 312 | failwith ("lexical error " ^s^ "\n =" ^ error_msg ()) |
ae4735db | 313 | | Parsing.Parse_error -> |
34e49164 | 314 | failwith ("parse error \n = " ^ error_msg ()) |
ae4735db | 315 | | Semantic_c.Semantic (s, i) -> |
34e49164 C |
316 | failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ()) |
317 | | e -> raise e | |
318 | ||
319 | ||
320 | ||
321 | ||
322 | (*****************************************************************************) | |
323 | (* Parsing subelements, useful to debug parser *) | |
324 | (*****************************************************************************) | |
325 | ||
326 | (* | |
327 | * !!!Those function use refs, and are not reentrant !!! so take care. | |
328 | * It use globals defined in Lexer_parser. | |
329 | *) | |
330 | ||
331 | ||
ae4735db C |
332 | (* old: |
333 | * let parse_gen parsefunc s = | |
34e49164 C |
334 | * let lexbuf = Lexing.from_string s in |
335 | * let result = parsefunc Lexer_c.token lexbuf in | |
336 | * result | |
337 | *) | |
338 | ||
ae4735db | 339 | let parse_gen parsefunc s = |
485bce71 | 340 | let toks = tokens_of_string s +> List.filter TH.is_not_comment in |
34e49164 C |
341 | |
342 | ||
343 | (* Why use this lexing scheme ? Why not classically give lexer func | |
ae4735db | 344 | * to parser ? Because I now keep comments in lexer. Could |
34e49164 C |
345 | * just do a simple wrapper that when comment ask again for a token, |
346 | * but maybe simpler to use cur_tok technique. | |
347 | *) | |
348 | let all_tokens = ref toks in | |
349 | let cur_tok = ref (List.hd !all_tokens) in | |
350 | ||
ae4735db C |
351 | let lexer_function = |
352 | (fun _ -> | |
34e49164 | 353 | if TH.is_eof !cur_tok |
708f4980 | 354 | then (pr2_err "LEXER: ALREADY AT END"; !cur_tok) |
34e49164 C |
355 | else |
356 | let v = Common.pop2 all_tokens in | |
357 | cur_tok := v; | |
358 | !cur_tok | |
ae4735db | 359 | ) |
34e49164 C |
360 | in |
361 | let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in | |
362 | let result = parsefunc lexer_function lexbuf_fake in | |
363 | result | |
364 | ||
365 | ||
366 | let type_of_string = parse_gen Parser_c.type_name | |
367 | let statement_of_string = parse_gen Parser_c.statement | |
368 | let expression_of_string = parse_gen Parser_c.expr | |
369 | ||
370 | (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *) | |
371 | ||
372 | ||
373 | ||
374 | ||
375 | ||
34e49164 | 376 | (*****************************************************************************) |
485bce71 | 377 | (* Parsing default define macros, usually in a standard.h file *) |
34e49164 C |
378 | (*****************************************************************************) |
379 | ||
ae4735db C |
380 | let extract_macros2 file = |
381 | Common.save_excursion Flag_parsing_c.verbose_lexing (fun () -> | |
708f4980 C |
382 | Flag_parsing_c.verbose_lexing := false; |
383 | let toks = tokens ~profile:false file in | |
978fd7e5 C |
384 | let toks = Parsing_hacks.fix_tokens_define toks in |
385 | Cpp_token_c.extract_macros toks | |
708f4980 | 386 | ) |
34e49164 | 387 | |
ae4735db | 388 | let extract_macros a = |
978fd7e5 | 389 | Common.profile_code_exclusif "HACK" (fun () -> extract_macros2 a) |
485bce71 | 390 | |
34e49164 C |
391 | |
392 | (*****************************************************************************) | |
978fd7e5 | 393 | (* Helper for main entry point *) |
34e49164 C |
394 | (*****************************************************************************) |
395 | ||
34e49164 C |
396 | |
397 | (* The use of local refs (remaining_tokens, passed_tokens, ...) makes | |
398 | * possible error recovery. Indeed, they allow to skip some tokens and | |
399 | * still be able to call again the ocamlyacc parser. It is ugly code | |
400 | * because we cant modify ocamllex and ocamlyacc. As we want some | |
401 | * extended lexing tricks, we have to use such refs. | |
ae4735db | 402 | * |
34e49164 C |
403 | * Those refs are now also used for my lalr(k) technique. Indeed They |
404 | * store the futur and previous tokens that were parsed, and so | |
405 | * provide enough context information for powerful lex trick. | |
ae4735db | 406 | * |
34e49164 | 407 | * - passed_tokens_last_ckp stores the passed tokens since last |
485bce71 | 408 | * checkpoint. Used for NotParsedCorrectly and also to build the |
34e49164 C |
409 | * info_item attached to each program_element. |
410 | * - passed_tokens_clean is used for lookahead, in fact for lookback. | |
411 | * - remaining_tokens_clean is used for lookahead. Now remaining_tokens | |
412 | * contain some comments and so would make pattern matching difficult | |
ae4735db | 413 | * in lookahead. Hence this variable. We would like also to get rid |
34e49164 C |
414 | * of cpp instruction because sometimes a cpp instruction is between |
415 | * two tokens and makes a pattern matching fail. But lookahead also | |
416 | * transform some cpp instruction (in comment) so can't remove them. | |
ae4735db | 417 | * |
34e49164 C |
418 | * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens, |
419 | * whereas passed_tokens_clean and remaining_tokens_clean does not contain | |
420 | * comment-tokens. | |
ae4735db | 421 | * |
34e49164 | 422 | * Normally we have: |
ae4735db | 423 | * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens |
34e49164 | 424 | * after the call to pop2. |
ae4735db | 425 | * toks = (reverse passed_tok) ++ remaining_tokens |
34e49164 C |
426 | * at the and of the lexer_function call. |
427 | * At the very beginning, cur_tok and remaining_tokens overlap, but not after. | |
428 | * At the end of lexer_function call, cur_tok overlap with passed_tok. | |
ae4735db | 429 | * |
34e49164 | 430 | * convention: I use "tr" for "tokens refs" |
ae4735db | 431 | * |
485bce71 C |
432 | * I now also need this lexing trick because the lexer return comment |
433 | * tokens. | |
34e49164 C |
434 | *) |
435 | ||
436 | type tokens_state = { | |
437 | mutable rest : Parser_c.token list; | |
438 | mutable rest_clean : Parser_c.token list; | |
439 | mutable current : Parser_c.token; | |
440 | (* it's passed since last "checkpoint", not passed from the beginning *) | |
441 | mutable passed : Parser_c.token list; | |
442 | mutable passed_clean : Parser_c.token list; | |
443 | } | |
708f4980 | 444 | |
ae4735db C |
445 | let mk_tokens_state toks = |
446 | { | |
708f4980 C |
447 | rest = toks; |
448 | rest_clean = (toks +> List.filter TH.is_not_comment); | |
449 | current = (List.hd toks); | |
ae4735db | 450 | passed = []; |
708f4980 C |
451 | passed_clean = []; |
452 | } | |
453 | ||
454 | ||
455 | ||
ae4735db | 456 | let clone_tokens_state tr = |
485bce71 C |
457 | { rest = tr.rest; |
458 | rest_clean = tr.rest_clean; | |
459 | current = tr.current; | |
460 | passed = tr.passed; | |
461 | passed_clean = tr.passed_clean; | |
462 | } | |
ae4735db | 463 | let copy_tokens_state ~src ~dst = |
485bce71 C |
464 | dst.rest <- src.rest; |
465 | dst.rest_clean <- src.rest_clean; | |
466 | dst.current <- src.current; | |
467 | dst.passed <- src.passed; | |
468 | dst.passed_clean <- src.passed_clean; | |
469 | () | |
470 | ||
708f4980 | 471 | (* todo? agglomerate the x##b ? *) |
485bce71 C |
472 | let rec filter_noise n xs = |
473 | match n, xs with | |
474 | | _, [] -> [] | |
475 | | 0, xs -> xs | |
ae4735db | 476 | | n, x::xs -> |
485bce71 | 477 | (match x with |
ae4735db | 478 | | Parser_c.TMacroAttr _ -> |
485bce71 | 479 | filter_noise (n-1) xs |
ae4735db | 480 | | _ -> |
485bce71 C |
481 | x::filter_noise (n-1) xs |
482 | ) | |
483 | ||
ae4735db | 484 | let clean_for_lookahead xs = |
485bce71 C |
485 | match xs with |
486 | | [] -> [] | |
487 | | [x] -> [x] | |
ae4735db | 488 | | x::xs -> |
485bce71 C |
489 | x::filter_noise 10 xs |
490 | ||
34e49164 | 491 | |
485bce71 | 492 | |
ae4735db | 493 | (* Hacked lex. This function use refs passed by parse_print_error_heuristic |
485bce71 C |
494 | * tr means token refs. |
495 | *) | |
ae4735db | 496 | let rec lexer_function ~pass tr = fun lexbuf -> |
34e49164 | 497 | match tr.rest with |
708f4980 | 498 | | [] -> pr2_err "ALREADY AT END"; tr.current |
ae4735db | 499 | | v::xs -> |
34e49164 C |
500 | tr.rest <- xs; |
501 | tr.current <- v; | |
502 | ||
503 | if !Flag_parsing_c.debug_lexer then Common.pr2_gen v; | |
504 | ||
505 | if TH.is_comment v | |
506 | then begin | |
507 | tr.passed <- v::tr.passed; | |
485bce71 | 508 | lexer_function ~pass tr lexbuf |
34e49164 C |
509 | end |
510 | else begin | |
511 | let x = List.hd tr.rest_clean in | |
512 | tr.rest_clean <- List.tl tr.rest_clean; | |
b1b2de81 | 513 | assert (x =*= v); |
ae4735db | 514 | |
34e49164 | 515 | (match v with |
113803cf | 516 | |
ae4735db | 517 | (* fix_define1. |
113803cf C |
518 | * |
519 | * Why not in parsing_hacks lookahead and do passing like | |
ae4735db C |
520 | * I do for some ifdef directives ? Because here I also need to |
521 | * generate some tokens sometimes and so I need access to the | |
113803cf | 522 | * tr.passed, tr.rest, etc. |
485bce71 | 523 | *) |
9f8e26f4 | 524 | | Parser_c.TDefine (tok) -> |
ae4735db | 525 | if not (LP.current_context () =*= LP.InTopLevel) && |
708f4980 | 526 | (!Flag_parsing_c.cpp_directive_passing || (pass >= 2)) |
34e49164 | 527 | then begin |
485bce71 | 528 | incr Stat.nDefinePassing; |
34e49164 | 529 | pr2_once ("CPP-DEFINE: inside function, I treat it as comment"); |
3a314143 C |
530 | let v' = |
531 | Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v) | |
532 | in | |
533 | tr.passed <- v'::tr.passed; | |
534 | tr.rest <- Parsing_hacks.comment_until_defeol tr.rest; | |
535 | tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean; | |
536 | lexer_function ~pass tr lexbuf | |
537 | end | |
538 | else begin | |
539 | tr.passed <- v::tr.passed; | |
540 | tr.passed_clean <- v::tr.passed_clean; | |
541 | v | |
542 | end | |
543 | ||
544 | | Parser_c.TUndef (tok) -> | |
545 | if not (LP.current_context () =*= LP.InTopLevel) && | |
546 | (!Flag_parsing_c.cpp_directive_passing || (pass >= 2)) | |
547 | then begin | |
548 | incr Stat.nUndefPassing; | |
549 | pr2_once ("CPP-UNDEF: inside function, I treat it as comment"); | |
550 | let v' = | |
551 | Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v) | |
34e49164 C |
552 | in |
553 | tr.passed <- v'::tr.passed; | |
978fd7e5 C |
554 | tr.rest <- Parsing_hacks.comment_until_defeol tr.rest; |
555 | tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean; | |
485bce71 | 556 | lexer_function ~pass tr lexbuf |
34e49164 C |
557 | end |
558 | else begin | |
559 | tr.passed <- v::tr.passed; | |
560 | tr.passed_clean <- v::tr.passed_clean; | |
561 | v | |
562 | end | |
ae4735db C |
563 | |
564 | | Parser_c.TInclude (includes, filename, inifdef, info) -> | |
b1b2de81 | 565 | if not (LP.current_context () =*= LP.InTopLevel) && |
708f4980 | 566 | (!Flag_parsing_c.cpp_directive_passing || (pass >= 2)) |
34e49164 | 567 | then begin |
485bce71 | 568 | incr Stat.nIncludePassing; |
34e49164 | 569 | pr2_once ("CPP-INCLUDE: inside function, I treat it as comment"); |
0708f913 | 570 | let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in |
34e49164 | 571 | tr.passed <- v::tr.passed; |
485bce71 | 572 | lexer_function ~pass tr lexbuf |
34e49164 C |
573 | end |
574 | else begin | |
ae4735db | 575 | let (v,new_tokens) = |
3a314143 | 576 | Parsing_hacks.tokens_include(info, includes, filename, inifdef) in |
ae4735db | 577 | let new_tokens_clean = |
34e49164 C |
578 | new_tokens +> List.filter TH.is_not_comment in |
579 | ||
580 | tr.passed <- v::tr.passed; | |
581 | tr.passed_clean <- v::tr.passed_clean; | |
582 | tr.rest <- new_tokens ++ tr.rest; | |
583 | tr.rest_clean <- new_tokens_clean ++ tr.rest_clean; | |
584 | v | |
585 | end | |
ae4735db C |
586 | |
587 | | _ -> | |
588 | ||
34e49164 C |
589 | (* typedef_fix1 *) |
590 | let v = match v with | |
ae4735db C |
591 | | Parser_c.TIdent (s, ii) -> |
592 | if | |
593 | LP.is_typedef s && | |
485bce71 | 594 | not (!Flag_parsing_c.disable_add_typedef) && |
b1b2de81 | 595 | pass =|= 1 |
34e49164 C |
596 | then Parser_c.TypedefIdent (s, ii) |
597 | else Parser_c.TIdent (s, ii) | |
598 | | x -> x | |
599 | in | |
ae4735db | 600 | |
485bce71 C |
601 | let v = Parsing_hacks.lookahead ~pass |
602 | (clean_for_lookahead (v::tr.rest_clean)) | |
603 | tr.passed_clean in | |
34e49164 C |
604 | |
605 | tr.passed <- v::tr.passed; | |
ae4735db | 606 | |
485bce71 | 607 | (* the lookahead may have changed the status of the token and |
34e49164 | 608 | * consider it as a comment, for instance some #include are |
485bce71 | 609 | * turned into comments, hence this code. *) |
34e49164 | 610 | match v with |
485bce71 | 611 | | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf |
ae4735db | 612 | | v -> |
34e49164 C |
613 | tr.passed_clean <- v::tr.passed_clean; |
614 | v | |
615 | ) | |
616 | end | |
617 | ||
618 | ||
708f4980 C |
619 | let max_pass = 4 |
620 | ||
34e49164 | 621 | |
ae4735db | 622 | let get_one_elem ~pass tr (file, filelines) = |
485bce71 C |
623 | |
624 | if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef | |
708f4980 | 625 | then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn"; |
485bce71 C |
626 | |
627 | (* normally have to do that only when come from an exception in which | |
ae4735db | 628 | * case the dt() may not have been done |
485bce71 C |
629 | * TODO but if was in scoped scope ? have to let only the last scope |
630 | * so need do a LP.lexer_reset_typedef (); | |
631 | *) | |
ae4735db | 632 | LP.enable_typedef(); |
485bce71 C |
633 | LP._lexer_hint := (LP.default_hint ()); |
634 | LP.save_typedef_state(); | |
635 | ||
636 | tr.passed <- []; | |
637 | ||
638 | let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in | |
ae4735db C |
639 | |
640 | (try | |
485bce71 C |
641 | (* -------------------------------------------------- *) |
642 | (* Call parser *) | |
643 | (* -------------------------------------------------- *) | |
ae4735db | 644 | Common.profile_code_exclusif "YACC" (fun () -> |
c491d8ee | 645 | Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake) |
485bce71 | 646 | ) |
ae4735db | 647 | with e -> |
485bce71 C |
648 | LP.restore_typedef_state(); |
649 | ||
650 | (* must keep here, before the code that adjusts the tr fields *) | |
651 | let line_error = TH.line_of_tok tr.current in | |
708f4980 C |
652 | |
653 | let passed_before_error = tr.passed in | |
654 | let current = tr.current in | |
485bce71 | 655 | (* error recovery, go to next synchro point *) |
ae4735db | 656 | let (passed', rest') = |
978fd7e5 | 657 | Parsing_recovery_c.find_next_synchro tr.rest tr.passed in |
485bce71 C |
658 | tr.rest <- rest'; |
659 | tr.passed <- passed'; | |
ae4735db | 660 | |
485bce71 C |
661 | tr.current <- List.hd passed'; |
662 | tr.passed_clean <- []; (* enough ? *) | |
663 | (* with error recovery, rest and rest_clean may not be in sync *) | |
664 | tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment); | |
ae4735db C |
665 | |
666 | ||
667 | let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in | |
668 | Right (info_of_bads, line_error, | |
669 | tr.passed, passed_before_error, | |
708f4980 | 670 | current, e) |
485bce71 C |
671 | ) |
672 | ||
673 | ||
674 | ||
978fd7e5 C |
675 | (* Macro problem recovery *) |
676 | (* used by the multi-pass error recovery expand-on-demand *) | |
677 | (* | |
ae4735db C |
678 | val candidate_macros_in_passed: |
679 | defs: (string, define_def) Hashtbl.t -> | |
978fd7e5 C |
680 | Parser_c.token list -> (string * define_def) list |
681 | *) | |
682 | ||
ae4735db | 683 | let candidate_macros_in_passed2 ~defs passed = |
978fd7e5 C |
684 | let res = ref [] in |
685 | let res2 = ref [] in | |
686 | ||
687 | passed +> List.iter (function | |
688 | | Parser_c.TIdent (s,_) | |
689 | (* bugfix: may have to undo some infered things *) | |
690 | | Parser_c.TMacroIterator (s,_) | |
691 | | Parser_c.TypedefIdent (s,_) | |
ae4735db | 692 | -> |
978fd7e5 | 693 | (match Common.hfind_option s defs with |
ae4735db C |
694 | | Some def -> |
695 | if s ==~ Parsing_hacks.regexp_macro | |
978fd7e5 C |
696 | then |
697 | (* pr2 (spf "candidate: %s" s); *) | |
ae4735db C |
698 | Common.push2 (s, def) res |
699 | else | |
978fd7e5 C |
700 | Common.push2 (s, def) res2 |
701 | | None -> () | |
702 | ) | |
703 | ||
704 | | _ -> () | |
705 | ); | |
ae4735db C |
706 | if null !res |
707 | then !res2 | |
978fd7e5 C |
708 | else !res |
709 | ||
ae4735db C |
710 | let candidate_macros_in_passed ~defs b = |
711 | Common.profile_code "MACRO managment" (fun () -> | |
978fd7e5 | 712 | candidate_macros_in_passed2 ~defs b) |
ae4735db | 713 | |
978fd7e5 C |
714 | |
715 | ||
716 | ||
717 | ||
718 | let find_optional_macro_to_expand2 ~defs toks = | |
719 | ||
720 | let defs = Common.hash_of_list defs in | |
721 | ||
c491d8ee | 722 | let toks = toks +> Common.tail_map (function |
978fd7e5 C |
723 | |
724 | (* special cases to undo *) | |
ae4735db | 725 | | Parser_c.TMacroIterator (s, ii) -> |
978fd7e5 C |
726 | if Hashtbl.mem defs s |
727 | then Parser_c.TIdent (s, ii) | |
728 | else Parser_c.TMacroIterator (s, ii) | |
729 | ||
ae4735db | 730 | | Parser_c.TypedefIdent (s, ii) -> |
978fd7e5 C |
731 | if Hashtbl.mem defs s |
732 | then Parser_c.TIdent (s, ii) | |
733 | else Parser_c.TypedefIdent (s, ii) | |
734 | ||
735 | | x -> x | |
736 | ) in | |
737 | ||
738 | let tokens = toks in | |
739 | Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens | |
740 | ||
741 | (* just calling apply_macro_defs and having a specialized version | |
ae4735db | 742 | * of the code in fix_tokens_cpp is not enough as some work such |
978fd7e5 C |
743 | * as the passing of the body of attribute in Parsing_hacks.find_macro_paren |
744 | * will not get the chance to be run on the new expanded tokens. | |
ae4735db | 745 | * Hence even if it's expensive, it's currently better to |
978fd7e5 C |
746 | * just call directly fix_tokens_cpp again here. |
747 | ||
748 | let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in | |
749 | let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in | |
750 | let paren_grouped = TV.mk_parenthised cleaner in | |
751 | Cpp_token_c.apply_macro_defs | |
752 | ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s)) | |
753 | ~msg_apply_known_macro_hint:(fun s -> pr2 "hint") | |
754 | defs paren_grouped; | |
755 | (* because the before field is used by apply_macro_defs *) | |
ae4735db C |
756 | tokens2 := TV.rebuild_tokens_extented !tokens2; |
757 | Parsing_hacks.insert_virtual_positions | |
978fd7e5 C |
758 | (!tokens2 +> Common.acc_map (fun x -> x.TV.tok)) |
759 | *) | |
ae4735db C |
760 | let find_optional_macro_to_expand ~defs a = |
761 | Common.profile_code "MACRO managment" (fun () -> | |
978fd7e5 | 762 | find_optional_macro_to_expand2 ~defs a) |
ae4735db | 763 | |
978fd7e5 C |
764 | |
765 | ||
766 | ||
767 | ||
768 | (*****************************************************************************) | |
769 | (* Main entry points *) | |
770 | (*****************************************************************************) | |
771 | ||
ae4735db | 772 | let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref) = |
978fd7e5 C |
773 | ref (Hashtbl.create 101) |
774 | ||
ae4735db | 775 | let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref) = |
978fd7e5 C |
776 | ref (Hashtbl.create 101) |
777 | ||
778 | ||
779 | (* can not be put in parsing_hack, cos then mutually recursive problem as | |
780 | * we also want to parse the standard.h file. | |
781 | *) | |
ae4735db | 782 | let init_defs_macros std_h = |
978fd7e5 C |
783 | if not (Common.lfile_exists std_h) |
784 | then pr2 ("warning: Can't find default macro file: " ^ std_h) | |
785 | else begin | |
786 | pr2 ("init_defs: " ^ std_h); | |
787 | _defs := Common.hash_of_list (extract_macros std_h); | |
788 | end | |
789 | ||
ae4735db | 790 | let init_defs_builtins file_h = |
978fd7e5 C |
791 | if not (Common.lfile_exists file_h) |
792 | then pr2 ("warning: Can't find macro file: " ^ file_h) | |
793 | else begin | |
794 | pr2 ("init_defs_builtins: " ^ file_h); | |
ae4735db | 795 | _defs_builtins := |
978fd7e5 C |
796 | Common.hash_of_list (extract_macros file_h); |
797 | end | |
798 | ||
799 | ||
800 | ||
801 | type info_item = string * Parser_c.token list | |
802 | ||
803 | type program2 = toplevel2 list | |
ca417fcf C |
804 | and extended_program2 = toplevel2 list * |
805 | (string, Lexer_parser.identkind) Common.scoped_h_env (* type defs *) * | |
806 | (string, Cpp_token_c.define_def) Hashtbl.t (* macro defs *) | |
807 | and toplevel2 = Ast_c.toplevel * info_item | |
978fd7e5 | 808 | |
ae4735db | 809 | let program_of_program2 xs = |
978fd7e5 C |
810 | xs +> List.map fst |
811 | ||
ae4735db C |
812 | let with_program2 f program2 = |
813 | program2 | |
814 | +> Common.unzip | |
815 | +> (fun (program, infos) -> | |
978fd7e5 C |
816 | f program, infos |
817 | ) | |
818 | +> Common.uncurry Common.zip | |
819 | ||
820 | ||
821 | ||
822 | ||
823 | ||
485bce71 | 824 | |
34e49164 | 825 | (* note: as now we go in 2 passes, there is first all the error message of |
485bce71 | 826 | * the lexer, and then the error of the parser. It is not anymore |
34e49164 | 827 | * interwinded. |
ae4735db | 828 | * |
34e49164 C |
829 | * !!!This function use refs, and is not reentrant !!! so take care. |
830 | * It use globals defined in Lexer_parser and also the _defs global | |
ae4735db C |
831 | * in parsing_hack.ml. |
832 | * | |
485bce71 C |
833 | * This function uses internally some semi globals in the |
834 | * tokens_stat record and parsing_stat record. | |
34e49164 C |
835 | *) |
836 | ||
ca417fcf | 837 | let parse_print_error_heuristic2 saved_typedefs saved_macros file = |
34e49164 | 838 | |
91eba41f | 839 | let filelines = Common.cat_array file in |
485bce71 C |
840 | let stat = Parsing_stat.default_stat file in |
841 | ||
34e49164 C |
842 | (* -------------------------------------------------- *) |
843 | (* call lexer and get all the tokens *) | |
844 | (* -------------------------------------------------- *) | |
993936c0 | 845 | |
ca417fcf | 846 | LP.lexer_reset_typedef saved_typedefs; |
485bce71 | 847 | Parsing_hacks.ifdef_paren_cnt := 0; |
708f4980 | 848 | |
485bce71 | 849 | let toks_orig = tokens file in |
978fd7e5 | 850 | let toks = Parsing_hacks.fix_tokens_define toks_orig in |
708f4980 | 851 | let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in |
34e49164 | 852 | |
708f4980 | 853 | (* expand macros on demand trick, preparation phase *) |
ae4735db C |
854 | let macros = |
855 | Common.profile_code "MACRO mgmt prep 1" (fun () -> | |
ca417fcf C |
856 | let macros = |
857 | match saved_macros with None -> Hashtbl.copy !_defs | Some h -> h in | |
708f4980 C |
858 | (* include also builtins as some macros may generate some builtins too |
859 | * like __decl_spec or __stdcall | |
860 | *) | |
ae4735db | 861 | !_defs_builtins +> Hashtbl.iter (fun s def -> |
708f4980 C |
862 | Hashtbl.replace macros s def; |
863 | ); | |
864 | macros | |
865 | ) | |
866 | in | |
ae4735db | 867 | Common.profile_code "MACRO mgmt prep 2" (fun () -> |
978fd7e5 | 868 | let local_macros = extract_macros file in |
ae4735db | 869 | local_macros +> List.iter (fun (s, def) -> |
708f4980 C |
870 | Hashtbl.replace macros s def; |
871 | ); | |
872 | ); | |
34e49164 | 873 | |
708f4980 | 874 | let tr = mk_tokens_state toks in |
485bce71 C |
875 | |
876 | let rec loop tr = | |
34e49164 C |
877 | |
878 | (* todo?: I am not sure that it represents current_line, cos maybe | |
879 | * tr.current partipated in the previous parsing phase, so maybe tr.current | |
880 | * is not the first token of the next parsing phase. Same with checkpoint2. | |
881 | * It would be better to record when we have a } or ; in parser.mly, | |
882 | * cos we know that they are the last symbols of external_declaration2. | |
485bce71 C |
883 | * |
884 | * bugfix: may not be equal to 'file' as after macro expansions we can | |
885 | * start to parse a new entity from the body of a macro, for instance | |
886 | * when parsing a define_machine() body, cf standard.h | |
34e49164 C |
887 | *) |
888 | let checkpoint = TH.line_of_tok tr.current in | |
485bce71 | 889 | let checkpoint_file = TH.file_of_tok tr.current in |
34e49164 | 890 | |
485bce71 | 891 | (* call the parser *) |
ae4735db C |
892 | let elem = |
893 | let pass1 = | |
894 | Common.profile_code "Parsing: 1st pass" (fun () -> | |
708f4980 C |
895 | get_one_elem ~pass:1 tr (file, filelines) |
896 | ) in | |
485bce71 C |
897 | match pass1 with |
898 | | Left e -> Left e | |
ae4735db | 899 | | Right (info,line_err, passed, passed_before_error, cur, exn) -> |
708f4980 C |
900 | if !Flag_parsing_c.disable_multi_pass |
901 | then pass1 | |
485bce71 | 902 | else begin |
ae4735db | 903 | Common.profile_code "Parsing: multi pass" (fun () -> |
708f4980 C |
904 | |
905 | pr2_err "parsing pass2: try again"; | |
906 | let toks = List.rev passed ++ tr.rest in | |
907 | let new_tr = mk_tokens_state toks in | |
908 | copy_tokens_state ~src:new_tr ~dst:tr; | |
909 | let passx = get_one_elem ~pass:2 tr (file, filelines) in | |
910 | ||
911 | (match passx with | |
912 | | Left e -> passx | |
ae4735db C |
913 | | Right (info,line_err,passed,passed_before_error,cur,exn) -> |
914 | let candidates = | |
915 | candidate_macros_in_passed ~defs:macros passed | |
708f4980 | 916 | in |
ae4735db | 917 | |
978fd7e5 | 918 | |
708f4980 C |
919 | if is_define_passed passed || null candidates |
920 | then passx | |
921 | else begin | |
922 | (* todo factorize code *) | |
923 | ||
924 | pr2_err "parsing pass3: try again"; | |
925 | let toks = List.rev passed ++ tr.rest in | |
ae4735db | 926 | let toks' = |
708f4980 C |
927 | find_optional_macro_to_expand ~defs:candidates toks in |
928 | let new_tr = mk_tokens_state toks' in | |
929 | copy_tokens_state ~src:new_tr ~dst:tr; | |
930 | let passx = get_one_elem ~pass:3 tr (file, filelines) in | |
931 | ||
932 | (match passx with | |
933 | | Left e -> passx | |
ae4735db | 934 | | Right (info,line_err,passed,passed_before_error,cur,exn) -> |
708f4980 C |
935 | pr2_err "parsing pass4: try again"; |
936 | ||
ae4735db C |
937 | let candidates = |
938 | candidate_macros_in_passed | |
939 | ~defs:macros passed | |
978fd7e5 | 940 | in |
708f4980 C |
941 | |
942 | let toks = List.rev passed ++ tr.rest in | |
ae4735db | 943 | let toks' = |
708f4980 C |
944 | find_optional_macro_to_expand ~defs:candidates toks in |
945 | let new_tr = mk_tokens_state toks' in | |
946 | copy_tokens_state ~src:new_tr ~dst:tr; | |
947 | let passx = get_one_elem ~pass:4 tr (file, filelines) in | |
948 | passx | |
949 | ) | |
950 | end | |
951 | ) | |
952 | ) | |
485bce71 C |
953 | end |
954 | in | |
955 | ||
956 | ||
957 | (* again not sure if checkpoint2 corresponds to end of bad region *) | |
958 | let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *) | |
959 | let checkpoint2_file = TH.file_of_tok tr.current in | |
960 | ||
ae4735db | 961 | let diffline = |
b1b2de81 | 962 | if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file) |
ae4735db | 963 | then (checkpoint2 - checkpoint) |
485bce71 C |
964 | else 0 |
965 | (* TODO? so if error come in middle of something ? where the | |
966 | * start token was from original file but synchro found in body | |
967 | * of macro ? then can have wrong number of lines stat. | |
968 | * Maybe simpler just to look at tr.passed and count | |
969 | * the lines in the token from the correct file ? | |
970 | *) | |
34e49164 | 971 | in |
ae4735db | 972 | let info = mk_info_item file (List.rev tr.passed) in |
34e49164 | 973 | |
485bce71 | 974 | (* some stat updates *) |
ae4735db | 975 | stat.Stat.commentized <- |
485bce71 C |
976 | stat.Stat.commentized + count_lines_commentized (snd info); |
977 | ||
ae4735db | 978 | let elem = |
485bce71 | 979 | match elem with |
ae4735db | 980 | | Left e -> |
91eba41f C |
981 | stat.Stat.correct <- stat.Stat.correct + diffline; |
982 | e | |
ae4735db C |
983 | | Right (info_of_bads, line_error, toks_of_bads, |
984 | _passed_before_error, cur, exn) -> | |
708f4980 C |
985 | |
986 | let was_define = is_define_passed tr.passed in | |
ae4735db | 987 | |
708f4980 C |
988 | if was_define && !Flag_parsing_c.filter_msg_define_error |
989 | then () | |
990 | else begin | |
991 | ||
992 | (match exn with | |
ae4735db | 993 | | Lexer_c.Lexical _ |
708f4980 C |
994 | | Parsing.Parse_error |
995 | | Semantic_c.Semantic _ -> () | |
996 | | e -> raise e | |
997 | ); | |
998 | ||
999 | if !Flag_parsing_c.show_parsing_error | |
ae4735db | 1000 | then begin |
708f4980 C |
1001 | (match exn with |
1002 | (* Lexical is not anymore launched I think *) | |
ae4735db | 1003 | | Lexer_c.Lexical s -> |
708f4980 | 1004 | pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur) |
ae4735db | 1005 | | Parsing.Parse_error -> |
708f4980 | 1006 | pr2 ("parse error \n = " ^ error_msg_tok cur) |
ae4735db | 1007 | | Semantic_c.Semantic (s, i) -> |
708f4980 C |
1008 | pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur) |
1009 | | e -> raise Impossible | |
1010 | ); | |
1011 | (* bugfix: *) | |
ae4735db | 1012 | if (checkpoint_file =$= checkpoint2_file) && |
708f4980 C |
1013 | checkpoint_file =$= file |
1014 | then print_bad line_error (checkpoint, checkpoint2) filelines | |
1015 | else pr2 "PB: bad: but on tokens not from original file" | |
1016 | end; | |
1017 | ||
ae4735db C |
1018 | |
1019 | let pbline = | |
1020 | toks_of_bads | |
708f4980 | 1021 | +> Common.filter (TH.is_same_line_or_close line_error) |
ae4735db | 1022 | +> Common.filter TH.is_ident_like |
708f4980 | 1023 | in |
ae4735db | 1024 | let error_info = |
708f4980 C |
1025 | (pbline +> List.map TH.str_of_tok), line_error |
1026 | in | |
ae4735db | 1027 | stat.Stat.problematic_lines <- |
708f4980 C |
1028 | error_info::stat.Stat.problematic_lines; |
1029 | ||
1030 | end; | |
1031 | ||
91eba41f C |
1032 | if was_define && !Flag_parsing_c.filter_define_error |
1033 | then stat.Stat.correct <- stat.Stat.correct + diffline | |
1034 | else stat.Stat.bad <- stat.Stat.bad + diffline; | |
1035 | ||
485bce71 C |
1036 | Ast_c.NotParsedCorrectly info_of_bads |
1037 | in | |
34e49164 C |
1038 | |
1039 | (match elem with | |
1040 | | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)] | |
485bce71 | 1041 | | xs -> (xs, info):: loop tr (* recurse *) |
34e49164 C |
1042 | ) |
1043 | in | |
485bce71 | 1044 | let v = loop tr in |
978fd7e5 | 1045 | let v = with_program2 Parsing_consistency_c.consistency_checking v in |
ca417fcf C |
1046 | let v = |
1047 | let new_td = ref (Common.clone_scoped_h_env !LP._typedef) in | |
1048 | Common.clean_scope_h new_td; | |
1049 | (v, !new_td, macros) in | |
34e49164 C |
1050 | (v, stat) |
1051 | ||
1052 | ||
ca417fcf C |
1053 | let time_total_parsing a b = |
1054 | Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a b) | |
485bce71 | 1055 | |
ca417fcf C |
1056 | let parse_print_error_heuristic a b = |
1057 | Common.profile_code "C parsing" (fun () -> time_total_parsing a b) | |
485bce71 | 1058 | |
34e49164 C |
1059 | |
1060 | (* alias *) | |
ca417fcf C |
1061 | let parse_c_and_cpp a = |
1062 | let ((c,_,_),stat) = parse_print_error_heuristic None None a in (c,stat) | |
1063 | let parse_c_and_cpp_keep_typedefs td macs a = | |
1064 | parse_print_error_heuristic td macs a | |
34e49164 C |
1065 | |
1066 | (*****************************************************************************) | |
1067 | (* Same but faster cos memoize stuff *) | |
1068 | (*****************************************************************************) | |
ae4735db | 1069 | let parse_cache file = |
ca417fcf C |
1070 | if not !Flag_parsing_c.use_cache |
1071 | then parse_print_error_heuristic None None file | |
ae4735db | 1072 | else |
17ba0788 | 1073 | let _ = pr2_once "TOFIX: use_cache is not sensitive to changes in the considered macros, include files, etc" in |
ae4735db | 1074 | let need_no_changed_files = |
34e49164 | 1075 | (* should use Sys.argv.(0), would be safer. *) |
485bce71 C |
1076 | |
1077 | [ | |
1078 | (* TOFIX | |
1079 | Config.path ^ "/parsing_c/c_parser.cma"; | |
ae4735db | 1080 | (* we may also depend now on the semantic patch because |
485bce71 | 1081 | the SP may use macro and so we will disable some of the |
ae4735db | 1082 | macro expansions from standard.h. |
485bce71 C |
1083 | *) |
1084 | !Config.std_h; | |
1085 | *) | |
f3c4ece6 | 1086 | ] in |
ae4735db | 1087 | let need_no_changed_variables = |
34e49164 | 1088 | (* could add some of the flags of flag_parsing_c.ml *) |
f3c4ece6 | 1089 | [] in |
5427db06 | 1090 | Common.cache_computation_robust_in_dir |
f3c4ece6 | 1091 | !Flag_parsing_c.cache_prefix file ".ast_raw" |
ae4735db | 1092 | (need_no_changed_files, need_no_changed_variables) ".depend_raw" |
f3c4ece6 C |
1093 | (fun () -> |
1094 | (* check whether to clear the cache *) | |
1095 | (match (!Flag_parsing_c.cache_limit,!Flag_parsing_c.cache_prefix) with | |
1096 | (None,_) | (_,None) -> () | |
1097 | | (Some limit,Some prefix) -> | |
1098 | let count = | |
1099 | Common.cmd_to_list | |
17ba0788 C |
1100 | (Printf.sprintf "test -e %s && find %s -name \"*_raw\" | wc -l" |
1101 | prefix prefix) in | |
f3c4ece6 C |
1102 | match count with |
1103 | [c] -> | |
1104 | if int_of_string c >= limit | |
1105 | then | |
1106 | let _ = | |
1107 | Sys.command | |
17ba0788 C |
1108 | (Printf.sprintf |
1109 | "find %s -name \"*_raw\" -exec /bin/rm {} \\;" | |
1110 | prefix) in | |
f3c4ece6 C |
1111 | () |
1112 | | _ -> ()); | |
1113 | (* recompute *) | |
1114 | parse_print_error_heuristic None None file) | |
34e49164 C |
1115 | |
1116 | ||
1117 | ||
1118 | (*****************************************************************************) | |
485bce71 | 1119 | (* Some special cases *) |
34e49164 C |
1120 | (*****************************************************************************) |
1121 | ||
485bce71 | 1122 | let (cstatement_of_string: string -> Ast_c.statement) = fun s -> |
708f4980 C |
1123 | let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in |
1124 | Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}"); | |
1125 | let program = parse_c_and_cpp tmpfile +> fst in | |
ae4735db | 1126 | program +> Common.find_some (fun (e,_) -> |
485bce71 C |
1127 | match e with |
1128 | | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st | |
1129 | | _ -> None | |
1130 | ) | |
1131 | ||
1132 | let (cexpression_of_string: string -> Ast_c.expression) = fun s -> | |
708f4980 C |
1133 | let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in |
1134 | Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}"); | |
1135 | let program = parse_c_and_cpp tmpfile +> fst in | |
ae4735db | 1136 | program +> Common.find_some (fun (e,_) -> |
485bce71 | 1137 | match e with |
ae4735db | 1138 | | Ast_c.Definition ({Ast_c.f_body = compound},_) -> |
485bce71 | 1139 | (match compound with |
ae4735db | 1140 | | [Ast_c.StmtElem st] -> |
708f4980 C |
1141 | (match Ast_c.unwrap_st st with |
1142 | | Ast_c.ExprStatement (Some e) -> Some e | |
1143 | | _ -> None | |
1144 | ) | |
485bce71 C |
1145 | | _ -> None |
1146 | ) | |
1147 | | _ -> None | |
1148 | ) |