Commit | Line | Data |
---|---|---|
0708f913 C |
1 | (* Yoann Padioleau |
2 | * | |
3 | * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes | |
34e49164 C |
4 | * |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU General Public License (GPL) | |
7 | * version 2 as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * file license.txt for more details. | |
13 | *) | |
14 | ||
15 | open Common | |
16 | ||
17 | module TH = Token_helpers | |
18 | module LP = Lexer_parser | |
19 | ||
485bce71 C |
20 | module Stat = Parsing_stat |
21 | ||
34e49164 C |
22 | (*****************************************************************************) |
23 | (* Wrappers *) | |
24 | (*****************************************************************************) | |
(* Diagnostic printers used throughout this module. Both are produced by
 * Common.mk_pr2_wrappers from the verbose_parsing flag reference
 * (presumably they stay silent when that flag is off -- confirm in Common). *)
let (pr2_err, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
34e49164 C |
26 | |
27 | (*****************************************************************************) | |
28 | (* Helpers *) | |
29 | (*****************************************************************************) | |
30 | ||
(* Returns the pair (text of the lexeme last matched by lexbuf,
 * start offset of that lexeme). *)
let lexbuf_to_strpos lexbuf =
  let lexeme = Lexing.lexeme lexbuf in
  let start = Lexing.lexeme_start lexbuf in
  (lexeme, start)
33 | ||
(* Token counterpart of lexbuf_to_strpos: returns the pair
 * (string of tok, position of tok) as given by Token_helpers. *)
let token_to_strpos tok =
  let str = TH.str_of_tok tok in
  let pos = TH.pos_of_tok tok in
  (str, pos)
36 | ||
37 | ||
(* Builds the error message for a problem located at tok.
 * With verbose_parsing set, the message comes from Common.error_message
 * on the file and the (string, position) of the token; otherwise only a
 * short hint naming the file is returned. *)
let error_msg_tok tok =
  let file = TH.file_of_tok tok in
  match !Flag_parsing_c.verbose_parsing with
  | true -> Common.error_message file (token_to_strpos tok)
  | false -> "error in " ^ file ^ "; set verbose_parsing for more info"
34e49164 C |
43 | |
44 | ||
(* Prints (via pr2) the lines start_line..end_line (inclusive) of the
 * filelines array, each prefixed with a tag; the line whose index is
 * line_error gets the louder tag. A first line reports
 * end_line - start_line as the bad count. *)
let print_bad line_error (start_line, end_line) filelines =
  let show i =
    let line = filelines.(i) in
    let tag = if i =|= line_error then "BAD:!!!!!" else "bad:" in
    pr2 (tag ^ " " ^ line)
  in
  pr2 ("badcount: " ^ i_to_s (end_line - start_line));
  for i = start_line to end_line do
    show i
  done
57 | ||
58 | ||
59 | ||
(* Builds an info item: the pair (source text, tokens).
 * The text is the concatenation of the strings of all tokens whose
 * position info is an OriginTok; other kinds contribute nothing, except
 * AbstractLineTok which is not expected here and raises Impossible.
 * filename is unused (old: get_slice_file filename (line1, line2)). *)
let mk_info_item2 filename toks =
  let buf = Buffer.create 100 in
  let add_tok tok =
    match TH.pinfo_of_tok tok with
    | Ast_c.OriginTok _ -> Buffer.add_string buf (TH.str_of_tok tok)
    | Ast_c.AbstractLineTok _ -> raise Impossible
    | _ -> ()
  in
  List.iter add_tok toks;
  (Buffer.contents buf, toks)
77 | ||
(* Profiled entry point for mk_info_item2. *)
let mk_info_item a b =
  let run () = mk_info_item2 a b in
  Common.profile_code "C parsing.mk_info_item" run
81 | ||
82 | ||
(* Keeps the infos of xs whose line number equals line. *)
let info_same_line line xs =
  let on_line info = Ast_c.line_of_info info =|= line in
  List.filter on_line xs
34e49164 | 85 | |
34e49164 C |
86 | |
87 | (*****************************************************************************) | |
88 | (* Stats on what was passed/commentized *) | |
89 | (*****************************************************************************) | |
90 | ||
(* Returns the position infos of the tokens of xs that count as
 * passed/commentized code:
 *  - TCommentMisc and TAction tokens always count;
 *  - a TCommentCpp token counts unless it is a legal passing for the
 *    current Flag_parsing_c.filter_passed_level;
 *  - every other token is ignored.
 * At level 0 nothing is a legal passing. At levels 1 to 5 a token is legal
 * when its cpp kind is in the list for that level or when its string
 * matches the regexp "__.*". Any other level fails. *)
let commentized xs =
  (* cpp kinds tolerated at each filter level *)
  let tolerated_kinds level =
    match level with
    | 1 -> [Token_c.CppAttr]
    | 2 -> [Token_c.CppAttr;Token_c.CppPassingNormal]
    | 3 -> [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective]
    | 4 -> [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
    | 5 ->
        [Token_c.CppAttr;Token_c.CppPassingNormal;
         Token_c.CppDirective;Token_c.CppMacro]
    | _ -> failwith "not valid level passing number"
  in
  let pinfo_if_commentized tok =
    match tok with
    | Parser_c.TCommentCpp (cppkind, ii) ->
        let s = Ast_c.str_of_info ii in
        let legal_passing =
          match !Flag_parsing_c.filter_passed_level with
          | 0 -> false
          | level ->
              List.mem cppkind (tolerated_kinds level)
              ||
              (s =~ "__.*")
        in
        if legal_passing then None else Some (ii.Ast_c.pinfo)

        (* old:
           | Ast_c.CppOther ->
               (match s with
               | s when s =~ "KERN_.*" -> None
               | s when s =~ "__.*" -> None
               | _ ->
                   Some (ii.Ast_c.pinfo)
               )
        *)

    | Parser_c.TCommentMisc ii
    | Parser_c.TAction ii ->
        Some (ii.Ast_c.pinfo)
    | _ ->
        None
  in
  Common.map_filter pinfo_if_commentized xs
145 | ||
(* Counts line changes among the commentized tokens of xs: the counter is
 * bumped each time an OriginTok/ExpandedTok position is on a different
 * line than the previous one seen. Other position kinds are skipped. *)
let count_lines_commentized xs =
  let last_line = ref (-1) in
  let nb = ref 0 in
  let visit pinfo_kind =
    match pinfo_kind with
    | Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
        let l = pinfo.Common.line in
        if l <> !last_line
        then begin
          last_line := l;
          incr nb
        end
    | _ -> ()
  in
  List.iter visit (commentized xs);
  !nb
163 | ||
164 | ||
165 | ||
(* Prints on stderr the text of the commentized tokens of xs, grouped by
 * source line: each new line starts a fresh group introduced by the tag
 * passed: and tokens of the same line are appended to it. Newlines inside
 * a token string are removed before printing. A final empty pr2 is
 * emitted when at least one token was commentized. *)
let print_commentized xs =
  let line = ref (-1) in
  let newline_re = Str.regexp "\n" in
  let ys = commentized xs in
  let show pinfo_kind =
    match pinfo_kind with
    | Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
        let newline = pinfo.Common.line in
        let s = Str.global_replace newline_re "" pinfo.Common.str in
        if newline =|= !line
        then prerr_string (s ^ " ")
        else begin
          (* the very first group has no leading line break *)
          pr2_no_nl (if !line =|= (-1) then "passed:" else "\npassed:");
          line := newline;
          pr2_no_nl (s ^ " ")
        end
    | _ -> ()
  in
  List.iter show ys;
  if not (null ys) then pr2 ""
191 | ||
192 | ||
193 | ||
194 | ||
195 | (*****************************************************************************) | |
196 | (* Lexing only *) | |
197 | (*****************************************************************************) | |
198 | ||
199 | (* called by parse_print_error_heuristic *) | |
(* Lexes the whole of file and returns its tokens in source order, the
 * EOF token included as last element.
 * Each token gets its position info completed through
 * Common.complete_parse_info_large (fills in the line and col
 * information) using a table computed once for the file.
 * A Lexer_c.Lexical error is turned into a Failure carrying the located
 * error message; an AbstractLineTok position is not expected here. *)
let tokens2 file =
  let table = Common.full_charpos_to_pos_large file in

  (* could assert pinfo.filename = file ? *)
  let complete_position tok =
    tok +> TH.visitor_info_of_tok (fun ii ->
      let pinfo =
        match Ast_c.pinfo_of_info ii with
        | Ast_c.OriginTok pi ->
            Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
        | Ast_c.ExpandedTok (pi,vpi) ->
            Ast_c.ExpandedTok
              ((Common.complete_parse_info_large file table pi),vpi)
        | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
        | Ast_c.AbstractLineTok _ -> failwith "should not occur"
      in
      { ii with Ast_c.pinfo = pinfo })
  in

  Common.with_open_infile file (fun chan ->
    let lexbuf = Lexing.from_channel chan in
    (* accumulate in reverse so the loop stays tail-recursive *)
    let rec collect acc =
      let tok = complete_position (Lexer_c.token lexbuf) in
      if TH.is_eof tok
      then List.rev (tok::acc)
      else collect (tok::acc)
    in
    try collect []
    with Lexer_c.Lexical s ->
      failwith ("lexical error " ^ s ^ "\n =" ^
                (Common.error_message file (lexbuf_to_strpos lexbuf)))
  )
233 | ||
485bce71 C |
(* Runs tokens2 on a; when profile is true (the default) the call is
 * wrapped in the exclusive LEXING profiling section. *)
let time_lexing ?(profile=true) a =
  let lex () = tokens2 a in
  if profile
  then Common.profile_code_exclusif "LEXING" lex
  else lex ()
(* Public lexing entry point: time_lexing under the general profiling
 * section of this module. The optional profile flag is forwarded as is. *)
let tokens ?profile a =
  let run () = time_lexing ?profile a in
  Common.profile_code "C parsing.tokens" run
34e49164 C |
240 | |
241 | ||
(* Lexes string and returns its tokens in source order, the EOF token
 * included as last element. Unlike tokens2, positions are not completed
 * with line/column information.
 * A Lexer_c.Lexical error is turned into a Failure.
 *
 * The tokens are accumulated in reverse and reversed once at the end, so
 * that the loop is tail-recursive and long inputs cannot exhaust the
 * stack (same scheme as tokens2). *)
let tokens_of_string string =
  let lexbuf = Lexing.from_string string in
  let rec collect acc =
    let tok = Lexer_c.token lexbuf in
    if TH.is_eof tok
    then List.rev (tok::acc)
    else collect (tok::acc)
  in
  try collect []
  with Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
255 | ||
256 | ||
257 | (*****************************************************************************) | |
258 | (* Parsing, but very basic, no more used *) | |
259 | (*****************************************************************************) | |
260 | ||
261 | (* | |
262 | * !!! Those functions use refs and are not reentrant !!! so take care. | |
263 | * They use globals defined in Lexer_parser. | |
264 | * | |
265 | * update: because the lexer now returns comment tokens, those functions | |
266 | * may not work anymore. | |
267 | *) | |
268 | ||
(* Parses file with the plain ocamlyacc entry point Parser_c.main fed
 * directly by Lexer_c.token, and returns its result.
 * The input channel is opened through Common.with_open_infile (as
 * tokens2 does) so that it is released once parsing is over instead of
 * being left open. *)
let parse file =
  Common.with_open_infile file (fun chan ->
    let lexbuf = Lexing.from_channel chan in
    Parser_c.main Lexer_c.token lexbuf
  )
273 | ||
274 | ||
(* Same as parse, but lexical, parse and semantic errors are turned into
 * a Failure whose message includes the location computed from the
 * current lexbuf position. Other exceptions propagate unchanged.
 * The input channel is opened through Common.with_open_infile (as
 * tokens2 does) so that it is not left open. *)
let parse_print_error file =
  Common.with_open_infile file (fun chan ->
    let lexbuf = Lexing.from_channel chan in

    let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
    try
      lexbuf +> Parser_c.main Lexer_c.token
    with
    | Lexer_c.Lexical s ->
        failwith ("lexical error " ^s^ "\n =" ^  error_msg ())
    | Parsing.Parse_error ->
        failwith ("parse error \n = " ^ error_msg ())
    | Semantic_c.Semantic (s, _) ->
        failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
  )
290 | ||
291 | ||
292 | ||
293 | ||
294 | (*****************************************************************************) | |
295 | (* Parsing subelements, useful to debug parser *) | |
296 | (*****************************************************************************) | |
297 | ||
298 | (* | |
299 | * !!! Those functions use refs and are not reentrant !!! so take care. | |
300 | * They use globals defined in Lexer_parser. | |
301 | *) | |
302 | ||
303 | ||
304 | (* old: | |
305 | * let parse_gen parsefunc s = | |
306 | * let lexbuf = Lexing.from_string s in | |
307 | * let result = parsefunc Lexer_c.token lexbuf in | |
308 | * result | |
309 | *) | |
310 | ||
(* Runs the parser entry point parsefunc on the string s.
 *
 * The string is lexed up front and comment tokens are filtered out; the
 * parser is then fed from that list through a hand-made lexer function.
 * Why not classically give the lexer function to the parser? Because
 * comments are now kept by the lexer. A wrapper asking again for a token
 * on each comment would also work, but the cur_tok technique is simpler.
 *
 * Once the current token is EOF, the lexer function keeps returning it
 * (after printing a diagnostic). The lexbuf handed to the parser is a
 * fake one whose refill function must never be called. *)
let parse_gen parsefunc s =
  let toks = List.filter TH.is_not_comment (tokens_of_string s) in

  let all_tokens = ref toks in
  let cur_tok    = ref (List.hd !all_tokens) in

  let lexer_function _ =
    if TH.is_eof !cur_tok
    then begin
      pr2_err "LEXER: ALREADY AT END";
      !cur_tok
    end
    else begin
      cur_tok := Common.pop2 all_tokens;
      !cur_tok
    end
  in
  let lexbuf_fake = Lexing.from_function (fun _buf _n -> raise Impossible) in
  parsefunc lexer_function lexbuf_fake
336 | ||
337 | ||
(* Parse a C type name, statement or expression given as a string, through
 * parse_gen and the matching Parser_c entry point. *)
let type_of_string s       = parse_gen Parser_c.type_name s
let statement_of_string s  = parse_gen Parser_c.statement s
let expression_of_string s = parse_gen Parser_c.expr s
341 | ||
342 | (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *) | |
343 | ||
344 | ||
345 | ||
346 | ||
347 | ||
348 | (*****************************************************************************) | |
349 | (* Consistency checking *) | |
350 | (*****************************************************************************) | |
351 | ||
113803cf C |
352 | (* todo: |
353 | * could check that an ident has always the same class, be it a typedef | |
354 | * (but sometimes do 'acpi_val acpi_val;'), an ident, a TMacroStatement, | |
355 | * etc. | |
356 | *) | |
357 | ||
34e49164 C |
(* The two classes an identifier can be counted under by the consistency
 * check of this module. *)
type class_ident =
  | CIdent (* can be var, func, field, tag, enum constant *)
  | CTypedef

(* Human-readable name of a class_ident. *)
let str_of_class_ident ci =
  match ci with
  | CIdent -> "Ident"
  | CTypedef -> "Typedef"
365 | ||
366 | (* | |
367 | | CMacro | |
368 | | CMacroString | |
369 | | CMacroStmt | |
370 | | CMacroDecl | |
371 | | CMacroIterator | |
372 | | CAttr | |
373 | ||
374 | (* but take care that must still be able to use '=' *) | |
375 | type context = InFunction | InEnum | InStruct | InInitializer | InParams | |
376 | type class_token = | |
377 | | CIdent of class_ident | |
378 | ||
379 | | CComment | |
380 | | CSpace | |
381 | | CCommentCpp of cppkind | |
382 | | CCommentMisc | |
383 | | CCppDirective | |
384 | ||
385 | | COPar | |
386 | | CCPar | |
387 | | COBrace | |
388 | | CCBrace | |
389 | ||
390 | | CSymbol | |
391 | | CReservedKwd (type | decl | qualif | flow | misc | attr) | |
392 | *) | |
393 | ||
708f4980 C |
(* Wraps ident into a TypeName type with no typedef definition attached
 * and no token info. *)
let ident_to_typename ident : Ast_c.fullType =
  let typename = Ast_c.TypeName (ident, Ast_c.noTypedefDef()) in
  Ast_c.mk_ty typename Ast_c.noii
b1b2de81 C |
396 | |
397 | ||
34e49164 C |
398 | (* parse_typedef_fix4 *) |
(* Consistency check over a list of (toplevel, info_item) pairs, in three
 * phases:
 *  1. count, for every name, how often it is visited as an expression
 *     identifier (CIdent) and as a type name (CTypedef);
 *  2. report every name seen under both classes; when the typedef class
 *     comes first after sorting (higher count, or equal count), the name
 *     is recorded as to-be-retyped;
 *  3. if some names were recorded, rewrite the toplevels: a #define whose
 *     value is such an identifier becomes a DefineType, and
 *     sizeof(<such an identifier>) becomes a SizeOfType.
 * Returns xs itself when nothing was recorded, the rewritten list
 * otherwise (info items are kept as is). *)
let consistency_checking2 xs =

  (* first phase, gather data *)
  let stat = Hashtbl.create 101 in

  (* default values for the two levels of hash *)
  let v1 () = Hashtbl.create 101 in
  let v2 () = ref 0 in

  let count_occurrence s klass =
    incr (stat +> Common.hfind_default s v1 +> Common.hfind_default klass v2)
  in

  let bigf = { Visitor_c.default_visitor_c with

    Visitor_c.kexpr = (fun (k,_bigf) x ->
      match Ast_c.unwrap_expr x with
      | Ast_c.Ident (id) -> count_occurrence (Ast_c.str_of_name id) CIdent
      | _ -> k x
    );
    Visitor_c.ktype = (fun (k,_bigf) t ->
      match Ast_c.unwrap_typeC t with
      | Ast_c.TypeName (name,_typ) ->
          count_occurrence (Ast_c.str_of_name name) CTypedef
      | _ -> k t
    );
  }
  in
  xs +> List.iter (fun (p, _info_item) -> Visitor_c.vk_toplevel bigf p);


  let ident_to_type = ref [] in

  (* Orders by occurrence count; on a tie CTypedef sorts last, so that it
   * comes first once the sorted list is reversed. *)
  let by_count_then_class (ka,va) (kb,vb) =
    if !va =|= !vb then
      (match ka, kb with
      | CTypedef, _ -> 1
      | _, CTypedef -> -1
      | _ -> 0
      )
    else compare !va !vb
  in

  (* second phase, analyze data *)
  stat +> Hashtbl.iter (fun k v ->
    let xs = Common.hash_to_list v in
    if List.length xs >= 2
    then begin
      pr2_err ("CONFLICT:" ^ k);
      match List.rev (List.sort by_count_then_class xs) with
      | [CTypedef, _;CIdent, _] ->
          pr2_err ("transforming some ident in typedef");
          push2 k ident_to_type
      | _ ->
          pr2_err ("TODO:other transforming?")
    end
  );

  (* third phase, update ast.
   * todo? but normally should try to handle correctly scope ? maybe sometime
   * sizeof(id) and even if id was for a long time an identifier, maybe
   * a few time, because of the scope it's actually really a type.
   *)
  if (null !ident_to_type)
  then xs
  else
    let must_retype ident =
      List.mem (Ast_c.str_of_name ident) !ident_to_type
    in
    let bigf = { Visitor_c.default_visitor_c_s with
      Visitor_c.kdefineval_s = (fun (k,_bigf) x ->
        match x with
        | Ast_c.DefineExpr e ->
            (match Ast_c.unwrap_expr e with
            | Ast_c.Ident (ident) when must_retype ident ->
                Ast_c.DefineType (ident_to_typename ident)
            | _ -> k x
            )
        | _ -> k x
      );
      Visitor_c.kexpr_s = (fun (k,_bigf) x ->
        match Ast_c.get_e_and_ii x with
        | (Ast_c.SizeOfExpr e, tref), isizeof ->
            let i1 = tuple_of_list1 isizeof in
            (match Ast_c.get_e_and_ii e with
            | (Ast_c.ParenExpr e, _), iiparen ->
                let (i2, i3) = tuple_of_list2 iiparen in
                (match Ast_c.get_e_and_ii e with
                | (Ast_c.Ident (ident), _), _ii when must_retype ident ->
                    let t = ident_to_typename ident in
                    (Ast_c.SizeOfType t, tref),[i1;i2;i3]
                | _ -> k x
                )
            | _ -> k x
            )
        | _ -> k x
      );
    } in
    xs +> List.map (fun (p, info_item) ->
      Visitor_c.vk_toplevel_s bigf p, info_item
    )
514 | ||
515 | ||
(* Profiled entry point for consistency_checking2. *)
let consistency_checking a =
  let run () = consistency_checking2 a in
  Common.profile_code "C consistencycheck" run
518 | ||
519 | ||
520 | ||
521 | (*****************************************************************************) | |
522 | (* Error recovery *) | |
523 | (*****************************************************************************) | |
524 | ||
708f4980 C |
(* Tells whether the passed tokens (given most recent first) form a whole
 * #define: once reversed and stripped of comments, the list must start
 * with a TDefine and end with a TDefEOL. A list of fewer than two tokens
 * is reported as weird and yields false. *)
let is_define_passed passed =
  let xs = List.filter TH.is_not_comment (List.rev passed) in
  if List.length xs < 2
  then begin
    pr2_err "WEIRD: length list of error recovery tokens < 2 ";
    false
  end
  else
    match Common.head_middle_tail xs with
    | Parser_c.TDefine _, _, Parser_c.TDefEOL _ -> true
    | _ -> false
538 | ||
(* Tells whether last_round (tokens in source order) starts with a
 * TDefine once comment tokens are ignored. *)
let is_defined_passed_bis last_round =
  match List.filter TH.is_not_comment last_round with
  | Parser_c.TDefine _::_ -> true
  | _ -> false
544 | ||
545 | (* ---------------------------------------------------------------------- *) | |
546 | ||
547 | ||
34e49164 C |
548 | (* todo: do something if find Parser_c.Eof ? *) |
(* Error recovery: finds the next synchronisation point after a parse
 * error.
 *
 * next is the list of remaining tokens (source order); already_passed is
 * the list of tokens consumed so far, most recent first. The result is a
 * pair (passed, remaining) following the same conventions.
 *
 * Maybe because not enough }, because for example an ifdef contains
 * in both branch some opening {, we later eat too much, we overflow into
 * the next function. So already_passed may be too big and
 * looking for next synchro point starting from next may not be the
 * best. So maybe we can find synchro point inside already_passed
 * instead of looking in next.
 *
 * But take care! must progress. We must not stay in infinite loop!
 * For instance now I have as a error recovery to look for
 * a "start of something", corresponding to start of function,
 * but must go beyond this start otherwise will loop.
 * So look at premier(external_declaration2) in parser.output and
 * pass at least those first tokens.
 *
 * I have chosen to start search for next synchro point after the
 * first { I found, so quite sure we will not loop. *)
let rec find_next_synchro next already_passed =

  let last_round = List.rev already_passed in
  if is_defined_passed_bis last_round
  then find_next_synchro_define (last_round ++ next) []
  else

  let (before, after) =
    last_round +> Common.span (fun tok ->
      match tok with
      (* by looking at TOBrace we are sure that the "start of something"
       * will not arrive too early
       *)
      | Parser_c.TOBrace _ -> false
      | Parser_c.TDefine _ -> false
      | _ -> true
    )
  in
  find_next_synchro_orig (after ++ next)  (List.rev before)



(* Recovery inside a #define: passes every token up to and including the
 * next TDefEOL. *)
and find_next_synchro_define next already_passed =
  match next with
  | [] ->
      pr2_err "ERROR-RECOV: end of file while in recovery mode";
      already_passed, []
  | (Parser_c.TDefEOL _ as v)::xs  ->
      pr2_err ("ERROR-RECOV: found sync end of #define, line "^i_to_s(TH.line_of_tok v));
      v::already_passed, xs
  | v::xs ->
      find_next_synchro_define xs (v::already_passed)




(* General recovery: passes tokens until either a closing brace in column
 * 0 (which is passed too, together with a directly following `;` or
 * `ident ;`), or a token in column 0 that starts something (which is
 * left in the remaining tokens). *)
and find_next_synchro_orig next already_passed =
  match next with
  | [] ->
      pr2_err "ERROR-RECOV: end of file while in recovery mode";
      already_passed, []

  | (Parser_c.TCBrace _ as v)::xs when TH.col_of_tok v =|= 0 ->
      pr2_err ("ERROR-RECOV: found sync '}' at line "^i_to_s (TH.line_of_tok v));

      (match xs with
      | [] -> raise Impossible (* there is a EOF token normally *)

      (* still useful: now parser.mly allow empty ';' so normally no pb *)
      | Parser_c.TPtVirg iptvirg::xs ->
          pr2_err "ERROR-RECOV: found sync bis, eating } and ;";
          (Parser_c.TPtVirg iptvirg)::v::already_passed, xs

      | Parser_c.TIdent x::Parser_c.TPtVirg iptvirg::xs ->
          pr2_err "ERROR-RECOV: found sync bis, eating ident, }, and ;";
          (Parser_c.TPtVirg iptvirg)::(Parser_c.TIdent x)::v::already_passed,
          xs

      (* Same as the previous case with a space or newline token between
       * the brace and the identifier. The passed list is most recent
       * first, so the comment token goes between the identifier and the
       * brace, mirroring its place in the source. *)
      | ((Parser_c.TCommentSpace _ | Parser_c.TCommentNewline _) as sp)
        ::Parser_c.TIdent x::Parser_c.TPtVirg iptvirg
        ::xs ->
          pr2_err "ERROR-RECOV: found sync bis, eating ident, }, and ;";
          (Parser_c.TPtVirg iptvirg)::
            (Parser_c.TIdent x)::
            sp::
            v::
            already_passed,
          xs

      | _ ->
          v::already_passed, xs
      )
  | v::xs when TH.col_of_tok v =|= 0 && TH.is_start_of_something v  ->
      pr2_err ("ERROR-RECOV: found sync col 0 at line "^ i_to_s(TH.line_of_tok v));
      already_passed, v::xs

  | v::xs ->
      find_next_synchro_orig xs (v::already_passed)
653 | ||
708f4980 C |
654 | |
655 | (*****************************************************************************) | |
656 | (* Macro problem recovery *) | |
657 | (*****************************************************************************) | |
658 | module TV = Token_views_c | |
659 | ||
(* Collects the (name, definition) pairs of the identifier-like tokens of
 * passed (TIdent, TMacroIterator, TypedefIdent) that have an entry in the
 * defs_optional table.
 * Names matching Parsing_hacks.regexp_macro are preferred: the other
 * candidates are returned only when no such name was found. *)
let candidate_macros_in_passed2 passed defs_optional =
  let res = ref [] in
  let res2 = ref [] in

  let consider s =
    match Common.hfind_option s defs_optional with
    | None -> ()
    | Some def ->
        let bucket =
          if s ==~ Parsing_hacks.regexp_macro then res else res2
        in
        Common.push2 (s, def) bucket
  in

  passed +> List.iter (function
    | Parser_c.TIdent (s,_)
    (* bugfix: may have to undo some inferred things *)
    | Parser_c.TMacroIterator (s,_)
    | Parser_c.TypedefIdent (s,_) ->
        consider s
    | _ -> ()
  );
  if null !res
  then !res2
  else !res
686 | ||
(* Profiled entry point for candidate_macros_in_passed2. *)
let candidate_macros_in_passed a b =
  let run () = candidate_macros_in_passed2 a b in
  Common.profile_code "MACRO managment" run
690 | ||
691 | ||
692 | ||
(* Re-runs Parsing_hacks.fix_tokens_cpp on toks with the macro
 * definitions defs (an association list, turned into a hash table).
 * Beforehand, TMacroIterator and TypedefIdent tokens whose name has a
 * definition in defs are turned back into plain TIdent (special cases to
 * undo). *)
let find_optional_macro_to_expand2 ~defs toks =

  let defs = Common.hash_of_list defs in

  let undo_special_case tok =
    match tok with
    | Parser_c.TMacroIterator (s, ii) when Hashtbl.mem defs s ->
        Parser_c.TIdent (s, ii)
    | Parser_c.TypedefIdent (s, ii) when Hashtbl.mem defs s ->
        Parser_c.TIdent (s, ii)
    | x -> x
  in

  let tokens = Common.map undo_special_case toks in
  Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
715 | ||
716 | (* just calling apply_macro_defs and having a specialized version | |
717 | * of the code in fix_tokens_cpp is not enough as some work such | |
718 | * as the passing of the body of attribute in Parsing_hacks.find_macro_paren | |
719 | * will not get the chance to be run on the new expanded tokens. | |
720 | * Hence even if it's expensive, it's currently better to | |
721 | * just call directly fix_tokens_cpp again here. | |
722 | ||
723 | let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in | |
724 | let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in | |
725 | let paren_grouped = TV.mk_parenthised cleaner in | |
726 | Cpp_token_c.apply_macro_defs | |
727 | ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s)) | |
728 | ~msg_apply_known_macro_hint:(fun s -> pr2 "hint") | |
729 | defs paren_grouped; | |
730 | (* because the before field is used by apply_macro_defs *) | |
731 | tokens2 := TV.rebuild_tokens_extented !tokens2; | |
732 | Parsing_hacks.insert_virtual_positions | |
733 | (!tokens2 +> Common.acc_map (fun x -> x.TV.tok)) | |
734 | *) | |
(* Profiled entry point for find_optional_macro_to_expand2. *)
let find_optional_macro_to_expand ~defs a =
  let run () = find_optional_macro_to_expand2 ~defs a in
  Common.profile_code "MACRO managment" run
738 | ||
739 | ||
740 | ||
34e49164 C |
741 | (*****************************************************************************) |
742 | (* Include/Define hacks *) | |
743 | (*****************************************************************************) | |
744 | ||
485bce71 C |
745 | (* Sometimes I prefer to generate a single token for a list of things in the |
746 | * lexer so that if I have to passed them, like for passing TInclude then | |
747 | * it's easy. Also if I don't do a single token, then I need to | |
748 | * parse the rest which may not need special stuff, like detecting | |
749 | * end of line which the parser is not really ready for. So for instance | |
750 | * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just | |
751 | * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ? | |
752 | * but this kind of token is valid only after a #include and the | |
753 | * lexing and parsing rules are different for such tokens so not that | |
754 | * easy to parse such things in parser_c.mly. Hence the following hacks. | |
755 | * | |
756 | * less?: maybe could get rid of this like I get rid of some of fix_define. | |
757 | *) | |
758 | ||
34e49164 C |
759 | (* ------------------------------------------------------------------------- *) |
760 | (* helpers *) | |
761 | (* ------------------------------------------------------------------------- *) | |
762 | ||
763 | (* used to generate new token from existing one *) | |
(* Used to generate a new token info from an existing one: the result is
 * an OriginTok whose parse info is that of ii with the string replaced by
 * str and both the char position and the column shifted by posadd.
 * The annotation fields are fresh refs: must generate a new ref each
 * time, otherwise they would be shared. *)
let new_info posadd str ii =
  let base = Ast_c.parse_info_of_info ii in
  let shifted =
    { base with
      charpos = Ast_c.pos_of_info ii + posadd;
      str     = str;
      column = Ast_c.col_of_info ii + posadd;
    }
  in
  { Ast_c.pinfo = Ast_c.OriginTok shifted;
    cocci_tag = ref Ast_c.emptyAnnot;
    comments_tag = ref Ast_c.emptyComments;
  }
775 | ||
776 | ||
(* Turns the tokens of xs into TCommentCpp tokens up to and including the
 * first TDefEOL; the tokens after it are returned untouched.
 * The TDefEOL itself becomes a CppDirective comment, the tokens before it
 * become CppPassingNormal comments, except real comments which are kept
 * as is (bugfix: otherwise may lose a TComment token).
 * Fails when xs contains no TDefEOL
 * (job not done in Cpp_token_c.define_parse ?). *)
let rec comment_until_defeol xs =
  match xs with
  | [] ->
      failwith "cant find end of define token TDefEOL"
  | (Parser_c.TDefEOL _ as x)::rest ->
      Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok x)
      ::rest
  | x::rest ->
      let x' =
        if TH.is_real_comment x
        then x
        else
          Parser_c.TCommentCpp
            (Token_c.CppPassingNormal (*good?*), TH.info_of_tok x)
      in
      x'::comment_until_defeol rest
796 | ||
(* Returns the tokens of xs that follow the first TDefEOL (the TDefEOL
 * itself is dropped too).
 * When no TDefEOL is left (Common.drop_until presumably returns the empty
 * list in that case -- confirm in Common), fails with the same explicit
 * message as comment_until_defeol rather than with the bare Failure
 * raised by List.tl. *)
let drop_until_defeol xs =
  let is_defeol = function Parser_c.TDefEOL _ -> true | _ -> false in
  match Common.drop_until is_defeol xs with
  | [] -> failwith "cant find end of define token TDefEOL"
  | _defeol::rest -> rest
800 | ||
801 | ||
802 | ||
803 | (* ------------------------------------------------------------------------- *) | |
804 | (* returns a pair (replaced token, list of next tokens) *) | |
805 | (* ------------------------------------------------------------------------- *) | |
806 | ||
(* Splits an include into two tokens. Returns the pair
 * (TIncludeStart token, [TIncludeFilename token]): the first carries info
 * with its string replaced by includes, the second carries a new info for
 * filename located String.length includes characters further. *)
let tokens_include (info, includes, filename, inifdef) =
  let start_tok =
    Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef)
  in
  let filename_info = new_info (String.length includes) filename info in
  (start_tok, [Parser_c.TIncludeFilename (filename, filename_info)])
812 | ||
813 | (*****************************************************************************) | |
485bce71 | 814 | (* Parsing default define macros, usually in a standard.h file *) |
34e49164 C |
815 | (*****************************************************************************) |
816 | ||
(* Extracts the cpp #define definitions of file: the file is lexed
 * (without the lexing profiling section), its define tokens are fixed by
 * Cpp_token_c.fix_tokens_define and the definitions are then extracted.
 * verbose_lexing is switched off for the duration of the call, under
 * Common.save_excursion (presumably restoring it afterwards). *)
let parse_cpp_define_file2 file =
  Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
    Flag_parsing_c.verbose_lexing := false;
    file
    +> tokens ~profile:false
    +> Cpp_token_c.fix_tokens_define
    +> Cpp_token_c.extract_cpp_define
  )
34e49164 | 824 | |
485bce71 C |
(* Profiled (exclusive HACK section) entry point for
 * parse_cpp_define_file2. *)
let parse_cpp_define_file a =
  let run () = parse_cpp_define_file2 a in
  Common.profile_code_exclusif "HACK" run
827 | ||
708f4980 C |
828 | |
829 | ||
(* Global table of macro definitions, filled by init_defs_macros. *)
let _defs : (string, Cpp_token_c.define_def) Hashtbl.t ref =
  ref (Hashtbl.create 101)

(* Global table of builtin macro definitions, filled by
 * init_defs_builtins. *)
let _defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref =
  ref (Hashtbl.create 101)
835 | ||
836 | ||
485bce71 C |
837 | (* can not be put in parsing_hack, cos then mutually recursive problem as |
838 | * we also want to parse the standard.h file. | |
839 | *) | |
(* Loads the macro definitions of the file std_h into _defs, replacing
 * its previous content. When the file does not exist, only a warning is
 * printed and _defs is left untouched. *)
let init_defs_macros std_h =
  if Common.lfile_exists std_h
  then begin
    pr2 ("init_defs: " ^ std_h);
    _defs := Common.hash_of_list (parse_cpp_define_file std_h)
  end
  else pr2 ("warning: Can't find default macro file: " ^ std_h)
847 | ||
(* Loads the macro definitions of file_h into _defs_builtins, replacing
 * its previous content. When the file does not exist, only a warning is
 * printed and _defs_builtins is left untouched. *)
let init_defs_builtins file_h =
  if Common.lfile_exists file_h
  then begin
    pr2 ("init_defs_builtins: " ^ file_h);
    _defs_builtins := Common.hash_of_list (parse_cpp_define_file file_h)
  end
  else pr2 ("warning: Can't find macro file: " ^ file_h)
856 | ||
34e49164 C |
857 | |
858 | (*****************************************************************************) | |
859 | (* Main entry point *) | |
860 | (*****************************************************************************) | |
861 | ||
(* Extra information kept next to each toplevel element: a string and the
 * list of tokens. Values of this type are built by mk_info_item from the
 * tokens passed while parsing the element; what the string holds is not
 * visible from here -- TODO confirm against mk_info_item. *)
type info_item = string * Parser_c.token list

(* One toplevel AST element together with its info_item. *)
type toplevel2 = Ast_c.toplevel * info_item

(* A whole parsed file. *)
type program2 = toplevel2 list
866 | ||
(* Forget the info_item of every element, keeping only the AST part. *)
let program_of_program2 xs =
  List.map fst xs
869 | ||
(* Apply [f] to the AST part of [program2] as a whole, then pair the
 * elements it returns with the original info_items again, position by
 * position (via Common.zip). *)
let with_program2 f program2 =
  let (program, infos) = Common.unzip program2 in
  let program' = f program in
  Common.zip program' infos
877 | ||
878 | ||
34e49164 C |
879 | |
(* The use of local refs (remaining_tokens, passed_tokens, ...) makes
 * error recovery possible. Indeed, they allow us to skip some tokens and
 * still be able to call the ocamlyacc parser again. It is ugly code
 * because we can't modify ocamllex and ocamlyacc. As we want some
 * extended lexing tricks, we have to use such refs.
 *
 * Those refs are now also used for my lalr(k) technique. Indeed, they
 * store the future and previous tokens that were parsed, and so
 * provide enough context information for powerful lexing tricks.
 *
 * - passed_tokens_last_ckp stores the passed tokens since the last
 *   checkpoint. Used for NotParsedCorrectly and also to build the
 *   info_item attached to each program_element.
 * - passed_tokens_clean is used for lookahead, in fact for lookback.
 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
 *   contains some comments and so would make pattern matching difficult
 *   in lookahead. Hence this variable. We would also like to get rid
 *   of cpp instructions because sometimes a cpp instruction is between
 *   two tokens and makes a pattern matching fail. But lookahead also
 *   transforms some cpp instructions (into comments), so we can't remove
 *   them.
 *
 * So remaining_tokens and passed_tokens_last_ckp contain comment-tokens,
 * whereas passed_tokens_clean and remaining_tokens_clean do not contain
 * comment-tokens.
 *
 * Normally we have:
 *   toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
 *     after the call to pop2.
 *   toks = (reverse passed_tok) ++ remaining_tokens
 *     at the end of the lexer_function call.
 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
 * At the end of the lexer_function call, cur_tok overlaps with passed_tok.
 *
 * convention: I use "tr" for "tokens refs"
 *
 * I now also need this lexing trick because the lexer returns comment
 * tokens.
 *)
918 | ||
(* Mutable token cursor shared by the hacked lexer and the error recovery
 * code. The "clean" variants are the same streams with comment tokens
 * filtered out (see mk_tokens_state). *)
type tokens_state = {
  (* tokens not yet handed to the parser, comment tokens included *)
  mutable rest : Parser_c.token list;
  (* same, without comment tokens *)
  mutable rest_clean : Parser_c.token list;
  (* token most recently taken from rest *)
  mutable current : Parser_c.token;
  (* it's passed since last "checkpoint", not passed from the beginning;
   * most recent token first *)
  mutable passed : Parser_c.token list;
  (* passed tokens that were actually given to the parser, i.e. without
   * comment tokens; most recent token first *)
  mutable passed_clean : Parser_c.token list;
}
708f4980 C |
927 | |
(* Build a fresh tokens_state positioned at the start of [toks]:
 * nothing has been passed yet, [current] is the first token, and
 * rest_clean is [toks] without its comment tokens.
 *
 * Raises Failure when [toks] is empty, since there is then no token to use
 * as [current]. The original code failed the same way through List.hd, but
 * with the anonymous message "hd"; the message now names this function.
 *)
let mk_tokens_state toks =
  match toks with
  | [] -> failwith "mk_tokens_state: empty token list"
  | first :: _ ->
      {
        rest = toks;
        rest_clean = List.filter TH.is_not_comment toks;
        current = first;
        passed = [];
        passed_clean = [];
      }
936 | ||
937 | ||
938 | ||
(* Return a new record holding the same field values as [tr]; mutating the
 * copy's fields afterwards does not affect [tr]. *)
let clone_tokens_state tr =
  { tr with rest = tr.rest }
(* Overwrite every field of [dst] with the corresponding field of [src]. *)
let copy_tokens_state ~src ~dst =
  let { rest = r; rest_clean = rc; current = cur;
        passed = p; passed_clean = pc } = src in
  dst.rest <- r;
  dst.rest_clean <- rc;
  dst.current <- cur;
  dst.passed <- p;
  dst.passed_clean <- pc
953 | ||
(* Drop the TMacroAttr tokens found among the first [n] tokens of [xs];
 * whatever comes after those [n] tokens is returned unchanged.
 *
 * todo? agglomerate the x##b ?
 *)
let rec filter_noise n xs =
  match xs, n with
  | [], _ -> []
  | _, 0 -> xs
  | Parser_c.TMacroAttr _ :: rest, _ -> filter_noise (n - 1) rest
  | tok :: rest, _ -> tok :: filter_noise (n - 1) rest
966 | ||
(* Prepare a token list for Parsing_hacks.lookahead: the first token is
 * always kept as is, and the TMacroAttr tokens among the 10 tokens that
 * follow it are removed (see filter_noise). *)
let clean_for_lookahead = function
  | [] -> []
  | first :: rest -> first :: filter_noise 10 rest
973 | ||
34e49164 | 974 | |
485bce71 C |
975 | |
(* Hacked lex. This function uses the refs passed by
 * parse_print_error_heuristic; tr means token refs.
 *
 * It is given to the ocamlyacc entry point in place of a real lexer: the
 * [lexbuf] argument is only threaded through the recursive calls, and
 * the tokens come from [tr], whose fields are updated as tokens are
 * consumed.
 *
 * - comment tokens are pushed on tr.passed and skipped;
 * - a #define or #include met outside toplevel is turned into a
 *   TCommentCpp and skipped when Flag_parsing_c.cpp_directive_passing is
 *   set or when [pass] >= 2;
 * - an #include that is kept is replaced by the tokens computed by
 *   tokens_include;
 * - any other token goes through the typedef fix (pass 1 only) and
 *   Parsing_hacks.lookahead, which may itself turn it into a comment.
 *
 * When tr.rest is empty, "ALREADY AT END" is reported and tr.current is
 * returned again.
 *)
let rec lexer_function ~pass tr = fun lexbuf ->
  match tr.rest with
  | [] ->
      pr2_err "ALREADY AT END";
      tr.current
  | v :: remaining ->
      tr.rest <- remaining;
      tr.current <- v;

      if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;

      if TH.is_comment v
      then begin
        tr.passed <- v :: tr.passed;
        lexer_function ~pass tr lexbuf
      end
      else begin
        (* v is not a comment, so it must also be the head of rest_clean *)
        let x = List.hd tr.rest_clean in
        tr.rest_clean <- List.tl tr.rest_clean;
        assert (x =*= v);

        (* should a cpp directive met here be passed, i.e. commentized ? *)
        let pass_directive () =
          not (LP.current_context () =*= LP.InTopLevel) &&
          (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
        in

        match v with

        (* fix_define1.
         *
         * Why not in parsing_hacks lookahead and do passing like
         * I do for some ifdef directives ? Because here I also need to
         * generate some tokens sometimes and so I need access to the
         * tr.passed, tr.rest, etc.
         *)
        | Parser_c.TDefine _ when pass_directive () ->
            incr Stat.nDefinePassing;
            pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
            let commentized =
              Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok v)
            in
            tr.passed <- commentized :: tr.passed;
            tr.rest <- comment_until_defeol tr.rest;
            tr.rest_clean <- drop_until_defeol tr.rest_clean;
            lexer_function ~pass tr lexbuf

        | Parser_c.TDefine _ ->
            tr.passed <- v :: tr.passed;
            tr.passed_clean <- v :: tr.passed_clean;
            v

        | Parser_c.TInclude (_, _, _, info) when pass_directive () ->
            incr Stat.nIncludePassing;
            pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
            let commentized =
              Parser_c.TCommentCpp (Token_c.CppDirective, info) in
            tr.passed <- commentized :: tr.passed;
            lexer_function ~pass tr lexbuf

        | Parser_c.TInclude (includes, filename, inifdef, info) ->
            let (start_tok, new_tokens) =
              tokens_include (info, includes, filename, inifdef) in
            let new_tokens_clean =
              List.filter TH.is_not_comment new_tokens in

            tr.passed <- start_tok :: tr.passed;
            tr.passed_clean <- start_tok :: tr.passed_clean;
            (* the generated tokens are served before the rest of the file *)
            tr.rest <- new_tokens ++ tr.rest;
            tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
            start_tok

        | _ ->
            (* typedef_fix1 *)
            let v =
              match v with
              | Parser_c.TIdent (s, ii)
                when LP.is_typedef s &&
                     not (!Flag_parsing_c.disable_add_typedef) &&
                     pass =|= 1 ->
                  Parser_c.TypedefIdent (s, ii)
              | other -> other
            in

            let v =
              Parsing_hacks.lookahead ~pass
                (clean_for_lookahead (v :: tr.rest_clean))
                tr.passed_clean in

            tr.passed <- v :: tr.passed;

            (* the lookahead may have changed the status of the token and
             * consider it as a comment, for instance some #include are
             * turned into comments, hence this code. *)
            (match v with
             | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
             | _ ->
                 tr.passed_clean <- v :: tr.passed_clean;
                 v
            )
      end
1079 | ||
1080 | ||
(* parse_print_error_heuristic2 tries parsing passes numbered 1 to 4;
 * presumably this constant is that upper bound, but it is not referenced
 * in this part of the file -- TODO confirm. *)
let max_pass : int = 4
1082 | ||
34e49164 | 1083 | |
(* Parse one toplevel element from the token refs [tr] with the
 * Parser_c.celem entry point, using lexer_function as the lexer.
 *
 * Returns
 *  - Left elem when the parser succeeds;
 *  - Right (info_of_bads, line_error, passed, passed_before_error,
 *           current, exn) when the parser (or the hacked lexer) raised
 *    [exn]. In that case the typedef state is restored and [tr] is moved
 *    to the next synchronisation point found by find_next_synchro:
 *    [passed] is tr.passed after that move, [passed_before_error] and
 *    [current] are tr.passed and tr.current as they were when the
 *    exception was caught, and [line_error] is the line of [current].
 *
 * Every exception is caught here; it is up to the caller to decide what
 * to do with [exn]. The (file, filelines) argument is not used in the
 * body.
 *)
let get_one_elem ~pass tr (file, filelines) =

  if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
  then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";

  (* normally have to do that only when come from an exception in which
   * case the dt() may not have been done
   * TODO but if was in scoped scope ? have to let only the last scope
   * so need do a LP.lexer_reset_typedef ();
   *)
  LP.enable_typedef();
  LP._lexer_hint := (LP.default_hint ());
  LP.save_typedef_state();

  tr.passed <- [];

  (* the parser wants a lexbuf, but lexer_function never reads from it *)
  let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in

  let recover exn =
    LP.restore_typedef_state();

    (* must keep here, before the code that adjusts the tr fields *)
    let current = tr.current in
    let line_error = TH.line_of_tok current in
    let passed_before_error = tr.passed in

    (* error recovery, go to next synchro point *)
    let (passed', rest') = find_next_synchro tr.rest tr.passed in
    tr.rest <- rest';
    tr.passed <- passed';

    tr.current <- List.hd passed';
    tr.passed_clean <- []; (* enough ? *)
    (* with error recovery, rest and rest_clean may not be in sync *)
    tr.rest_clean <- List.filter TH.is_not_comment rest';

    let info_of_bads = Common.map_eff_rev TH.info_of_tok passed' in
    Right (info_of_bads, line_error,
           passed', passed_before_error,
           current, exn)
  in

  try
    (* -------------------------------------------------- *)
    (* Call parser *)
    (* -------------------------------------------------- *)
    Common.profile_code_exclusif "YACC" (fun () ->
      Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
    )
  with exn -> recover exn
1134 | ||
1135 | ||
1136 | ||
1137 | ||
(* Main parsing loop: lex [file], then parse it one toplevel element at a
 * time with error recovery, and return (program2, stat) where program2 is
 * the result of consistency_checking on the parsed elements and stat
 * holds the parsing statistics of the file.
 *
 * An element that fails to parse in pass 1 is retried (unless
 * Flag_parsing_c.disable_multi_pass is set) in pass 2 on the same tokens,
 * then in passes 3 and 4 after on-demand expansion of candidate macros.
 * An element that still fails becomes an Ast_c.NotParsedCorrectly.
 *
 * note: as now we go in 2 passes, there is first all the error messages of
 * the lexer, and then the errors of the parser. They are not intertwined
 * anymore.
 *
 * !!!This function uses refs, and is not reentrant !!! so take care.
 * It uses globals defined in Lexer_parser and also the _defs global
 * in parsing_hack.ml.
 *
 * This function uses internally some semi globals in the
 * tokens_stat record and parsing_stat record.
 *)

let parse_print_error_heuristic2 file =

  let filelines = Common.cat_array file in
  let stat = Parsing_stat.default_stat file in

  (* -------------------------------------------------- *)
  (* call lexer and get all the tokens *)
  (* -------------------------------------------------- *)
  LP.lexer_reset_typedef();
  Parsing_hacks.ifdef_paren_cnt := 0;

  let raw_toks = tokens file in
  let define_fixed = Cpp_token_c.fix_tokens_define raw_toks in
  let toks =
    Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins define_fixed in

  (* expand macros on demand trick, preparation phase *)
  let macros =
    Common.profile_code "MACRO mgmt prep 1" (fun () ->
      let tbl = Hashtbl.copy !_defs in
      (* include also builtins as some macros may generate some builtins too
       * like __decl_spec or __stdcall
       *)
      Hashtbl.iter (Hashtbl.replace tbl) !_defs_builtins;
      tbl
    )
  in
  (* macros defined in the file itself take precedence over the above *)
  Common.profile_code "MACRO mgmt prep 2" (fun () ->
    List.iter
      (fun (name, def) -> Hashtbl.replace macros name def)
      (parse_cpp_define_file file)
  );

  let tr = mk_tokens_state toks in

  let rec loop tr =

    (* todo?: I am not sure that it represents current_line, cos maybe
     * tr.current participated in the previous parsing phase, so maybe
     * tr.current is not the first token of the next parsing phase. Same
     * with checkpoint2.
     * It would be better to record when we have a } or ; in parser.mly,
     * cos we know that they are the last symbols of external_declaration2.
     *
     * bugfix: may not be equal to 'file' as after macro expansions we can
     * start to parse a new entity from the body of a macro, for instance
     * when parsing a define_machine() body, cf standard.h
     *)
    let checkpoint = TH.line_of_tok tr.current in
    let checkpoint_file = TH.file_of_tok tr.current in

    (* rewind tr onto [toks] and run parsing pass number [pass] on it *)
    let retry ~pass toks =
      copy_tokens_state ~src:(mk_tokens_state toks) ~dst:tr;
      get_one_elem ~pass tr (file, filelines)
    in
    (* same, on the tokens of the failed region followed by the rest of
     * the file, after expansion of the macros in [defs] *)
    let retry_expanding ~pass ~defs passed =
      let toks = List.rev passed ++ tr.rest in
      retry ~pass (find_optional_macro_to_expand ~defs toks)
    in

    (* passes 2 to 4; [passed] holds the tokens of the region that failed
     * in pass 1 *)
    let multi_pass passed =
      pr2_err "parsing pass2: try again";
      let pass2 = retry ~pass:2 (List.rev passed ++ tr.rest) in
      match pass2 with
      | Left _ -> pass2
      | Right (_, _, passed, _, _, _) ->
          let candidates = candidate_macros_in_passed passed macros in
          if is_define_passed passed || null candidates
          then pass2
          else begin
            pr2_err "parsing pass3: try again";
            let pass3 = retry_expanding ~pass:3 ~defs:candidates passed in
            match pass3 with
            | Left _ -> pass3
            | Right (_, _, passed, _, _, _) ->
                pr2_err "parsing pass4: try again";
                let candidates = candidate_macros_in_passed passed macros in
                retry_expanding ~pass:4 ~defs:candidates passed
          end
    in

    (* call the parser *)
    let elem =
      let pass1 =
        Common.profile_code "Parsing: 1st pass" (fun () ->
          get_one_elem ~pass:1 tr (file, filelines)
        ) in
      match pass1 with
      | Left _ -> pass1
      | Right (_, _, passed, _, _, _) ->
          if !Flag_parsing_c.disable_multi_pass
          then pass1
          else
            Common.profile_code "Parsing: multi pass" (fun () ->
              multi_pass passed
            )
    in

    (* again not sure if checkpoint2 corresponds to end of bad region *)
    let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
    let checkpoint2_file = TH.file_of_tok tr.current in

    (* do both checkpoints lie in the file being parsed, as opposed to
     * tokens coming from somewhere else, e.g. a macro body ? *)
    let in_original_file =
      (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
    in
    let diffline =
      if in_original_file
      then (checkpoint2 - checkpoint)
      else 0
        (* TODO? so if error come in middle of something ? where the
         * start token was from original file but synchro found in body
         * of macro ? then can have wrong number of lines stat.
         * Maybe simpler just to look at tr.passed and count
         * the lines in the token from the correct file ?
         *)
    in
    let info = mk_info_item file (List.rev tr.passed) in

    (* some stat updates *)
    stat.Stat.commentized <-
      stat.Stat.commentized + count_lines_commentized (snd info);

    let elem =
      match elem with
      | Left e ->
          stat.Stat.correct <- stat.Stat.correct + diffline;
          e
      | Right (info_of_bads, line_error, toks_of_bads,
               _passed_before_error, cur, exn) ->

          let was_define = is_define_passed tr.passed in

          (* NOTE(review): when the define filter below applies, [exn] is
           * dropped whatever it is, including exceptions that are not
           * parsing errors. *)
          if not (was_define && !Flag_parsing_c.filter_msg_define_error)
          then begin

            (* only parsing errors are recovered from; anything else is
             * re-raised. The message is built lazily as it is only needed
             * when show_parsing_error is set. *)
            let error_msg =
              match exn with
              (* Lexical is not anymore launched I think *)
              | Lexer_c.Lexical s ->
                  (fun () -> "lexical error " ^s^ "\n =" ^ error_msg_tok cur)
              | Parsing.Parse_error ->
                  (fun () -> "parse error \n = " ^ error_msg_tok cur)
              | Semantic_c.Semantic (s, _) ->
                  (fun () -> "semantic error " ^s^ "\n ="^ error_msg_tok cur)
              | e -> raise e
            in

            if !Flag_parsing_c.show_parsing_error
            then begin
              pr2 (error_msg ());
              (* bugfix: *)
              if in_original_file
              then print_bad line_error (checkpoint, checkpoint2) filelines
              else pr2 "PB: bad: but on tokens not from original file"
            end;

            (* remember the identifier-like tokens around the error line *)
            let pbline =
              toks_of_bads
              +> Common.filter (TH.is_same_line_or_close line_error)
              +> Common.filter TH.is_ident_like
            in
            let error_info = (List.map TH.str_of_tok pbline, line_error) in
            stat.Stat.problematic_lines <-
              error_info :: stat.Stat.problematic_lines
          end;

          if was_define && !Flag_parsing_c.filter_define_error
          then stat.Stat.correct <- stat.Stat.correct + diffline
          else stat.Stat.bad <- stat.Stat.bad + diffline;

          Ast_c.NotParsedCorrectly info_of_bads
    in

    (match elem with
     | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
     | other -> (other, info) :: loop tr (* recurse *)
    )
  in
  let program = consistency_checking (loop tr) in
  (program, stat)
1356 | ||
1357 | ||
(* parse_print_error_heuristic2 run under the "TOTAL" profiling label. *)
let time_total_parsing a =
  let run () = parse_print_error_heuristic2 a in
  Common.profile_code "TOTAL" run
1360 | ||
(* time_total_parsing run under the "C parsing" profiling label, so the
 * parse is accounted under both "C parsing" and "TOTAL". *)
let parse_print_error_heuristic a =
  let run () = time_total_parsing a in
  Common.profile_code "C parsing" run
1363 | ||
34e49164 C |
1364 | |
(* alias of parse_print_error_heuristic *)
let parse_c_and_cpp = parse_print_error_heuristic
1367 | ||
1368 | (*****************************************************************************) | |
1369 | (* Same but faster cos memoize stuff *) | |
1370 | (*****************************************************************************) | |
(* Cached variant of parse_print_error_heuristic.
 *
 * When Flag_parsing_c.use_cache is not set, [file] is simply parsed.
 * Otherwise the parse goes through Common.cache_computation_robust with
 * the ".ast_raw" and ".depend_raw" extensions; both dependency lists are
 * empty for now (see the TOFIX below).
 *)
let parse_cache file =
  let parse () = parse_print_error_heuristic file in
  if not !Flag_parsing_c.use_cache
  then parse ()
  else begin
    pr2 "TOFIX";
    let need_no_changed_files =
      (* should use Sys.argv.(0), would be safer. *)
      [
        (* TOFIX
           Config.path ^ "/parsing_c/c_parser.cma";
           (* we may also depend now on the semantic patch because
              the SP may use macro and so we will disable some of the
              macro expansions from standard.h.
           *)
           !Config.std_h;
        *)
      ]
    in
    let need_no_changed_variables =
      (* could add some of the flags of flag_parsing_c.ml *)
      []
    in
    Common.cache_computation_robust
      file ".ast_raw"
      (need_no_changed_files, need_no_changed_variables) ".depend_raw"
      parse
  end
1397 | ||
1398 | ||
1399 | ||
1400 | (*****************************************************************************) | |
485bce71 | 1401 | (* Some special cases *) |
34e49164 C |
1402 | (*****************************************************************************) |
1403 | ||
(* Parse the C statement [s]: it is written to a temporary file as the
 * body of a "void main()" function, the file is parsed, and the statement
 * of the first definition whose body is exactly one statement is
 * returned. What happens when no such definition exists is up to
 * Common.find_some. *)
let cstatement_of_string : string -> Ast_c.statement = fun s ->
  let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
  Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}");
  let (program, _stat) = parse_c_and_cpp tmpfile in
  let single_statement (elem, _) =
    match elem with
    | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
    | _ -> None
  in
  Common.find_some single_statement program
1413 | ||
(* Parse the C expression [s]: it is written to a temporary file as the
 * only statement ("s;") of a "void main()" function, the file is parsed,
 * and the expression of the first definition whose body is exactly one
 * expression statement is returned. What happens when no such definition
 * exists is up to Common.find_some. *)
let cexpression_of_string : string -> Ast_c.expression = fun s ->
  let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
  Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}");
  let (program, _stat) = parse_c_and_cpp tmpfile in
  let single_expression (elem, _) =
    match elem with
    | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) ->
        (match Ast_c.unwrap_st st with
         | Ast_c.ExprStatement (Some e) -> Some e
         | _ -> None
        )
    | _ -> None
  in
  Common.find_some single_expression program