permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / comment_annotater_c.ml
CommitLineData
0708f913
C
1(* Yoann Padioleau
2 *
ae4735db
C
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2009, University of Urbana Champaign.
0708f913
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
0708f913
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16open Common
17
18module T = Token_c
19
20
21(*****************************************************************************)
22(* Prelude *)
23(*****************************************************************************)
24
25(* A trimmed down version of my comment_annotater of CComment. In CComment
ae4735db 26 * I was also trying to associate the comment to the relevant entity, not
0708f913
C
27 * just the closest token (e.g. a function comment is not placed next to the
28 * identifier of the function but before its return type or storage).
29 *)
30
31
32(*****************************************************************************)
33(* Helpers *)
34(*****************************************************************************)
35
(* Predicate selecting the tokens that get attached to a neighbouring
 * token as comments: the token must satisfy both
 * Token_helpers.is_not_in_ast and Token_helpers.is_origin.
 * The origin check is only evaluated when the first check succeeds.
 *)
let is_comment_or_space_or_stuff tok =
  if Token_helpers.is_not_in_ast tok
  then Token_helpers.is_origin tok
  else false
38
(* coupling with Token_helpers.is_not_in_ast, and of course with token_c.ml *)
(* Translate a comment-like parser token into the lightweight Token_c
 * representation, paired with the parse info extracted from the
 * Ast_c.info carried by the token.
 *
 * The token must be an origin token (checked by assertion). Only the
 * four comment-like constructors are handled; any other token raises
 * Impossible 61.
 *)
let convert_relevant_tokens x =
  assert (Token_helpers.is_origin x);

  (* every case pairs a Token_c kind with the parse info of the token *)
  let with_parse_info kind ii = (kind, Ast_c.parse_info_of_info ii) in

  match x with
  | Parser_c.TCommentSpace ii ->
      with_parse_info Token_c.TCommentSpace ii
  | Parser_c.TCommentNewline ii ->
      with_parse_info Token_c.TCommentNewline ii
  | Parser_c.TComment ii ->
      with_parse_info Token_c.TComment ii

  (* the passed tokens because of our limited handling of cpp *)
  | Parser_c.TCommentCpp (cppkind, ii) ->
      with_parse_info (Token_c.TCommentCpp cppkind) ii

  | _ -> raise (Impossible 61)
0708f913
C
57
58
59(*****************************************************************************)
60(* Main entry *)
61(*****************************************************************************)
62
ae4735db 63(* right now we just add comment-like and origin-tok tokens,
0708f913
C
64 * as explained in token_c.ml.
65 *
66 * This simplified comment_annotater (compared to CComment) is really
67 * simple as the tokens and the Ast_c.info in the asts actually share
68 * the same refs.
ae4735db 69 * So, modifying fields in the tokens will also modify the info in
0708f913
C
70 * the ast. Sometimes side effects simplify programming ...
71 * We use similar tricks in unparse_c.ml. So really the asts argument
72 * is not needed.
ae4735db
C
73 *
74 * ex: C1 C2 T1 T2 C3 C4 T3 C5 T4.
0708f913
C
75 * => infoT1(-C1C2,+), infoT2(-,+C3C4), infoT3(-C3C4,+C5), infoT4(-C5,+)
76 *)
77
78(*
ae4735db 79let (agglomerate_either:
0708f913
C
80 ('a, 'a) Common.either list -> ('a list, 'a list) Common.either list) = fun xs ->
81 raise Todo
82
ae4735db
C
83let (span_and_pack:
84 ('a -> ('a, 'a) Common.either) -> 'a list ->
85 ('a list, 'a list) Common.either list) = fun f_either xs ->
0708f913
C
86 let xs' = List.map f_either xs in
87 agglomerate_either xs'
88*)
89
90
ae4735db 91(* the asts is not really used, we do all via side effect on the tokens,
0708f913
C
92 * which share the info reference with the elements in the ast.
93 *)
(* Attach to every kept token the comment-like tokens that surround it,
 * by overwriting the comments_tag reference of the token info.
 *
 * toks: the token list to process.
 * asts: returned unchanged; it is never inspected here.
 *
 * Asserts that the before and after passes yield the same number of
 * entries and that both passes pair up the same tokens.
 *)
let annotate_program toks asts =
  (* Common.exclude_but_keep_attached gathers all the comments found
   * before a token and associates them to that token. Applied to the
   * reversed token list, the same function gathers the comments found
   * after a token.
   *)
  let attach_preceding ts =
    Common.exclude_but_keep_attached is_comment_or_space_or_stuff ts
  in

  (* before phase *)
  let toks_with_before = attach_preceding toks in

  (* after phase. trick: run the before phase on the reversed tokens,
   * then put both the attached comments and the token list back in
   * source order.
   *)
  let toks_with_after =
    List.rev toks
    +> attach_preceding
    +> List.map (fun (tok, attached) -> (tok, List.rev attached))
    +> List.rev
  in

  (* merge *)
  assert (List.length toks_with_after =|= List.length toks_with_before);

  let tag_token ((tok_b, comments_before), (tok_a, comments_after)) =
    assert (tok_b =*= tok_a);

    let converted_before =
      List.map convert_relevant_tokens comments_before in
    let converted_after =
      List.map convert_relevant_tokens comments_after in

    let info = Token_helpers.info_of_tok tok_b in
    info.Ast_c.comments_tag :=
      { Ast_c.mbefore = converted_before;
        Ast_c.mafter = converted_after;
        mbefore2 = [];
        mafter2 = [];
      }
  in
  List.iter tag_token (Common.zip toks_with_before toks_with_after);

  (* the token infos were modified via side effect; asts is returned
   * just to have a clean signature.
   *)
  asts
ae4735db
C
140
141
142
143