765b6b879c228a0a6c52df847ec5baa31f6fd45a
[bpt/coccinelle.git] / docs / manual / cocci_syntax.tex
1
2 %\section{The SmPL Grammar}
3
4 % This section presents the SmPL grammar. This definition follows closely
5 % our implementation using the Menhir parser generator \cite{menhir}.
6
7 This document presents the grammar of the SmPL language used by the
8 \href{http://coccinelle.lip6.fr/}{Coccinelle tool}. For the most
9 part, the grammar is written using standard notation. In some rules,
10 however, the left-hand side is in all uppercase letters. These are
11 macros, which take one or more grammar rule right-hand-sides as
12 arguments. The grammar also uses some unspecified nonterminals, such
13 as \T{id}, \T{const}, etc. These refer to the sets suggested by
14 the name, {\em i.e.}, \T{id} refers to the set of possible
15 C-language identifiers, while \T{const} refers to the set of
16 possible C-language constants.
17 %
18 \ifhevea
19 A PDF version of this documentation is available at
20 \url{http://coccinelle.lip6.fr/docs/main_grammar.pdf}.
21 \else
22 An HTML version of this documentation is available online at
23 \url{http://coccinelle.lip6.fr/docs/main_grammar.html}.
24 \fi
25
26 \section{Program}
27
28 \begin{grammar}
29 \RULE{\rt{program}}
30 \CASE{\any{\NT{include\_cocci}} \some{\NT{changeset}}}
31
32 \RULE{\rt{include\_cocci}}
33 \CASE{using \NT{string}}
34 \CASE{using \NT{pathToIsoFile}}
35 \CASE{virtual \T{id} \ANY{, \T{id}}}
36
37 \RULE{\rt{changeset}}
38 \CASE{\NT{metavariables} \NT{transformation}}
39 \CASE{\NT{script\_metavariables} \T{script\_code}}
40 % \CASE{\NT{metavariables} \ANY{--- filename +++ filename} \NT{transformation}}
41 \end{grammar}
42
43 \noindent
44 \T{script\_code} is any code in the chosen scripting language. Parsing of
45 the semantic patch does not check the validity of this code; any errors are
46 first detected when the code is executed. Furthermore, \texttt{@} should
47 not be used in this code. Spatch scans the script code for the next
48 \texttt{@} and considers that to be the beginning of the next rule, even if
49 \texttt{@} occurs within, e.g., a string or a comment.
50
51 The \texttt{virtual} keyword is used to declare virtual rules. Virtual
52 rules may be subsequently used as a dependency for the rules in the
53 SmPL file. Whether a virtual rule is defined or not is controlled by
54 the \texttt{-D} option on the command line.
55
56 % Between the metavariables and the transformation rule, there can be a
57 % specification of constraints on the names of the old and new files,
58 % analogous to the filename specifications in the standard patch syntax.
59 % (see Figure \ref{scsiglue_patch}).
60
61 \section{Metavariables for transformations}
62
63 The \NT{rulename} portion of the metavariable declaration can specify
64 properties of a rule such as its name, the names of the rules that it
65 depends on, the isomorphisms to be used in processing the rule, and whether
66 quantification over paths should be universal or existential. The optional
67 annotation {\tt expression} indicates that the pattern is to be considered
68 as matching an expression, and thus can be used to avoid some parsing
69 problems.
70
71 The \NT{metadecl} portion of the metavariable declaration defines various
72 types of metavariables that will be used for matching in the transformation
73 section.
74
75 \begin{grammar}
76 \RULE{\rt{metavariables}}
77 \CASE{@@ \any{\NT{metadecl}} @@}
78 \CASE{@ \NT{rulename} @ \any{\NT{metadecl}} @@}
79
80 \RULE{\rt{rulename}}
81 \CASE{\T{id} \OPT{extends \T{id}} \OPT{depends on \NT{dep}} \opt{\NT{iso}}
82 \opt{\NT{disable-iso}} \opt{\NT{exists}} \opt{expression}}
83
84 \RULE{\rt{dep}}
85 \CASE{\NT{pnrule}}
86 \CASE{\NT{dep} \&\& \NT{dep}}
87 \CASE{\NT{dep} || \NT{dep}}
88
89 \RULE{\rt{pnrule}}
90 \CASE{\T{id}}
91 \CASE{!\T{id}}
92 \CASE{ever \T{id}}
93 \CASE{never \T{id}}
94 \CASE{(\NT{dep})}
95
96 \RULE{\rt{iso}}
97 \CASE{using \NT{string} \ANY{, \NT{string}}}
98
99 \RULE{\rt{disable-iso}}
100 \CASE{disable \NT{COMMA\_LIST}\mth{(}\T{id}\mth{)}}
101
102 \RULE{\rt{exists}}
103 \CASE{exists}
104 \CASE{forall}
105 % \CASE{\opt{reverse} forall}
106
107 \RULE{\rt{COMMA\_LIST}\mth{(}\rt{elem}\mth{)}}
108 \CASE{\NT{elem} \ANY{, \NT{elem}}}
109 \end{grammar}
110
111 The keyword \KW{disable} is normally used with the names of
112 isomorphisms defined in standard.iso or whatever isomorphism file has been
113 included. There are, however, some other isomorphisms that are built into
114 the implementation of Coccinelle and that can be disabled as well. Their
115 names are given below. In each case, the text describes the standard
116 behavior. Using \NT{disable-iso} with the given name disables this behavior.
117
118 \begin{itemize}
119 \item \KW{optional\_storage}: A SmPL function definition that does not
120 specify any visibility (i.e., static or extern), or a SmPL variable
121 declaration that does not specify any storage (i.e., auto, static,
122 register, or extern), matches a function declaration or variable
123 declaration with any visibility or storage, respectively.
124 \item \KW{optional\_qualifier}: This is similar to \KW{optional\_storage},
125 except that here it is the qualifier (i.e., const or volatile) that does
126 not have to be specified in the SmPL code, but may be present in the C code.
127 \item \KW{value\_format}: Integers in various formats, e.g., 1 and 0x1, are
128 considered to be equivalent in the matching process.
129 \item \KW{comm\_assoc}: An expression of the form \NT{exp} \NT{bin\_op}
130 \KW{...}, where \NT{bin\_op} is commutative and associative, is
131 considered to match any top-level sequence of \NT{bin\_op} operators
132 containing \NT{exp} as the top-level argument.
133 \end{itemize}
134
135 The possible types of metavariable declarations are defined by the grammar
136 rule below. Metavariables should occur at least once in the transformation
137 immediately following their declaration. Fresh metavariables must only be
138 used in {\tt +} code. These properties are not expressed in the grammar,
139 but are checked by a subsequent analysis. The metavariables are designated
140 according to the kind of terms they can match, such as a statement, an
141 identifier, or an expression. An expression metavariable can be further
142 constrained by its type. A declaration metavariable matches the
143 declaration of one or more variables, all sharing the same type
144 specification ({\em e.g.}, {\tt int a,b,c=3;}). A field metavariable does
145 the same, but for structure fields.
146
147 \begin{grammar}
148 \RULE{\rt{metadecl}}
149 \CASE{metavariable \NT{ids} ;}
150 \CASE{fresh identifier \NT{ids} ;}
151 \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
152 \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_virt\_or\_not\_eq}\mth{)} ;}
153 \CASE{parameter \opt{list} \NT{ids} ;}
154 \CASE{parameter list [ \NT{id} ] \NT{ids} ;}
155 \CASE{parameter list [ \NT{const} ] \NT{ids} ;}
156 \CASE{type \NT{ids} ;}
157 \CASE{statement \opt{list} \NT{ids} ;}
158 \CASE{declaration \opt{list} \NT{ids} ;}
159 \CASE{field \opt{list} \NT{ids} ;}
160 \CASE{typedef \NT{ids} ;}
161 \CASE{declarer name \NT{ids} ;}
162 % \CASE{\opt{local} function \NT{pmid\_with\_not\_eq\_list} ;}
163 \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
164 \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
165 \CASE{iterator name \NT{ids} ;}
166 \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
167 \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
168 % \CASE{error \NT{pmid\_with\_not\_eq\_list} ; }
169 \CASE{\opt{local} idexpression \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
170 \CASE{\opt{local} idexpression \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
171 \CASE{\opt{local} idexpression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
172 \CASE{expression list \NT{ids} ;}
173 \CASE{expression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
174 \CASE{expression enum \any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
175 \CASE{expression struct \any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
176 \CASE{expression union \any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
177 \CASE{expression \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
178 \CASE{expression list [ \NT{id} ] \NT{ids} ;}
179 \CASE{expression list [ \NT{const} ] \NT{ids} ;}
180 \CASE{\NT{ctype} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
181 \CASE{\NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
182 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
183 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
184 \CASE{constant \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
185 \CASE{constant \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
186 \CASE{position \opt{any} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq\_mid}\mth{)} ;}
187 \end{grammar}
188
189 It is possible to specify that an expression list or a parameter list
190 metavariable should match a specific number of expressions or parameters.
191
192 \begin{grammar}
193 \RULE{\rt{ids}}
194 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{pmid}\mth{)}}
195
196 \RULE{\rt{pmid}}
197 \CASE{\T{id}}
198 \CASE{\NT{mid}}
199 % \CASE{list}
200 % \CASE{error}
201 % \CASE{type}
202
203 \RULE{\rt{mid}} \CASE{\T{rulename\_id}.\T{id}}
204
205 \RULE{\rt{pmid\_with\_regexp}}
206 \CASE{\NT{pmid} \~{}= \NT{regexp}}
207 \CASE{\NT{pmid} !\~{}= \NT{regexp}}
208
209 \RULE{\rt{pmid\_with\_not\_eq}}
210 \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_meta}}}
211 \CASE{\NT{pmid}
212 \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_meta}\mth{)} \ttrb}}
213
214 \RULE{\rt{pmid\_with\_virt\_or\_not\_eq}}
215 \CASE{virtual.\T{id}}
216 \CASE{\NT{pmid\_with\_not\_eq}}
217
218 \RULE{\rt{pmid\_with\_not\_ceq}}
219 \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_cst}}}
220 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_cst}\mth{)} \ttrb}}
221
222 \RULE{\rt{id\_or\_cst}}
223 \CASE{\T{id}}
224 \CASE{\T{integer}}
225
226 \RULE{\rt{id\_or\_meta}}
227 \CASE{\T{id}}
228 \CASE{\T{rulename\_id}.\T{id}}
229
230 \RULE{\rt{pmid\_with\_not\_eq\_mid}}
231 \CASE{\NT{pmid} \OPT{!= \NT{mid}}}
232 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{mid}\mth{)} \ttrb}}
233 \end{grammar}
234
235 Subsequently, we refer to arbitrary metavariables as
236 \mth{\msf{metaid}^{\mbox{\scriptsize{\it{ty}}}}}, where {\it{ty}}
237 indicates the {\it metakind} used in the declaration of the variable.
238 For example, \mth{\msf{metaid}^{\ssf{Type}}} refers to a metavariable
239 that was declared using \texttt{type} and stands for any type.
240
241 {\tt metavariable} declares a metavariable for which the parser tries to
242 figure out the metavariable type based on the usage context. Such a
243 metavariable must be used consistently. These metavariables cannot be used
244 in all contexts; specifically, they cannot be used in a context that would
245 make the parsing ambiguous. Some examples are the leftmost term of an
246 expression, such as the left-hand side of an assignment, or the type in a
247 variable declaration. These restrictions may seem somewhat arbitrary from
248 the user's point of view. Thus, it is better to use metavariables with
249 metavariable types. If Coccinelle is given the argument {\tt
250 -parse\_cocci}, it will print information about the type that is inferred
251 for each metavariable.
252
253 The \NT{ctype} and \NT{ctypes} nonterminals are used by both the grammar of
254 metavariable declarations and the grammar of transformations, and are
255 defined on page~\pageref{types}.
256
257 An identifier metavariable with {\tt virtual} as its ``rule name'' is given
258 a value on the command line. For example, if a semantic patch contains a
259 rule that declares an identifier metavariable with the name {\tt
260 virtual.alloc}, then the command line could contain {\tt -D
261 alloc=kmalloc}. There should be no space around the {\tt =}. An
262 example is in {\tt demos/vm.cocci} and {\tt demos/vm.c}.
263
264 \section{Metavariables for scripts}
265
266 Metavariables for scripts can only be inherited from transformation rules.
267 In the spirit of scripting languages such as Python that use dynamic
268 typing, metavariables for scripts do not include type declarations.
269
270 \begin{grammar}
271 \RULE{\rt{script\_metavariables}}
272 \CASE{@ script:\NT{language} \OPT{\NT{rulename}} \OPT{depends on \NT{dep}} @
273 \any{\NT{script\_metadecl}} @@}
274 \CASE{@ initialize:\NT{language} \OPT{depends on \NT{dep}} @}
275 \CASE{@ finalize:\NT{language} \OPT{depends on \NT{dep}} @}
276
277 \RULE{\rt{language}} \CASE{python} \CASE{ocaml}
278
279 \RULE{\rt{script\_metadecl}}
280 \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;}
281 \CASE{\T{id} ;}
282 \end{grammar}
283
284 Currently, the only scripting languages that are supported are Python and
285 OCaml, indicated using {\tt python} and {\tt ocaml}, respectively. The
286 set of available scripting languages may be extended at some point.
287
288 Script rules declared with \KW{initialize} are run before the treatment of
289 any file. Script rules declared with \KW{finalize} are run when the
290 treatment of all of the files has completed. There can be at most one of
291 each per scripting language (thus currently at most one of each).
292 Initialize and finalize script rules do not have access to SmPL
293 metavariables. Nevertheless, a finalize script rule can access any
294 variables initialized by the other script rules, allowing information to be
295 transmitted from the matching process to the finalize rule.
296
297 A script metavariable that does not specify an origin, using \texttt{<<},
298 is newly declared by the script. This metavariable should be assigned to a
299 string and can be inherited by subsequent rules as an identifier. In
300 Python, the assignment of such a metavariable $x$ should refer to the
301 metavariable as {\tt coccinelle.\(x\)}. Examples are in the files
302 \texttt{demos/pythontococci.cocci} and \texttt{demos/camltococci.cocci}.
303
304 In an OCaml script, the following extended form of \textit{script\_metadecl}
305 may be used:
306
307 \begin{grammar}
308 \RULE{\rt{script\_metadecl}}
309 \CASE{(\T{id},\T{id}) <{}< \T{rulename\_id}.\T{id} ;}
310 \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;}
311 \CASE{\T{id} ;}
312 \end{grammar}
313
314 \noindent
315 In a declaration of the form \texttt{(\T{id},\T{id}) <{}<
316 \T{rulename\_id}.\T{id} ;}, the left component of \texttt{(\T{id},\T{id})}
317 receives a string representation of the value of the inherited metavariable
318 while the right component receives its abstract syntax tree. The file
319 \texttt{parsing\_c/ast\_c.ml} in the Coccinelle implementation gives some
320 information about the structure of the abstract syntax tree. Either the
321 left or right component may be replaced by \verb+_+, indicating that the
322 string representation or abstract syntax tree representation is not
323 wanted, respectively.
324
325 The abstract syntax tree of a metavariable declared using {\tt
326 metavariable} is not available.
327
328 \section{Transformation}
329
330 The transformation specification essentially has the form of C code,
331 except that lines to remove are annotated with \verb+-+ in the first
332 column, and lines to add are annotated with \verb-+-. A
333 transformation specification can also use {\em dots}, ``\verb-...-'',
334 describing an arbitrary sequence of function arguments or instructions
335 within a control-flow path. Dots may be modified with a {\tt when}
336 clause, indicating a pattern that should not occur anywhere within the
337 matched sequence. Finally, a transformation can specify a disjunction
338 of patterns, of the form \mtt{( \mth{\mita{pat}_1} | \mita{\ldots} |
339 \mth{\mita{pat}_n} )} where each \texttt{(}, \texttt{|} or
340 \texttt{)} is in column 0 or preceded by \texttt{\textbackslash}.
341
342 The grammar that we present for the transformation is not actually the
343 grammar of the SmPL code that can be written by the programmer, but is
344 instead the grammar of the slice of this consisting of the {\tt -}
345 annotated and the unannotated code (the context of the transformed lines),
346 or the {\tt +} annotated code and the unannotated code. For example, for
347 parsing purposes, the following transformation
348 %presented in Section \ref{sec:seq2}
349 is split into the two variants shown below and each is parsed
350 separately.
351
352 \begin{center}
353 \begin{tabular}{c}
354 \begin{lstlisting}[language=Cocci]
355 proc_info_func(...) {
356 <...
357 @-- hostno
358 @++ hostptr->host_no
359 ...>
360 }
361 \end{lstlisting}\\
362 \end{tabular}
363 \end{center}
364
365 {%\sizecodebis
366 \begin{center}
367 \begin{tabular}{p{5cm}p{3cm}p{5cm}}
368 \begin{lstlisting}[language=Cocci]
369 proc_info_func(...) {
370 <...
371 @-- hostno
372 ...>
373 }
374 \end{lstlisting}
375 &&
376 \begin{lstlisting}[language=Cocci]
377 proc_info_func(...) {
378 <...
379 @++ hostptr->host_no
380 ...>
381 }
382 \end{lstlisting}
383 \end{tabular}
384 \end{center}
385 }
386
387 \noindent
388 Requiring that both slices parse correctly ensures that the rule matches
389 syntactically valid C code and that it produces syntactically valid C code.
390 The generated parse trees are then merged for use in the subsequent
391 matching and transformation process.
392
393 The grammar for the minus or plus slice of a transformation is as follows:
394
395 \begin{grammar}
396
397 \RULE{\rt{transformation}}
398 \CASE{\some{\NT{include}}}
399 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
400 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
401 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{fundecl}, \NT{when}\mth{)}}
402
403 \RULE{\rt{include}}
404 \CASE{\#include \T{include\_string}}
405
406 % \RULE{\rt{fun\_decl\_stmt}}
407 % \CASE{\NT{decl\_stmt}}
408 % \CASE{\NT{fundecl}}
409
410 % \CASE{\NT{ctype}}
411 % \CASE{\ttlb \NT{initialize\_list} \ttrb}
412 % \CASE{\NT{toplevel\_seq\_start\_after\_dots\_init}}
413 %
414 % \RULE{\rt{toplevel\_seq\_start\_after\_dots\_init}}
415 % \CASE{\NT{stmt\_dots} \NT{toplevel\_after\_dots}}
416 % \CASE{\NT{expr} \opt{\NT{toplevel\_after\_exp}}}
417 % \CASE{\NT{decl\_stmt\_expr} \opt{\NT{toplevel\_after\_stmt}}}
418 %
419 % \RULE{\rt{stmt\_dots}}
420 % \CASE{... \any{\NT{when}}}
421 % \CASE{<... \any{\NT{when}} \NT{nest\_after\_dots} ...>}
422 % \CASE{<+... \any{\NT{when}} \NT{nest\_after\_dots} ...+>}
423
424 \RULE{\rt{when}}
425 \CASE{when != \NT{when\_code}}
426 \CASE{when = \NT{rule\_elem\_stmt}}
427 \CASE{when \NT{COMMA\_LIST}\mth{(}\NT{any\_strict}\mth{)}}
428 \CASE{when true != \NT{expr}}
429 \CASE{when false != \NT{expr}}
430
431 \RULE{\rt{when\_code}}
432 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
433 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
434
435 \RULE{\rt{rule\_elem\_stmt}}
436 \CASE{\NT{one\_decl}}
437 \CASE{\NT{expr};}
438 \CASE{return \opt{\NT{expr}};}
439 \CASE{break;}
440 \CASE{continue;}
441 \CASE{\bs(\NT{rule\_elem\_stmt} \SOME{\bs| \NT{rule\_elem\_stmt}}\bs)}
442
443 \RULE{\rt{any\_strict}}
444 \CASE{any}
445 \CASE{strict}
446 \CASE{forall}
447 \CASE{exists}
448
449 % \RULE{\rt{nest\_after\_dots}}
450 % \CASE{\NT{decl\_stmt\_exp} \opt{\NT{nest\_after\_stmt}}}
451 % \CASE{\opt{\NT{exp}} \opt{\NT{nest\_after\_exp}}}
452 %
453 % \RULE{\rt{nest\_after\_stmt}}
454 % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
455 % \CASE{\NT{decl\_stmt} \opt{\NT{nest\_after\_stmt}}}
456 %
457 % \RULE{\rt{nest\_after\_exp}}
458 % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
459 %
460 % \RULE{\rt{toplevel\_after\_dots}}
461 % \CASE{\opt{\NT{toplevel\_after\_exp}}}
462 % \CASE{\NT{exp} \opt{\NT{toplevel\_after\_exp}}}
463 % \CASE{\NT{decl\_stmt\_expr} \NT{toplevel\_after\_stmt}}
464 %
465 % \RULE{\rt{toplevel\_after\_exp}}
466 % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
467 %
468 % \RULE{\rt{decl\_stmt\_expr}}
469 % \CASE{TMetaStmList$^\ddag$}
470 % \CASE{\NT{decl\_var}}
471 % \CASE{\NT{stmt}}
472 % \CASE{(\NT{stmt\_seq} \ANY{| \NT{stmt\_seq}})}
473 %
474 % \RULE{\rt{toplevel\_after\_stmt}}
475 % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
476 % \CASE{\NT{decl\_stmt} \NT{toplevel\_after\_stmt}}
477
478 \end{grammar}
479
480 \begin{grammar}
481 \RULE{\rt{OPTDOTSEQ}\mth{(}\rt{grammar\_ds}, \rt{when\_ds}\mth{)}}
482 \CASE{}\multicolumn{3}{r}{\hspace{1cm}
483 \KW{\opt{... \opt{\NT{when\_ds}}} \NT{grammar\_ds}
484 \ANY{... \opt{\NT{when\_ds}} \NT{grammar\_ds}}
485 \opt{... \opt{\NT{when\_ds}}}}
486 }
487
488 % \CASE{\opt{... \opt{\NT{when\_ds}}} \NT{grammar}
489 % \ANY{... \opt{\NT{when\_ds}} \NT{grammar}}
490 % \opt{... \opt{\NT{when\_ds}}}}
491 % \CASE{<... \any{\NT{when\_ds}} \NT{grammar} ...>}
492 % \CASE{<+... \any{\NT{when\_ds}} \NT{grammar} ...+>}
493
494 \end{grammar}
495
496 \noindent
497 Lines may be annotated with an element of the set $\{\mtt{-}, \mtt{+},
498 \mtt{*}\}$ or the singleton $\mtt{?}$, or one of each set. \mtt{?}
499 represents at most one match of the given pattern. \mtt{*} is used for
500 semantic match, \emph{i.e.}, a pattern that highlights the fragments
501 annotated with \mtt{*}, but does not perform any modification of the
502 matched code. \mtt{*} cannot be mixed with \mtt{-} and \mtt{+}. There are
503 some constraints on the use of these annotations:
504 \begin{itemize}
505 \item Dots, {\em i.e.} \texttt{...}, cannot occur on a line marked
506 \texttt{+}.
507 \item Nested dots, {\em i.e.}, dots enclosed in {\tt <} and {\tt >}, cannot
508 occur on a line with any marking.
509 \end{itemize}
510
511 Each element of a disjunction must be a proper term like an
512 expression, a statement, an identifier or a declaration. Thus, the
513 rule on the left below is not a syntactically correct SmPL rule. One may
514 use the rule on the right instead.
515
516 \begin{center}
517 \begin{tabular}{l@{\hspace{5cm}}r}
518 \begin{lstlisting}[language=Cocci]
519 @@
520 type T;
521 T b;
522 @@
523
524 (
525 writeb(...,
526 |
527 readb(
528 )
529 @--(T)
530 b)
531 \end{lstlisting}
532 &
533 \begin{lstlisting}[language=Cocci]
534 @@
535 type T;
536 T b;
537 @@
538
539 (
540 read
541 |
542 write
543 )
544 (...,
545 @-- (T)
546 b)
547 \end{lstlisting}
548 \\
549 \end{tabular}
550 \end{center}
551
552 \section{Types}
553 \label{types}
554
555 \begin{grammar}
556
557 \RULE{\rt{ctypes}}
558 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{ctype}\mth{)}}
559
560 \RULE{\rt{ctype}}
561 \CASE{\opt{\NT{const\_vol}} \NT{generic\_ctype} \any{*}}
562 \CASE{\opt{\NT{const\_vol}} void \some{*}}
563 \CASE{(\NT{ctype} \ANY{| \NT{ctype}})}
564
565 \RULE{\rt{const\_vol}}
566 \CASE{const}
567 \CASE{volatile}
568
569 \RULE{\rt{generic\_ctype}}
570 \CASE{\NT{ctype\_qualif}}
571 \CASE{\opt{\NT{ctype\_qualif}} char}
572 \CASE{\opt{\NT{ctype\_qualif}} short}
573 \CASE{\opt{\NT{ctype\_qualif}} int}
574 \CASE{\opt{\NT{ctype\_qualif}} long}
575 \CASE{\opt{\NT{ctype\_qualif}} long long}
576 \CASE{double}
577 \CASE{float}
578 \CASE{size\_t} \CASE{ssize\_t} \CASE{ptrdiff\_t}
579 \CASE{enum \NT{id} \{ \NT{PARAMSEQ}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)} \OPT{,} \}}
580 \CASE{\OPT{struct\OR union} \T{id} \OPT{\{ \any{\NT{struct\_decl\_list}} \}}}
581
582 \RULE{\rt{ctype\_qualif}}
583 \CASE{unsigned}
584 \CASE{signed}
585
586 \RULE{\rt{struct\_decl\_list}}
587 \CASE{\NT{struct\_decl\_list\_start}}
588
589 \RULE{\rt{struct\_decl\_list\_start}}
590 \CASE{\NT{struct\_decl}}
591 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
592 \CASE{... \opt{when != \NT{struct\_decl}}$^\dag$ \opt{\NT{continue\_struct\_decl\_list}}}
593
594 \RULE{\rt{continue\_struct\_decl\_list}}
595 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
596 \CASE{\NT{struct\_decl}}
597
598 \RULE{\rt{struct\_decl}}
599 \CASE{\NT{ctype} \NT{d\_ident};}
600 \CASE{\NT{fn\_ctype} (* \NT{d\_ident}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)});}
601 \CASE{\opt{\NT{const\_vol}} \T{id} \NT{d\_ident};}
602
603 \RULE{\rt{d\_ident}}
604 \CASE{\T{id} \any{[\opt{\NT{expr}}]}}
605
606 \RULE{\rt{fn\_ctype}}
607 \CASE{\NT{generic\_ctype} \any{*}}
608 \CASE{void \any{*}}
609
610 \RULE{\rt{name\_opt\_decl}}
611 \CASE{\NT{decl}}
612 \CASE{\NT{ctype}}
613 \CASE{\NT{fn\_ctype}}
614 \end{grammar}
615
616 $^\dag$ The optional \texttt{when} construct ends at the end of the line.
617
618 \section{Function declarations}
619
620 \begin{grammar}
621
622 \RULE{\rt{fundecl}}
623 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
624 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}})
625 \ttlb~\opt{\NT{stmt\_seq}} \ttrb}
626
627 \RULE{\rt{funproto}}
628 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
629 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}});}
630
631 \RULE{\rt{funinfo}}
632 \CASE{inline}
633 \CASE{\NT{storage}}
634 % \CASE{\NT{attr}}
635
636 \RULE{\rt{storage}}
637 \CASE{static}
638 \CASE{auto}
639 \CASE{register}
640 \CASE{extern}
641
642 \RULE{\rt{funid}}
643 \CASE{\T{id}}
644 \CASE{\mth{\T{metaid}^{\ssf{Id}}}}
645 % \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
646 % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
647
648 \RULE{\rt{param}}
649 \CASE{\NT{type} \T{id}}
650 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
651 \CASE{\mth{\T{metaid}^{\ssf{ParamList}}}}
652
653 \RULE{\rt{decl}}
654 \CASE{\NT{ctype} \NT{id}}
655 \CASE{\NT{fn\_ctype} (* \NT{id}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)})}
656 \CASE{void}
657 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
658 \end{grammar}
659
660 \begin{grammar}
661 \RULE{\rt{PARAMSEQ}\mth{(}\rt{gram\_p}, \rt{when\_p}\mth{)}}
662 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{gram\_p} \OR \ldots \opt{\NT{when\_p}}\mth{)}}
663 \end{grammar}
664
665 To match a function it is not necessary to provide all of the annotations
666 that appear before the function name. For example, the following semantic
667 patch:
668
669 \begin{lstlisting}[language=Cocci]
670 @@
671 @@
672
673 foo() { ... }
674 \end{lstlisting}
675
676 \noindent
677 matches a function declared as follows:
678
679 \begin{lstlisting}[language=C]
680 static int foo() { return 12; }
681 \end{lstlisting}
682
683 \noindent
684 This behavior can be turned off by disabling the \KW{optional\_storage}
685 isomorphism. If one adds code before a function declaration, then the
686 effect depends on the kind of code that is added. If the added code is a
687 function definition or CPP code, then the new code is placed before
688 all information associated with the function definition, including any
689 comments preceding the function definition. On the other hand, if the new
690 code is associated with the function, such as the addition of the keyword
691 {\tt static}, the new code is placed exactly where it appears with respect
692 to the rest of the function definition in the semantic patch. For example,
693
694 \begin{lstlisting}[language=Cocci]
695 @@
696 @@
697
698 + static
699 foo() { ... }
700 \end{lstlisting}
701
702 \noindent
703 causes static to be placed just before the function name. The following
704 causes it to be placed just before the type:
705
706 \begin{lstlisting}[language=Cocci]
707 @@
708 type T;
709 @@
710
711 + static
712 T foo() { ... }
713 \end{lstlisting}
714
715 \noindent
716 It may be necessary to consider several cases to ensure that the added code
717 is placed in the right position. For example, one may need one pattern
718 that considers that the function is declared {\tt inline} and another that
719 considers that it is not.
720
721 %\newpage
722
723 \section{Declarations}
724
725 \begin{grammar}
726 \RULE{\rt{decl\_var}}
727 % \CASE{\NT{type} \opt{\NT{id} \opt{[\opt{\NT{dot\_expr}}]}
728 % \ANY{, \NT{id} \opt{[ \opt{\NT{dot\_expr}}]}}};}
729 \CASE{\NT{common\_decl}}
730 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
731 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
732 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) = \NT{initialize} ;}
733 \CASE{typedef \NT{ctype} \NT{typedef\_ident} ;}
734
735 \RULE{\rt{one\_decl}}
736 \CASE{\NT{common\_decl}}
737 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{id};}
738 % \CASE{\NT{storage} \NT{ctype} \NT{id} \opt{[\opt{\NT{dot\\_expr}}]} = \NT{nest\\_expr};}
739 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} ;}
740
741 \RULE{\rt{common\_decl}}
742 \CASE{\NT{ctype};}
743 \CASE{\NT{funproto}}
744 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{d\_ident} = \NT{initialize} ;}
745 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} = \NT{initialize} ;}
746 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) ;}
747 \CASE{\NT{decl\_ident} ( \OPT{\NT{COMMA\_LIST}\mth{(}\NT{expr}\mth{)}} ) ;}
748
749 \RULE{\rt{initialize}}
750 \CASE{\NT{dot\_expr}}
751 \CASE{\mth{\T{metaid}^{\ssf{Initialiser}}}}
752 \CASE{\ttlb~\opt{\NT{COMMA\_LIST}\mth{(}\NT{init\_list\_elem}\mth{)}}~\ttrb}
753
754 \RULE{\rt{init\_list\_elem}}
755 \CASE{\NT{dot\_expr}}
756 \CASE{\NT{designator} = \NT{dot\_expr}}
757 \CASE{\NT{id} : \NT{dot\_expr}}
758
759 \RULE{\rt{designator}}
760 \CASE{. \NT{id}}
761 \CASE{[ \NT{dot\_expr} ]}
762 \CASE{[ \NT{dot\_expr} ... \NT{dot\_expr} ]}
763
764 \RULE{\rt{decl\_ident}}
765 \CASE{\T{DeclarerId}}
766 \CASE{\mth{\T{metaid}^{\ssf{Declarer}}}}
767 \end{grammar}
768
769 \section{Statements}
770
771 The first rule {\em stmt} describes the various forms of a statement.
772 The remaining rules implement the constraints that are sensitive to the
773 context in which the statement occurs: {\em single\_stmt} for a
774 context in which only one statement is allowed, and {\em decl\_stmt}
775 for a context in which a declaration, statement, or sequence thereof is
776 allowed.
777
778 \begin{grammar}
779 \RULE{\rt{stmt}}
780 \CASE{\NT{include}}
781 \CASE{\mth{\T{metaid}^{\ssf{Stmt}}}}
782 \CASE{\NT{expr};}
783 \CASE{if (\NT{dot\_expr}) \NT{single\_stmt} \opt{else \NT{single\_stmt}}}
784 \CASE{for (\opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}})
785 \NT{single\_stmt}}
786 \CASE{while (\NT{dot\_expr}) \NT{single\_stmt}}
787 \CASE{do \NT{single\_stmt} while (\NT{dot\_expr});}
788 \CASE{\NT{iter\_ident} (\any{\NT{dot\_expr}}) \NT{single\_stmt}}
789 \CASE{switch (\opt{\NT{dot\_expr}}) \ttlb \any{\NT{case\_line}} \ttrb}
790 \CASE{return \opt{\NT{dot\_expr}};}
791 \CASE{\ttlb~\opt{\NT{stmt\_seq}} \ttrb}
792 \CASE{\NT{NEST}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
793 \CASE{\NT{NEST}\mth{(}\NT{expr}, \NT{when}\mth{)}}
794 \CASE{break;}
795 \CASE{continue;}
796 \CASE{\NT{id}:}
797 \CASE{goto \NT{id};}
798 \CASE{\ttlb \NT{stmt\_seq} \ttrb}
799
800 \RULE{\rt{single\_stmt}}
801 \CASE{\NT{stmt}}
802 \CASE{\NT{OR}\mth{(}\NT{stmt}\mth{)}}
803
804 \RULE{\rt{decl\_stmt}}
805 \CASE{\mth{\T{metaid}^{\ssf{StmtList}}}}
806 \CASE{\NT{decl\_var}}
807 \CASE{\NT{stmt}}
808 \CASE{\NT{OR}\mth{(}\NT{stmt\_seq}\mth{)}}
809
810 \RULE{\rt{stmt\_seq}}
811 \CASE{\any{\NT{decl\_stmt}}
812 \opt{\NT{DOTSEQ}\mth{(}\some{\NT{decl\_stmt}},
813 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
814 \CASE{\any{\NT{decl\_stmt}}
815 \opt{\NT{DOTSEQ}\mth{(}\NT{expr},
816 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
817
818 \RULE{\rt{case\_line}}
819 \CASE{default :~\NT{stmt\_seq}}
820 \CASE{case \NT{dot\_expr} :~\NT{stmt\_seq}}
821
822 \RULE{\rt{iter\_ident}}
823 \CASE{\T{IteratorId}}
824 \CASE{\mth{\T{metaid}^{\ssf{Iterator}}}}
825 \end{grammar}
826
827 \begin{grammar}
828 \RULE{\rt{OR}\mth{(}\rt{gram\_o}\mth{)}}
829 \CASE{( \NT{gram\_o} \ANY{\ttmid \NT{gram\_o}})}
830
831 \RULE{\rt{DOTSEQ}\mth{(}\rt{gram\_d}, \rt{when\_d}\mth{)}}
832 \CASE{\ldots \opt{\NT{when\_d}} \ANY{\NT{gram\_d} \ldots \opt{\NT{when\_d}}}}
833
834 \RULE{\rt{NEST}\mth{(}\rt{gram\_n}, \rt{when\_n}\mth{)}}
835 \CASE{<\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots>}
836 \CASE{<+\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots+>}
837 \end{grammar}
838
839 \noindent
840 OR is a macro that generates a disjunction of patterns. The three
841 tokens \T{(}, \T{\ttmid}, and \T{)} must appear in the leftmost
842 column, to differentiate them from the parentheses and bit-or tokens
843 that can appear within expressions (and cannot appear in the leftmost
844 column). These tokens may also be preceded by \texttt{\bs}
845 when they are used in another column. These tokens are furthermore
846 different from (, \(\mid\), and ), which are part of the grammar
847 metalanguage.
848
849 \section{Expressions}
850
851 A nest or a single ellipsis is allowed in some expression contexts, and
852 causes ambiguity in others. For example, in a sequence \mtt{\ldots
853 \mita{expr} \ldots}, the nonterminal \mita{expr} must be instantiated as an
854 explicit C-language expression, while in an array reference,
855 \mtt{\mth{\mita{expr}_1} \mtt{[} \mth{\mita{expr}_2} \mtt{]}}, the
856 nonterminal \mth{\mita{expr}_2}, because it is delimited by brackets, can
857 also be instantiated as \mtt{\ldots}, representing an arbitrary expression. To
858 distinguish between the various possibilities, we define three nonterminals
859 for expressions: {\em expr} does not allow either top-level nests or
860 ellipses, {\em nest\_expr} allows a nest but not an ellipsis, and {\em
861 dot\_expr} allows both. The EXPR macro is used to express these variants
862 in a concise way.
863
864 \begin{grammar}
865 \RULE{\rt{expr}}
866 \CASE{\NT{EXPR}\mth{(}\NT{expr}\mth{)}}
867
868 \RULE{\rt{nest\_expr}}
869 \CASE{\NT{EXPR}\mth{(}\NT{nest\_expr}\mth{)}}
870 \CASE{\NT{NEST}\mth{(}\NT{nest\_expr}, \NT{exp\_whencode}\mth{)}}
871
872 \RULE{\rt{dot\_expr}}
873 \CASE{\NT{EXPR}\mth{(}\NT{dot\_expr}\mth{)}}
874 \CASE{\NT{NEST}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)}}
875 \CASE{...~\opt{\NT{exp\_whencode}}}
876
877 \RULE{\rt{EXPR}\mth{(}\rt{exp}\mth{)}}
878 \CASE{\NT{exp} \NT{assign\_op} \NT{exp}}
879 \CASE{\NT{exp}++}
880 \CASE{\NT{exp}--}
881 \CASE{\NT{unary\_op} \NT{exp}}
882 \CASE{\NT{exp} \NT{bin\_op} \NT{exp}}
883 \CASE{\NT{exp} ?~\NT{dot\_expr} :~\NT{exp}}
884 \CASE{(\NT{type}) \NT{exp}}
885 \CASE{\NT{exp} [\NT{dot\_expr}]}
886 \CASE{\NT{exp} .~\NT{id}}
887 \CASE{\NT{exp} -> \NT{id}}
888 \CASE{\NT{exp}(\opt{\NT{PARAMSEQ}\mth{(}\NT{arg}, \NT{exp\_whencode}\mth{)}})}
889 \CASE{\NT{id}}
890 % \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
891 % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
892 \CASE{\mth{\T{metaid}^{\ssf{Exp}}}}
893 % \CASE{\mth{\T{metaid}^{\ssf{Err}}}}
894 \CASE{\mth{\T{metaid}^{\ssf{Const}}}}
895 \CASE{\NT{const}}
896 \CASE{(\NT{dot\_expr})}
897 \CASE{\NT{OR}\mth{(}\NT{exp}\mth{)}}
898
899 \RULE{\rt{arg}}
900 \CASE{\NT{nest\_expr}}
901 \CASE{\mth{\T{metaid}^{\ssf{ExpList}}}}
902
903 \RULE{\rt{exp\_whencode}}
904 \CASE{when != \NT{expr}}
905
906 \RULE{\rt{assign\_op}}
907 \CASE{= \OR -= \OR += \OR *= \OR /= \OR \%=}
908 \CASE{\&= \OR \ttmid= \OR \caret= \OR \lt\lt= \OR \gt\gt=}
909
910 \RULE{\rt{bin\_op}}
911 \CASE{* \OR / \OR \% \OR + \OR -}
912 \CASE{\lt\lt \OR \gt\gt \OR \caret\xspace \OR \& \OR \ttmid}
913 \CASE{< \OR > \OR <= \OR >= \OR == \OR != \OR \&\& \OR \ttmid\ttmid}
914
915 \RULE{\rt{unary\_op}}
916 \CASE{++ \OR -- \OR \& \OR * \OR + \OR - \OR !}
917
918 \end{grammar}
919
920 \section{Constants, Identifiers and Types for Transformations}
921
922 \begin{grammar}
923 \RULE{\rt{const}}
924 \CASE{\NT{string}}
925 \CASE{[0-9]+}
926 \CASE{\mth{\cdots}}
927
928 \RULE{\rt{string}}
929 \CASE{"\any{[\^{}"]}"}
930
931 \RULE{\rt{id}}
932 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Id}}}}
933
934 \RULE{\rt{typedef\_ident}}
935 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Type}}}}
936
937 \RULE{\rt{type}}
938 \CASE{\NT{ctype} \OR \mth{\T{metaid}^{\ssf{Type}}}}
939
940 \RULE{\rt{pathToIsoFile}}
941 \CASE{<.*>}
942
943 \RULE{\rt{regexp}}
944 \CASE{"\any{[\^{}"]}"}
945 \end{grammar}
946
947
948 %%% Local Variables:
949 %%% mode: LaTeX
950 %%% TeX-master: "main_grammar"
951 %%% coding: utf-8
952 %%% TeX-PDF-mode: t
953 %%% ispell-local-dictionary: "american"
954 %%% End: