Release coccinelle-0.2.4rc1
[bpt/coccinelle.git] / docs / manual / cocci_syntax.tex
CommitLineData
faf9a90c
C
1
2%\section{The SmPL Grammar}
3
4% This section presents the SmPL grammar. This definition follows closely
5% our implementation using the Menhir parser generator \cite{menhir}.
6
7This document presents the grammar of the SmPL language used by the
7f004419 8\href{http://coccinelle.lip6.fr/}{Coccinelle tool}. For the most
faf9a90c
C
9part, the grammar is written using standard notation. In some rules,
10however, the left-hand side is in all uppercase letters. These are
11macros, which take one or more grammar rule right-hand-sides as
12arguments. The grammar also uses some unspecified nonterminals, such
b1b2de81
C
13as \T{id}, \T{const}, etc. These refer to the sets suggested by
14the name, {\em i.e.}, \T{id} refers to the set of possible
15C-language identifiers, while \T{const} refers to the set of
978fd7e5 16possible C-language constants.
708f4980 17%
978fd7e5 18\ifhevea
708f4980 19A PDF version of this documentation is available at
951c7801 20\url{http://coccinelle.lip6.fr/docs/main_grammar.pdf}.
708f4980 21\else
faf9a90c 22An HTML version of this documentation is available online at
951c7801 23\url{http://coccinelle.lip6.fr/docs/main_grammar.html}.
708f4980 24\fi
faf9a90c 25
faf9a90c
C
26\section{Program}
27
28\begin{grammar}
29 \RULE{\rt{program}}
30 \CASE{\any{\NT{include\_cocci}} \some{\NT{changeset}}}
31
32 \RULE{\rt{include\_cocci}}
33 \CASE{using \NT{string}}
34 \CASE{using \NT{pathToIsoFile}}
5636bb2c 35 \CASE{virtual \T{id} \ANY{, \T{id}}}
faf9a90c
C
36
37 \RULE{\rt{changeset}}
38 \CASE{\NT{metavariables} \NT{transformation}}
b1b2de81 39 \CASE{\NT{script\_metavariables} \T{script\_code}}
faf9a90c 40% \CASE{\NT{metavariables} \ANY{--- filename +++ filename} \NT{transformation}}
faf9a90c
C
41\end{grammar}
42
b1b2de81
C
43\noindent
44\T{script\_code} is any code in the chosen scripting language. Parsing of
45the semantic patch does not check the validity of this code; any errors are
978fd7e5
C
46first detected when the code is executed. Furthermore, \texttt{@} should
47not be used in this code. Spatch scans the script code for the next
48\texttt{@} and considers that to be the beginning of the next rule, even if
49\texttt{@} occurs within, e.g., a string or a comment.
b1b2de81 50
5636bb2c
C
51The \texttt{virtual} keyword is used to declare virtual rules. Virtual
52rules may be subsequently used as a dependency for the rules in the
53SmPL file. Whether a virtual rule is defined or not is controlled by
54the \texttt{-D} option on the command line.
55
faf9a90c
C
56% Between the metavariables and the transformation rule, there can be a
57% specification of constraints on the names of the old and new files,
58% analogous to the filename specifications in the standard patch syntax.
59% (see Figure \ref{scsiglue_patch}).
60
b1b2de81 61\section{Metavariables for transformations}
faf9a90c
C
62
63The \NT{rulename} portion of the metavariable declaration can specify
64properties of a rule such as its name, the names of the rules that it
65depends on, the isomorphisms to be used in processing the rule, and whether
66quantification over paths should be universal or existential. The optional
67annotation {\tt expression} indicates that the pattern is to be considered
68as matching an expression, and thus can be used to avoid some parsing
69problems.
70
71The \NT{metadecl} portion of the metavariable declaration defines various
72types of metavariables that will be used for matching in the transformation
73section.
74
75\begin{grammar}
76 \RULE{\rt{metavariables}}
77 \CASE{@@ \any{\NT{metadecl}} @@}
78 \CASE{@ \NT{rulename} @ \any{\NT{metadecl}} @@}
79
80 \RULE{\rt{rulename}}
81 \CASE{\T{id} \OPT{extends \T{id}} \OPT{depends on \NT{dep}} \opt{\NT{iso}}
82 \opt{\NT{disable-iso}} \opt{\NT{exists}} \opt{expression}}
b1b2de81 83
faf9a90c
C
84 \RULE{\rt{dep}}
85 \CASE{\NT{pnrule}}
86 \CASE{\NT{dep} \&\& \NT{dep}}
87 \CASE{\NT{dep} || \NT{dep}}
88
89 \RULE{\rt{pnrule}}
90 \CASE{\T{id}}
91 \CASE{!\T{id}}
92 \CASE{ever \T{id}}
93 \CASE{never \T{id}}
94 \CASE{(\NT{dep})}
95
96 \RULE{\rt{iso}}
97 \CASE{using \NT{string} \ANY{, \NT{string}}}
98
99 \RULE{\rt{disable-iso}}
100 \CASE{disable \NT{COMMA\_LIST}\mth{(}\T{id}\mth{)}}
101
102 \RULE{\rt{exists}}
103 \CASE{exists}
104 \CASE{forall}
105% \CASE{\opt{reverse} forall}
106
107 \RULE{\rt{COMMA\_LIST}\mth{(}\rt{elem}\mth{)}}
108 \CASE{\NT{elem} \ANY{, \NT{elem}}}
109\end{grammar}
110
b1b2de81 111The keyword \KW{disable} is normally used with the names of
faf9a90c
C
112isomorphisms defined in standard.iso or whatever isomorphism file has been
113included. There are, however, some other isomorphisms that are built into
114the implementation of Coccinelle and that can be disabled as well. Their
413ffc02 115names are given below. In each case, the text describes the standard
faf9a90c
C
116behavior. Using \NT{disable-iso} with the given name disables this behavior.
117
118\begin{itemize}
119\item \KW{optional\_storage}: A SmPL function definition that does not
120 specify any visibility (i.e., static or extern), or a SmPL variable
121 declaration that does not specify any storage (i.e., auto, static,
122 register, or extern), matches a function declaration or variable
123 declaration with any visibility or storage, respectively.
124\item \KW{optional\_qualifier}: This is similar to \KW{optional\_storage},
125 except that here it is the qualifier (i.e., const or volatile) that does
126 not have to be specified in the SmPL code, but may be present in the C code.
127\item \KW{value\_format}: Integers in various formats, e.g., 1 and 0x1, are
128 considered to be equivalent in the matching process.
129\item \KW{comm\_assoc}: An expression of the form \NT{exp} \NT{bin\_op}
130 \KW{...}, where \NT{bin\_op} is commutative and associative, is
131 considered to match any top-level sequence of \NT{bin\_op} operators
132 containing \NT{exp} as the top-level argument.
133\end{itemize}
134
135The possible types of metavariable declarations are defined by the grammar
136rule below. Metavariables should occur at least once in the transformation
137immediately following their declaration. Fresh metavariables must only be
138used in {\tt +} code. These properties are not expressed in the grammar,
139but are checked by a subsequent analysis. The metavariables are designated
140according to the kind of terms they can match, such as a statement, an
141identifier, or an expression. An expression metavariable can be further
413ffc02
C
142constrained by its type. A declaration metavariable matches the
143declaration of one or more variables, all sharing the same type
144specification ({\em e.g.}, {\tt int a,b,c=3;}). A field metavariable does
145the same, but for structure fields.
faf9a90c
C
146
147\begin{grammar}
148 \RULE{\rt{metadecl}}
149 \CASE{fresh identifier \NT{ids} ;}
951c7801 150 \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
ae4735db 151 \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_virt\_or\_not\_eq}\mth{)} ;}
faf9a90c
C
152 \CASE{parameter \opt{list} \NT{ids} ;}
153 \CASE{parameter list [ \NT{id} ] \NT{ids} ;}
88e71198 154 \CASE{parameter list [ \NT{const} ] \NT{ids} ;}
faf9a90c
C
155 \CASE{type \NT{ids} ;}
156 \CASE{statement \opt{list} \NT{ids} ;}
413ffc02
C
157 \CASE{declaration \opt{list} \NT{ids} ;}
158 \CASE{field \opt{list} \NT{ids} ;}
faf9a90c
C
159 \CASE{typedef \NT{ids} ;}
160 \CASE{declarer name \NT{ids} ;}
161% \CASE{\opt{local} function \NT{pmid\_with\_not\_eq\_list} ;}
951c7801 162 \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
faf9a90c
C
163 \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
164 \CASE{iterator name \NT{ids} ;}
951c7801 165 \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
faf9a90c
C
166 \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
167% \CASE{error \NT{pmid\_with\_not\_eq\_list} ; }
168 \CASE{\opt{local} idexpression \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
169 \CASE{\opt{local} idexpression \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
170 \CASE{\opt{local} idexpression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
171 \CASE{expression list \NT{ids} ;}
172 \CASE{expression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
173 \CASE{expression \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
88e71198
C
174 \CASE{expression list [ \NT{id} ] \NT{ids} ;}
175 \CASE{expression list [ \NT{const} ] \NT{ids} ;}
faf9a90c
C
176 \CASE{\NT{ctype} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
177 \CASE{\NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
178 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
179 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
180 \CASE{constant \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
181 \CASE{constant \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
182 \CASE{position \opt{any} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq\_mid}\mth{)} ;}
183\end{grammar}
184
88e71198
C
185It is possible to specify that an expression list or a parameter list
186metavariable should match a specific number of expressions or parameters.
187
faf9a90c
C
188\begin{grammar}
189 \RULE{\rt{ids}}
190 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{pmid}\mth{)}}
191
192 \RULE{\rt{pmid}}
193 \CASE{\T{id}}
194 \CASE{\NT{mid}}
195% \CASE{list}
196% \CASE{error}
197% \CASE{type}
198
199 \RULE{\rt{mid}} \CASE{\T{rulename\_id}.\T{id}}
200
951c7801
C
201 \RULE{\rt{pmid\_with\_regexp}}
202 \CASE{\NT{pmid} \~{}= \NT{regexp}}
203
faf9a90c 204 \RULE{\rt{pmid\_with\_not\_eq}}
5636bb2c
C
205 \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_meta}}}
206 \CASE{\NT{pmid}
207 \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_meta}\mth{)} \ttrb}}
faf9a90c 208
55d38388 209 \RULE{\rt{pmid\_with\_virt\_or\_not\_eq}}
ae4735db 210 \CASE{virtual.\T{id}}
55d38388
C
211 \CASE{\NT{pmid\_with\_not\_eq}}
212
213 \RULE{\rt{pmid\_with\_not\_ceq}}
faf9a90c
C
214 \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_cst}}}
215 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_cst}\mth{)} \ttrb}}
216
217 \RULE{\rt{id\_or\_cst}}
218 \CASE{\T{id}}
219 \CASE{\T{integer}}
220
5636bb2c
C
221 \RULE{\rt{id\_or\_meta}}
222 \CASE{\T{id}}
223 \CASE{\T{rulename\_id}.\T{id}}
224
faf9a90c
C
225 \RULE{\rt{pmid\_with\_not\_eq\_mid}}
226 \CASE{\NT{pmid} \OPT{!= \NT{mid}}}
227 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{mid}\mth{)} \ttrb}}
228\end{grammar}
229
230Subsequently, we refer to arbitrary metavariables as
231\mth{\msf{metaid}^{\mbox{\scriptsize{\it{ty}}}}}, where {\it{ty}}
232indicates the {\it metakind} used in the declaration of the variable.
233For example, \mth{\msf{metaid}^{\ssf{Type}}} refers to a metavariable
234that was declared using \texttt{type} and stands for any type.
235
236The \NT{ctype} and \NT{ctypes} nonterminals are used by both the grammar of
237metavariable declarations and the grammar of transformations, and are
238defined on page~\pageref{types}.
239
ae4735db
C
240An identifier metavariable with {\tt virtual} as its ``rule name'' is given
241a value on the command line. For example, if a semantic patch contains a
242rule that declares an identifier metavariable with the name {\tt
243 virtual.alloc}, then the command line could contain {\tt -D
244 alloc=kmalloc}. There should not be space around the {\tt =}. An
245example is in {\tt demos/vm.cocci} and {\tt demos/vm.c}.
246
b1b2de81
C
247\section{Metavariables for scripts}
248
249Metavariables for scripts can only be inherited from transformation rules.
250In the spirit of scripting languages such as Python that use dynamic
251typing, metavariables for scripts do not include type declarations.
252
253\begin{grammar}
254 \RULE{\rt{script\_metavariables}}
413ffc02 255 \CASE{@ script:\NT{language} \OPT{\NT{rulename}} \OPT{depends on \NT{dep}} @
b1b2de81 256 \any{\NT{script\_metadecl}} @@}
5636bb2c
C
257 \CASE{@ initialize:\NT{language} \OPT{depends on \NT{dep}} @}
258 \CASE{@ finalize:\NT{language} \OPT{depends on \NT{dep}} @}
b1b2de81 259
413ffc02 260 \RULE{\rt{language}} \CASE{python} \CASE{ocaml}
b1b2de81 261
413ffc02
C
262 \RULE{\rt{script\_metadecl}}
263 \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;}
264 \CASE{\T{id} ;}
b1b2de81
C
265\end{grammar}
266
174d1640
C
267Currently, the only scripting languages that are supported are Python and
268OCaml, indicated using {\tt python} and {\tt ocaml}, respectively. The
b1b2de81
C
269set of available scripting languages may be extended at some point.
270
271Script rules declared with \KW{initialize} are run before the treatment of
272any file. Script rules declared with \KW{finalize} are run when the
273treatment of all of the files has completed. There can be at most one of
274each per scripting language (thus currently at most one of each).
275Initialize and finalize script rules do not have access to SmPL
276metavariables. Nevertheless, a finalize script rule can access any
277variables initialized by the other script rules, allowing information to be
278transmitted from the matching process to the finalize rule.
279
413ffc02
C
280A script metavariable that does not specify an origin, using \texttt{<<},
281is newly declared by the script. This metavariable should be assigned to a
282string and can be inherited by subsequent rules as an identifier. In
283Python, the assignment of such a metavariable $x$ should refer to the
284metavariable as {\tt coccinelle.\(x\)}. Examples are in the files
285\texttt{demos/pythontococci.cocci} and \texttt{demos/camltococci.cocci}.
286
287In an OCaml script, the following extended form of \textit{script\_metadecl}
288may be used:
289
290\begin{grammar}
291 \RULE{\rt{script\_metadecl}}
292 \CASE{(\T{id},\T{id}) <{}< \T{rulename\_id}.\T{id} ;}
293 \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;}
294 \CASE{\T{id} ;}
295\end{grammar}
296
297\noindent
298In a declaration of the form \texttt{(\T{id},\T{id}) <{}<
299 \T{rulename\_id}.\T{id} ;}, the left component of \texttt{(\T{id},\T{id})}
300receives a string representation of the value of the inherited metavariable
301while the right component receives its abstract syntax tree. The file
302\texttt{parsing\_c/ast\_c.ml} in the Coccinelle implementation gives some
303information about the structure of the abstract syntax tree. Either the
304left or right component may be replaced by \verb+_+, indicating that the
305string representation or abstract syntax tree representation is not
306wanted, respectively.
307
faf9a90c
C
308\section{Transformation}
309
310The transformation specification essentially has the form of C code,
311except that lines to remove are annotated with \verb+-+ in the first
312column, and lines to add are annotated with \verb-+-. A
313transformation specification can also use {\em dots}, ``\verb-...-'',
314describing an arbitrary sequence of function arguments or instructions
315within a control-flow path. Dots may be modified with a {\tt when}
316clause, indicating a pattern that should not occur anywhere within the
317matched sequence. Finally, a transformation can specify a disjunction
318of patterns, of the form \mtt{( \mth{\mita{pat}_1} | \mita{\ldots} |
319 \mth{\mita{pat}_n} )} where each \texttt{(}, \texttt{|} or
320\texttt{)} is in column 0 or preceded by \texttt{\textbackslash}.
321
322The grammar that we present for the transformation is not actually the
323grammar of the SmPL code that can be written by the programmer, but is
324instead the grammar of the slice of this consisting of the {\tt -}
325annotated and the unannotated code (the context of the transformed lines),
326or the {\tt +} annotated code and the unannotated code. For example, for
327parsing purposes, the following transformation
328%presented in Section \ref{sec:seq2}
329is split into the two variants shown below and each is parsed
330separately.
331
332\begin{center}
333\begin{tabular}{c}
334\begin{lstlisting}[language=Cocci]
335 proc_info_func(...) {
336 <...
337@-- hostno
338@++ hostptr->host_no
339 ...>
340 }
341\end{lstlisting}\\
342\end{tabular}
343\end{center}
344
345{%\sizecodebis
346\begin{center}
347\begin{tabular}{p{5cm}p{3cm}p{5cm}}
348\begin{lstlisting}[language=Cocci]
349 proc_info_func(...) {
350 <...
351@-- hostno
352 ...>
353 }
354\end{lstlisting}
355&&
356\begin{lstlisting}[language=Cocci]
357 proc_info_func(...) {
358 <...
359@++ hostptr->host_no
360 ...>
361 }
362\end{lstlisting}
363\end{tabular}
364\end{center}
365}
366
367\noindent
368Requiring that both slices parse correctly ensures that the rule matches
369syntactically valid C code and that it produces syntactically valid C code.
370The generated parse trees are then merged for use in the subsequent
371matching and transformation process.
372
373The grammar for the minus or plus slice of a transformation is as follows:
374
375\begin{grammar}
376
377 \RULE{\rt{transformation}}
378 \CASE{\some{\NT{include}}}
379 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
380 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
381 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{fundecl}, \NT{when}\mth{)}}
382
383 \RULE{\rt{include}}
384 \CASE{\#include \T{include\_string}}
385
386% \RULE{\rt{fun\_decl\_stmt}}
387% \CASE{\NT{decl\_stmt}}
388% \CASE{\NT{fundecl}}
389
390% \CASE{\NT{ctype}}
391% \CASE{\ttlb \NT{initialize\_list} \ttrb}
392% \CASE{\NT{toplevel\_seq\_start\_after\_dots\_init}}
393%
394% \RULE{\rt{toplevel\_seq\_start\_after\_dots\_init}}
395% \CASE{\NT{stmt\_dots} \NT{toplevel\_after\_dots}}
396% \CASE{\NT{expr} \opt{\NT{toplevel\_after\_exp}}}
397% \CASE{\NT{decl\_stmt\_expr} \opt{\NT{toplevel\_after\_stmt}}}
398%
399% \RULE{\rt{stmt\_dots}}
400% \CASE{... \any{\NT{when}}}
401% \CASE{<... \any{\NT{when}} \NT{nest\_after\_dots} ...>}
402% \CASE{<+... \any{\NT{when}} \NT{nest\_after\_dots} ...+>}
403
404 \RULE{\rt{when}}
405 \CASE{when != \NT{when\_code}}
406 \CASE{when = \NT{rule\_elem\_stmt}}
407 \CASE{when \NT{COMMA\_LIST}\mth{(}\NT{any\_strict}\mth{)}}
408 \CASE{when true != \NT{expr}}
409 \CASE{when false != \NT{expr}}
410
411 \RULE{\rt{when\_code}}
412 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
413 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
414
415 \RULE{\rt{rule\_elem\_stmt}}
416 \CASE{\NT{one\_decl}}
417 \CASE{\NT{expr};}
418 \CASE{return \opt{\NT{expr}};}
419 \CASE{break;}
420 \CASE{continue;}
421 \CASE{\bs(\NT{rule\_elem\_stmt} \SOME{\bs| \NT{rule\_elem\_stmt}}\bs)}
422
423 \RULE{\rt{any\_strict}}
424 \CASE{any}
425 \CASE{strict}
426 \CASE{forall}
427 \CASE{exists}
428
429% \RULE{\rt{nest\_after\_dots}}
430% \CASE{\NT{decl\_stmt\_exp} \opt{\NT{nest\_after\_stmt}}}
431% \CASE{\opt{\NT{exp}} \opt{\NT{nest\_after\_exp}}}
432%
433% \RULE{\rt{nest\_after\_stmt}}
434% \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
435% \CASE{\NT{decl\_stmt} \opt{\NT{nest\_after\_stmt}}}
436%
437% \RULE{\rt{nest\_after\_exp}}
438% \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
439%
440% \RULE{\rt{toplevel\_after\_dots}}
441% \CASE{\opt{\NT{toplevel\_after\_exp}}}
442% \CASE{\NT{exp} \opt{\NT{toplevel\_after\_exp}}}
443% \CASE{\NT{decl\_stmt\_expr} \NT{toplevel\_after\_stmt}}
444%
445% \RULE{\rt{toplevel\_after\_exp}}
446% \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
447%
448% \RULE{\rt{decl\_stmt\_expr}}
449% \CASE{TMetaStmList$^\ddag$}
450% \CASE{\NT{decl\_var}}
451% \CASE{\NT{stmt}}
452% \CASE{(\NT{stmt\_seq} \ANY{| \NT{stmt\_seq}})}
453%
454% \RULE{\rt{toplevel\_after\_stmt}}
455% \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
456% \CASE{\NT{decl\_stmt} \NT{toplevel\_after\_stmt}}
457
458\end{grammar}
459
460\begin{grammar}
461 \RULE{\rt{OPTDOTSEQ}\mth{(}\rt{grammar\_ds}, \rt{when\_ds}\mth{)}}
462 \CASE{}\multicolumn{3}{r}{\hspace{1cm}
463 \KW{\opt{... \opt{\NT{when\_ds}}} \NT{grammar\_ds}
464 \ANY{... \opt{\NT{when\_ds}} \NT{grammar\_ds}}
465 \opt{... \opt{\NT{when\_ds}}}}
466 }
467
468% \CASE{\opt{... \opt{\NT{when\_ds}}} \NT{grammar}
469% \ANY{... \opt{\NT{when\_ds}} \NT{grammar}}
470% \opt{... \opt{\NT{when\_ds}}}}
471% \CASE{<... \any{\NT{when\_ds}} \NT{grammar} ...>}
472% \CASE{<+... \any{\NT{when\_ds}} \NT{grammar} ...+>}
473
474\end{grammar}
475
476\noindent
477Lines may be annotated with an element of the set $\{\mtt{-}, \mtt{+},
478\mtt{*}\}$ or the singleton $\mtt{?}$, or one of each set. \mtt{?}
479represents at most one match of the given pattern. \mtt{*} is used for
480semantic match, \emph{i.e.}, a pattern that highlights the fragments
481annotated with \mtt{*}, but does not perform any modification of the
482matched code. \mtt{*} cannot be mixed with \mtt{-} and \mtt{+}. There are
483some constraints on the use of these annotations:
484\begin{itemize}
485\item Dots, {\em i.e.}, \texttt{...}, cannot occur on a line marked
486 \texttt{+}.
487\item Nested dots, {\em i.e.}, dots enclosed in {\tt <} and {\tt >}, cannot
488 occur on a line with any marking.
489\end{itemize}
490
0708f913
C
491Each element of a disjunction must be a proper term like an
492expression, a statement, an identifier or a declaration. Thus, the
413ffc02 493rule on the left below is not a syntactically correct SmPL rule. One may
0708f913
C
494use the rule on the right instead.
495
496\begin{center}
497 \begin{tabular}{l@{\hspace{5cm}}r}
498\begin{lstlisting}[language=Cocci]
499@@
500type T;
501T b;
502@@
503
504(
505 writeb(...,
506|
507 readb(
508)
509@--(T)
510 b)
511\end{lstlisting}
512 &
513\begin{lstlisting}[language=Cocci]
514@@
515type T;
516T b;
517@@
518
519(
520read
521|
522write
523)
524 (...,
525@-- (T)
526 b)
527\end{lstlisting}
528 \\
529 \end{tabular}
530\end{center}
531
faf9a90c
C
532\section{Types}
533\label{types}
534
535\begin{grammar}
536
537 \RULE{\rt{ctypes}}
538 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{ctype}\mth{)}}
539
540 \RULE{\rt{ctype}}
541 \CASE{\opt{\NT{const\_vol}} \NT{generic\_ctype} \any{*}}
542 \CASE{\opt{\NT{const\_vol}} void \some{*}}
543 \CASE{(\NT{ctype} \ANY{| \NT{ctype}})}
544
545 \RULE{\rt{const\_vol}}
546 \CASE{const}
547 \CASE{volatile}
548
549 \RULE{\rt{generic\_ctype}}
550 \CASE{\NT{ctype\_qualif}}
551 \CASE{\opt{\NT{ctype\_qualif}} char}
552 \CASE{\opt{\NT{ctype\_qualif}} short}
553 \CASE{\opt{\NT{ctype\_qualif}} int}
554 \CASE{\opt{\NT{ctype\_qualif}} long}
555 \CASE{\opt{\NT{ctype\_qualif}} long long}
556 \CASE{double}
557 \CASE{float}
558 \CASE{\OPT{struct\OR union} \T{id} \OPT{\{ \any{\NT{struct\_decl\_list}} \}}}
559
560 \RULE{\rt{ctype\_qualif}}
561 \CASE{unsigned}
562 \CASE{signed}
563
564 \RULE{\rt{struct\_decl\_list}}
565 \CASE{\NT{struct\_decl\_list\_start}}
566
567 \RULE{\rt{struct\_decl\_list\_start}}
568 \CASE{\NT{struct\_decl}}
569 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
570 \CASE{... \opt{when != \NT{struct\_decl}}$^\dag$ \opt{\NT{continue\_struct\_decl\_list}}}
571
572 \RULE{\rt{continue\_struct\_decl\_list}}
573 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
574 \CASE{\NT{struct\_decl}}
575
576 \RULE{\rt{struct\_decl}}
577 \CASE{\NT{ctype} \NT{d\_ident};}
578 \CASE{\NT{fn\_ctype} (* \NT{d\_ident}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)});}
579 \CASE{\opt{\NT{const\_vol}} \T{id} \NT{d\_ident};}
580
581 \RULE{\rt{d\_ident}}
582 \CASE{\NT{id} \any{[\opt{\NT{expr}}]}}
583
584 \RULE{\rt{fn\_ctype}}
585 \CASE{\NT{generic\_ctype} \any{*}}
586 \CASE{void \any{*}}
587
588 \RULE{\rt{name\_opt\_decl}}
589 \CASE{\NT{decl}}
590 \CASE{\NT{ctype}}
591 \CASE{\NT{fn\_ctype}}
592\end{grammar}
593
594$^\dag$ The optional \texttt{when} construct ends at the end of the line.
595
596\section{Function declarations}
597
598\begin{grammar}
599
600 \RULE{\rt{fundecl}}
601 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
602 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}})
603 \ttlb~\opt{\NT{stmt\_seq}} \ttrb}
604
605 \RULE{\rt{funproto}}
606 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
607 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}});}
608
609 \RULE{\rt{funinfo}}
610 \CASE{inline}
611 \CASE{\NT{storage}}
612% \CASE{\NT{attr}}
613
614 \RULE{\rt{storage}}
615 \CASE{static}
616 \CASE{auto}
617 \CASE{register}
618 \CASE{extern}
619
620 \RULE{\rt{funid}}
621 \CASE{\T{id}}
622 \CASE{\mth{\T{metaid}^{\ssf{Id}}}}
623% \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
624% \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
625
626 \RULE{\rt{param}}
627 \CASE{\NT{type} \T{id}}
628 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
629 \CASE{\mth{\T{metaid}^{\ssf{ParamList}}}}
630
631 \RULE{\rt{decl}}
632 \CASE{\NT{ctype} \NT{id}}
633 \CASE{\NT{fn\_ctype} (* \NT{id}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)})}
634 \CASE{void}
635 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
636\end{grammar}
637
638\begin{grammar}
639 \RULE{\rt{PARAMSEQ}\mth{(}\rt{gram\_p}, \rt{when\_p}\mth{)}}
640 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{gram\_p} \OR \ldots \opt{\NT{when\_p}}\mth{)}}
641\end{grammar}
642
90aeb998
C
643To match a function it is not necessary to provide all of the annotations
644that appear before the function name. For example, the following semantic
645patch:
646
647\begin{lstlisting}[language=Cocci]
648@@
649@@
650
651foo() { ... }
652\end{lstlisting}
653
654\noindent
655matches a function declared as follows:
656
657\begin{lstlisting}[language=C]
658static int foo() { return 12; }
659\end{lstlisting}
660
661\noindent
662This behavior can be turned off by disabling the \KW{optional\_storage}
663isomorphism. If one adds code before a function declaration, then the
664effect depends on the kind of code that is added. If the added code is a
665function definition or CPP code, then the new code is placed before
666all information associated with the function definition, including any
667comments preceding the function definition. On the other hand, if the new
668code is associated with the function, such as the addition of the keyword
669{\tt static}, the new code is placed exactly where it appears with respect
670to the rest of the function definition in the semantic patch. For example,
671
672\begin{lstlisting}[language=Cocci]
673@@
674@@
675
676+ static
677foo() { ... }
678\end{lstlisting}
679
680\noindent
681causes static to be placed just before the function name. The following
682causes it to be placed just before the type
683
684\begin{lstlisting}[language=Cocci]
685@@
686type T;
687@@
688
689+ static
690T foo() { ... }
691\end{lstlisting}
692
693\noindent
413ffc02 694It may be necessary to consider several cases to ensure that the added code
90aeb998
C
695is placed in the right position. For example, one may need one pattern
696that considers that the function is declared {\tt inline} and another that
697considers that it is not.
698
faf9a90c
C
699%\newpage
700
701\section{Declarations}
702
703\begin{grammar}
704 \RULE{\rt{decl\_var}}
705% \CASE{\NT{type} \opt{\NT{id} \opt{[\opt{\NT{dot\_expr}}]}
706% \ANY{, \NT{id} \opt{[ \opt{\NT{dot\_expr}}]}}};}
707 \CASE{\NT{common\_decl}}
708 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
709 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
710 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) = \NT{initialize} ;}
711 \CASE{typedef \NT{ctype} \NT{typedef\_ident} ;}
712
713 \RULE{\rt{one\_decl}}
714 \CASE{\NT{common\_decl}}
715 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{id};}
716% \CASE{\NT{storage} \NT{ctype} \NT{id} \opt{[\opt{\NT{dot\\_expr}}]} = \NT{nest\\_expr};}
717 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} ;}
718
719 \RULE{\rt{common\_decl}}
720 \CASE{\NT{ctype};}
721 \CASE{\NT{funproto}}
722 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{d\_ident} = \NT{initialize} ;}
723 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} = \NT{initialize} ;}
724 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) ;}
725 \CASE{\NT{decl\_ident} ( \OPT{\NT{COMMA\_LIST}\mth{(}\NT{expr}\mth{)}} ) ;}
726
727 \RULE{\rt{initialize}}
728 \CASE{\NT{dot\_expr}}
729 \CASE{\ttlb~\opt{\NT{COMMA\_LIST}\mth{(}\NT{dot\_expr}\mth{)}}~\ttrb}
730
731 \RULE{\rt{decl\_ident}}
732 \CASE{\T{DeclarerId}}
733 \CASE{\mth{\T{metaid}^{\ssf{Declarer}}}}
734\end{grammar}
735
736\section{Statements}
737
738The first rule {\em statement} describes the various forms of a statement.
739The remaining rules implement the constraints that are sensitive to the
740context in which the statement occurs: {\em single\_statement} for a
741context in which only one statement is allowed, and {\em decl\_statement}
742for a context in which a declaration, statement, or sequence thereof is
743allowed.
744
745\begin{grammar}
746 \RULE{\rt{stmt}}
747 \CASE{\NT{include}}
748 \CASE{\mth{\T{metaid}^{\ssf{Stmt}}}}
749 \CASE{\NT{expr};}
750 \CASE{if (\NT{dot\_expr}) \NT{single\_stmt} \opt{else \NT{single\_stmt}}}
751 \CASE{for (\opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}})
752 \NT{single\_stmt}}
753 \CASE{while (\NT{dot\_expr}) \NT{single\_stmt}}
754 \CASE{do \NT{single\_stmt} while (\NT{dot\_expr});}
755 \CASE{\NT{iter\_ident} (\any{\NT{dot\_expr}}) \NT{single\_stmt}}
756 \CASE{switch (\opt{\NT{dot\_expr}}) \ttlb \any{\NT{case\_line}} \ttrb}
757 \CASE{return \opt{\NT{dot\_expr}};}
758 \CASE{\ttlb~\opt{\NT{stmt\_seq}} \ttrb}
759 \CASE{\NT{NEST}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
760 \CASE{\NT{NEST}\mth{(}\NT{expr}, \NT{when}\mth{)}}
761 \CASE{break;}
762 \CASE{continue;}
763 \CASE{\NT{id}:}
764 \CASE{goto \NT{id};}
765 \CASE{\ttlb \NT{stmt\_seq} \ttrb}
766
767 \RULE{\rt{single\_stmt}}
768 \CASE{\NT{stmt}}
769 \CASE{\NT{OR}\mth{(}\NT{stmt}\mth{)}}
770
771 \RULE{\rt{decl\_stmt}}
772 \CASE{\mth{\T{metaid}^{\ssf{StmtList}}}}
773 \CASE{\NT{decl\_var}}
774 \CASE{\NT{stmt}}
775 \CASE{\NT{OR}\mth{(}\NT{stmt\_seq}\mth{)}}
776
777 \RULE{\rt{stmt\_seq}}
778 \CASE{\any{\NT{decl\_stmt}}
779 \opt{\NT{DOTSEQ}\mth{(}\some{\NT{decl\_stmt}},
780 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
781 \CASE{\any{\NT{decl\_stmt}}
782 \opt{\NT{DOTSEQ}\mth{(}\NT{expr},
783 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
784
785 \RULE{\rt{case\_line}}
786 \CASE{default :~\NT{stmt\_seq}}
787 \CASE{case \NT{dot\_expr} :~\NT{stmt\_seq}}
788
789 \RULE{\rt{iter\_ident}}
790 \CASE{\T{IteratorId}}
791 \CASE{\mth{\T{metaid}^{\ssf{Iterator}}}}
792\end{grammar}
793
794\begin{grammar}
795 \RULE{\rt{OR}\mth{(}\rt{gram\_o}\mth{)}}
796 \CASE{( \NT{gram\_o} \ANY{\ttmid \NT{gram\_o}})}
797
798 \RULE{\rt{DOTSEQ}\mth{(}\rt{gram\_d}, \rt{when\_d}\mth{)}}
799 \CASE{\ldots \opt{\NT{when\_d}} \ANY{\NT{gram\_d} \ldots \opt{\NT{when\_d}}}}
800
801 \RULE{\rt{NEST}\mth{(}\rt{gram\_n}, \rt{when\_n}\mth{)}}
802 \CASE{<\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots>}
803 \CASE{<+\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots+>}
804\end{grammar}
805
806\noindent
807OR is a macro that generates a disjunction of patterns. The three
808tokens \T{(}, \T{\ttmid}, and \T{)} must appear in the leftmost
809column, to differentiate them from the parentheses and bit-or tokens
810that can appear within expressions (and cannot appear in the leftmost
811column). These tokens may also be preceded by \texttt{\bs}
812when they are used in another column. These tokens are furthermore
813different from (, \(\mid\), and ), which are part of the grammar
814metalanguage.
815
816\section{Expressions}
817
818A nest or a single ellipsis is allowed in some expression contexts, and
819causes ambiguity in others. For example, in a sequence \mtt{\ldots
820\mita{expr} \ldots}, the nonterminal \mita{expr} must be instantiated as an
821explicit C-language expression, while in an array reference,
822\mtt{\mth{\mita{expr}_1} \mtt{[} \mth{\mita{expr}_2} \mtt{]}}, the
823nonterminal \mth{\mita{expr}_2}, because it is delimited by brackets, can
824also be instantiated as \mtt{\ldots}, representing an arbitrary expression. To
825distinguish between the various possibilities, we define three nonterminals
826for expressions: {\em expr} does not allow either top-level nests or
827ellipses, {\em nest\_expr} allows a nest but not an ellipsis, and {\em
828dot\_expr} allows both. The EXPR macro is used to express these variants
829in a concise way.
830
831\begin{grammar}
832 \RULE{\rt{expr}}
833 \CASE{\NT{EXPR}\mth{(}\NT{expr}\mth{)}}
834
835 \RULE{\rt{nest\_expr}}
836 \CASE{\NT{EXPR}\mth{(}\NT{nest\_expr}\mth{)}}
837 \CASE{\NT{NEST}\mth{(}\NT{nest\_expr}, \NT{exp\_whencode}\mth{)}}
838
839 \RULE{\rt{dot\_expr}}
840 \CASE{\NT{EXPR}\mth{(}\NT{dot\_expr}\mth{)}}
841 \CASE{\NT{NEST}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)}}
842 \CASE{...~\opt{\NT{exp\_whencode}}}
843
844 \RULE{\rt{EXPR}\mth{(}\rt{exp}\mth{)}}
845 \CASE{\NT{exp} \NT{assign\_op} \NT{exp}}
846 \CASE{\NT{exp}++}
847 \CASE{\NT{exp}--}
848 \CASE{\NT{unary\_op} \NT{exp}}
849 \CASE{\NT{exp} \NT{bin\_op} \NT{exp}}
850 \CASE{\NT{exp} ?~\NT{dot\_expr} :~\NT{exp}}
851 \CASE{(\NT{type}) \NT{exp}}
852 \CASE{\NT{exp} [\NT{dot\_expr}]}
853 \CASE{\NT{exp} .~\NT{id}}
854 \CASE{\NT{exp} -> \NT{id}}
855 \CASE{\NT{exp}(\opt{\NT{PARAMSEQ}\mth{(}\NT{arg}, \NT{exp\_whencode}\mth{)}})}
856 \CASE{\NT{id}}
857% \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
858% \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
859 \CASE{\mth{\T{metaid}^{\ssf{Exp}}}}
860% \CASE{\mth{\T{metaid}^{\ssf{Err}}}}
861 \CASE{\mth{\T{metaid}^{\ssf{Const}}}}
862 \CASE{\NT{const}}
863 \CASE{(\NT{dot\_expr})}
864 \CASE{\NT{OR}\mth{(}\NT{exp}\mth{)}}
865
866 \RULE{\rt{arg}}
867 \CASE{\NT{nest\_expr}}
868 \CASE{\mth{\T{metaid}^{\ssf{ExpList}}}}
869
870 \RULE{\rt{exp\_whencode}}
871 \CASE{when != \NT{expr}}
872
873 \RULE{\rt{assign\_op}}
874 \CASE{= \OR -= \OR += \OR *= \OR /= \OR \%=}
875 \CASE{\&= \OR |= \OR \caret= \OR \lt\lt= \OR \gt\gt=}
876
877 \RULE{\rt{bin\_op}}
878 \CASE{* \OR / \OR \% \OR + \OR -}
879 \CASE{\lt\lt \OR \gt\gt \OR \caret\xspace \OR \& \OR \ttmid}
880 \CASE{< \OR > \OR <= \OR >= \OR == \OR != \OR \&\& \OR \ttmid\ttmid}
881
882 \RULE{\rt{unary\_op}}
883 \CASE{++ \OR -- \OR \& \OR * \OR + \OR - \OR !}
884
885\end{grammar}
886
887\section{Constants, Identifiers and Types for Transformations}
888
889\begin{grammar}
890 \RULE{\rt{const}}
891 \CASE{\NT{string}}
892 \CASE{[0-9]+}
893 \CASE{\mth{\cdots}}
894
895 \RULE{\rt{string}}
896 \CASE{"\any{[\^{}"]}"}
897
898 \RULE{\rt{id}}
899 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Id}}}}
900
901 \RULE{\rt{typedef\_ident}}
902 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Type}}}}
903
904 \RULE{\rt{type}}
905 \CASE{\NT{ctype} \OR \mth{\T{metaid}^{\ssf{Type}}}}
906
907 \RULE{\rt{pathToIsoFile}}
908 \CASE{<.*>}
951c7801
C
909
910 \RULE{\rt{regexp}}
911 \CASE{"\any{[\^{}"]}"}
faf9a90c
C
912\end{grammar}
913
faf9a90c
C
914
915%%% Local Variables:
916%%% mode: LaTeX
708f4980 917%%% TeX-master: "main_grammar"
5636bb2c 918%%% coding: utf-8
faf9a90c
C
919%%% TeX-PDF-mode: t
920%%% ispell-local-dictionary: "american"
921%%% End: