Release coccinelle-0.2.3rc5
[bpt/coccinelle.git] / docs / manual / cocci_syntax.tex
CommitLineData
faf9a90c
C
1
2%\section{The SmPL Grammar}
3
4% This section presents the SmPL grammar. This definition follows closely
5% our implementation using the Menhir parser generator \cite{menhir}.
6
7This document presents the grammar of the SmPL language used by the
7f004419 8\href{http://coccinelle.lip6.fr/}{Coccinelle tool}. For the most
faf9a90c
C
9part, the grammar is written using standard notation. In some rules,
10however, the left-hand side is in all uppercase letters. These are
11macros, which take one or more grammar rule right-hand-sides as
12arguments. The grammar also uses some unspecified nonterminals, such
b1b2de81
C
13as \T{id}, \T{const}, etc. These refer to the sets suggested by
14the name, {\em i.e.}, \T{id} refers to the set of possible
15C-language identifiers, while \T{const} refers to the set of
978fd7e5 16possible C-language constants.
708f4980 17%
978fd7e5 18\ifhevea
708f4980 19A PDF version of this documentation is available at
951c7801 20\url{http://coccinelle.lip6.fr/docs/main_grammar.pdf}.
708f4980 21\else
faf9a90c 22An HTML version of this documentation is available online at
951c7801 23\url{http://coccinelle.lip6.fr/docs/main_grammar.html}.
708f4980 24\fi
faf9a90c 25
faf9a90c
C
26\section{Program}
27
28\begin{grammar}
29 \RULE{\rt{program}}
30 \CASE{\any{\NT{include\_cocci}} \some{\NT{changeset}}}
31
32 \RULE{\rt{include\_cocci}}
33 \CASE{using \NT{string}}
34 \CASE{using \NT{pathToIsoFile}}
5636bb2c 35 \CASE{virtual \T{id} \ANY{, \T{id}}}
faf9a90c
C
36
37 \RULE{\rt{changeset}}
38 \CASE{\NT{metavariables} \NT{transformation}}
b1b2de81 39 \CASE{\NT{script\_metavariables} \T{script\_code}}
faf9a90c 40% \CASE{\NT{metavariables} \ANY{--- filename +++ filename} \NT{transformation}}
faf9a90c
C
41\end{grammar}
42
b1b2de81
C
43\noindent
44\T{script\_code} is any code in the chosen scripting language. Parsing of
45the semantic patch does not check the validity of this code; any errors are
978fd7e5
C
46first detected when the code is executed. Furthermore, \texttt{@} should
47not be used in this code. Spatch scans the script code for the next
48\texttt{@} and considers that to be the beginning of the next rule, even if
49\texttt{@} occurs within e.g., a string or a comment.
b1b2de81 50
5636bb2c
C
51The \texttt{virtual} keyword is used to declare virtual rules. Virtual
52rules may be subsequently used as a dependency for the rules in the
53SmPL file. Whether a virtual rule is defined or not is controlled by
54the \texttt{-D} option on the command line.
55
faf9a90c
C
56% Between the metavariables and the transformation rule, there can be a
57% specification of constraints on the names of the old and new files,
58% analogous to the filename specifications in the standard patch syntax.
59% (see Figure \ref{scsiglue_patch}).
60
b1b2de81 61\section{Metavariables for transformations}
faf9a90c
C
62
63The \NT{rulename} portion of the metavariable declaration can specify
64properties of a rule such as its name, the names of the rules that it
65depends on, the isomorphisms to be used in processing the rule, and whether
66quantification over paths should be universal or existential. The optional
67annotation {\tt expression} indicates that the pattern is to be considered
68as matching an expression, and thus can be used to avoid some parsing
69problems.
70
71The \NT{metadecl} portion of the metavariable declaration defines various
72types of metavariables that will be used for matching in the transformation
73section.
74
75\begin{grammar}
76 \RULE{\rt{metavariables}}
77 \CASE{@@ \any{\NT{metadecl}} @@}
78 \CASE{@ \NT{rulename} @ \any{\NT{metadecl}} @@}
79
80 \RULE{\rt{rulename}}
81 \CASE{\T{id} \OPT{extends \T{id}} \OPT{depends on \NT{dep}} \opt{\NT{iso}}
82 \opt{\NT{disable-iso}} \opt{\NT{exists}} \opt{expression}}
b1b2de81 83
faf9a90c
C
84 \RULE{\rt{dep}}
85 \CASE{\NT{pnrule}}
86 \CASE{\NT{dep} \&\& \NT{dep}}
87 \CASE{\NT{dep} || \NT{dep}}
88
89 \RULE{\rt{pnrule}}
90 \CASE{\T{id}}
91 \CASE{!\T{id}}
92 \CASE{ever \T{id}}
93 \CASE{never \T{id}}
94 \CASE{(\NT{dep})}
95
96 \RULE{\rt{iso}}
97 \CASE{using \NT{string} \ANY{, \NT{string}}}
98
99 \RULE{\rt{disable-iso}}
100 \CASE{disable \NT{COMMA\_LIST}\mth{(}\T{id}\mth{)}}
101
102 \RULE{\rt{exists}}
103 \CASE{exists}
104 \CASE{forall}
105% \CASE{\opt{reverse} forall}
106
107 \RULE{\rt{COMMA\_LIST}\mth{(}\rt{elem}\mth{)}}
108 \CASE{\NT{elem} \ANY{, \NT{elem}}}
109\end{grammar}
110
b1b2de81 111The keyword \KW{disable} is normally used with the names of
faf9a90c
C
112isomorphisms defined in standard.iso or whatever isomorphism file has been
113included. There are, however, some other isomorphisms that are built into
114the implementation of Coccinelle and that can be disabled as well. Their
115names are given below. In each case, the text describes the standard
116behavior. Using \NT{disable-iso} with the given name disables this behavior.
117
118\begin{itemize}
119\item \KW{optional\_storage}: A SmPL function definition that does not
120 specify any visibility (i.e., static or extern), or a SmPL variable
121 declaration that does not specify any storage (i.e., auto, static,
122 register, or extern), matches a function declaration or variable
123 declaration with any visibility or storage, respectively.
124\item \KW{optional\_qualifier}: This is similar to \KW{optional\_storage},
125 except that here it is the qualifier (i.e., const or volatile) that does
126 not have to be specified in the SmPL code, but may be present in the C code.
127\item \KW{value\_format}: Integers in various formats, e.g., 1 and 0x1, are
128 considered to be equivalent in the matching process.
129\item \KW{comm\_assoc}: An expression of the form \NT{exp} \NT{bin\_op}
130 \KW{...}, where \NT{bin\_op} is commutative and associative, is
131 considered to match any top-level sequence of \NT{bin\_op} operators
132 containing \NT{exp} as the top-level argument.
133\end{itemize}
134
135The possible types of metavariable declarations are defined by the grammar
136rule below. Metavariables should occur at least once in the transformation
137immediately following their declaration. Fresh metavariables must only be
138used in {\tt +} code. These properties are not expressed in the grammar,
139but are checked by a subsequent analysis. The metavariables are designated
140according to the kind of terms they can match, such as a statement, an
141identifier, or an expression. An expression metavariable can be further
142constrained by its type.
143
144\begin{grammar}
145 \RULE{\rt{metadecl}}
146 \CASE{fresh identifier \NT{ids} ;}
951c7801 147 \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
ae4735db 148 \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_virt\_or\_not\_eq}\mth{)} ;}
faf9a90c
C
149 \CASE{parameter \opt{list} \NT{ids} ;}
150 \CASE{parameter list [ \NT{id} ] \NT{ids} ;}
88e71198 151 \CASE{parameter list [ \NT{const} ] \NT{ids} ;}
faf9a90c
C
152 \CASE{type \NT{ids} ;}
153 \CASE{statement \opt{list} \NT{ids} ;}
154 \CASE{typedef \NT{ids} ;}
155 \CASE{declarer name \NT{ids} ;}
156% \CASE{\opt{local} function \NT{pmid\_with\_not\_eq\_list} ;}
951c7801 157 \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
faf9a90c
C
158 \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
159 \CASE{iterator name \NT{ids} ;}
951c7801 160 \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
faf9a90c
C
161 \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
162% \CASE{error \NT{pmid\_with\_not\_eq\_list} ; }
163 \CASE{\opt{local} idexpression \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
164 \CASE{\opt{local} idexpression \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
165 \CASE{\opt{local} idexpression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
166 \CASE{expression list \NT{ids} ;}
167 \CASE{expression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
168 \CASE{expression \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
88e71198
C
169 \CASE{expression list [ \NT{id} ] \NT{ids} ;}
170 \CASE{expression list [ \NT{const} ] \NT{ids} ;}
faf9a90c
C
171 \CASE{\NT{ctype} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
172 \CASE{\NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
173 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
174 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
175 \CASE{constant \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
176 \CASE{constant \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
177 \CASE{position \opt{any} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq\_mid}\mth{)} ;}
178\end{grammar}
179
88e71198
C
180It is possible to specify that an expression list or a parameter list
181metavariable should match a specific number of expressions or parameters.
182
faf9a90c
C
183\begin{grammar}
184 \RULE{\rt{ids}}
185 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{pmid}\mth{)}}
186
187 \RULE{\rt{pmid}}
188 \CASE{\T{id}}
189 \CASE{\NT{mid}}
190% \CASE{list}
191% \CASE{error}
192% \CASE{type}
193
194 \RULE{\rt{mid}} \CASE{\T{rulename\_id}.\T{id}}
195
951c7801
C
196 \RULE{\rt{pmid\_with\_regexp}}
197 \CASE{\NT{pmid} \~{}= \NT{regexp}}
198
faf9a90c 199 \RULE{\rt{pmid\_with\_not\_eq}}
5636bb2c
C
200 \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_meta}}}
201 \CASE{\NT{pmid}
202 \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_meta}\mth{)} \ttrb}}
faf9a90c 203
55d38388 204 \RULE{\rt{pmid\_with\_virt\_or\_not\_eq}}
ae4735db 205 \CASE{virtual.\T{id}}
55d38388
C
206 \CASE{\NT{pmid\_with\_not\_eq}}
207
208 \RULE{\rt{pmid\_with\_not\_ceq}}
faf9a90c
C
209 \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_cst}}}
210 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_cst}\mth{)} \ttrb}}
211
212 \RULE{\rt{id\_or\_cst}}
213 \CASE{\T{id}}
214 \CASE{\T{integer}}
215
5636bb2c
C
216 \RULE{\rt{id\_or\_meta}}
217 \CASE{\T{id}}
218 \CASE{\T{rulename\_id}.\T{id}}
219
faf9a90c
C
220 \RULE{\rt{pmid\_with\_not\_eq\_mid}}
221 \CASE{\NT{pmid} \OPT{!= \NT{mid}}}
222 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{mid}\mth{)} \ttrb}}
223\end{grammar}
224
225Subsequently, we refer to arbitrary metavariables as
226\mth{\msf{metaid}^{\mbox{\scriptsize{\it{ty}}}}}, where {\it{ty}}
227indicates the {\it metakind} used in the declaration of the variable.
228For example, \mth{\msf{metaid}^{\ssf{Type}}} refers to a metavariable
229that was declared using \texttt{type} and stands for any type.
230
231The \NT{ctype} and \NT{ctypes} nonterminals are used by both the grammar of
232metavariable declarations and the grammar of transformations, and are
233defined on page~\pageref{types}.
234
ae4735db
C
235An identifier metavariable with {\tt virtual} as its ``rule name'' is given
236a value on the command line. For example, if a semantic patch contains a
237rule that declares an identifier metavariable with the name {\tt
238 virtual.alloc}, then the command line could contain {\tt -D
239 alloc=kmalloc}. There should not be space around the {\tt =}. An
240example is in {\tt demos/vm.cocci} and {\tt demos/vm.c}.
241
b1b2de81
C
242\section{Metavariables for scripts}
243
244Metavariables for scripts can only be inherited from transformation rules.
245In the spirit of scripting languages such as Python that use dynamic
246typing, metavariables for scripts do not include type declarations.
247
248\begin{grammar}
249 \RULE{\rt{script\_metavariables}}
250 \CASE{@ script:\NT{language} \OPT{depends on \NT{dep}} @
251 \any{\NT{script\_metadecl}} @@}
5636bb2c
C
252 \CASE{@ initialize:\NT{language} \OPT{depends on \NT{dep}} @}
253 \CASE{@ finalize:\NT{language} \OPT{depends on \NT{dep}} @}
b1b2de81
C
254
255 \RULE{\rt{language}} \CASE{python} \CASE{ocaml}
256
257 \RULE{\rt{script\_metadecl}} \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;}
258\end{grammar}
259
174d1640
C
260Currently, the only scripting languages that are supported are Python and
261OCaml, indicated using {\tt python} and {\tt ocaml}, respectively. The
b1b2de81
C
262set of available scripting languages may be extended at some point.
263
264Script rules declared with \KW{initialize} are run before the treatment of
265any file. Script rules declared with \KW{finalize} are run when the
266treatment of all of the files has completed. There can be at most one of
267each per scripting language (thus currently at most one of each).
268Initialize and finalize script rules do not have access to SmPL
269metavariables. Nevertheless, a finalize script rule can access any
270variables initialized by the other script rules, allowing information to be
271transmitted from the matching process to the finalize rule.
272
faf9a90c
C
273\section{Transformation}
274
275The transformation specification essentially has the form of C code,
276except that lines to remove are annotated with \verb+-+ in the first
277column, and lines to add are annotated with \verb-+-. A
278transformation specification can also use {\em dots}, ``\verb-...-'',
279describing an arbitrary sequence of function arguments or instructions
280within a control-flow path. Dots may be modified with a {\tt when}
281clause, indicating a pattern that should not occur anywhere within the
282matched sequence. Finally, a transformation can specify a disjunction
283of patterns, of the form \mtt{( \mth{\mita{pat}_1} | \mita{\ldots} |
284 \mth{\mita{pat}_n} )} where each \texttt{(}, \texttt{|} or
285\texttt{)} is in column 0 or preceded by \texttt{\textbackslash}.
286
287The grammar that we present for the transformation is not actually the
288grammar of the SmPL code that can be written by the programmer, but is
289instead the grammar of the slice of this consisting of the {\tt -}
290annotated and the unannotated code (the context of the transformed lines),
291or the {\tt +} annotated code and the unannotated code. For example, for
292parsing purposes, the following transformation
293%presented in Section \ref{sec:seq2}
294is split into the two variants shown below and each is parsed
295separately.
296
297\begin{center}
298\begin{tabular}{c}
299\begin{lstlisting}[language=Cocci]
300 proc_info_func(...) {
301 <...
302@-- hostno
303@++ hostptr->host_no
304 ...>
305 }
306\end{lstlisting}\\
307\end{tabular}
308\end{center}
309
310{%\sizecodebis
311\begin{center}
312\begin{tabular}{p{5cm}p{3cm}p{5cm}}
313\begin{lstlisting}[language=Cocci]
314 proc_info_func(...) {
315 <...
316@-- hostno
317 ...>
318 }
319\end{lstlisting}
320&&
321\begin{lstlisting}[language=Cocci]
322 proc_info_func(...) {
323 <...
324@++ hostptr->host_no
325 ...>
326 }
327\end{lstlisting}
328\end{tabular}
329\end{center}
330}
331
332\noindent
333Requiring that both slices parse correctly ensures that the rule matches
334syntactically valid C code and that it produces syntactically valid C code.
335The generated parse trees are then merged for use in the subsequent
336matching and transformation process.
337
338The grammar for the minus or plus slice of a transformation is as follows:
339
340\begin{grammar}
341
342 \RULE{\rt{transformation}}
343 \CASE{\some{\NT{include}}}
344 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
345 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
346 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{fundecl}, \NT{when}\mth{)}}
347
348 \RULE{\rt{include}}
349 \CASE{\#include \T{include\_string}}
350
351% \RULE{\rt{fun\_decl\_stmt}}
352% \CASE{\NT{decl\_stmt}}
353% \CASE{\NT{fundecl}}
354
355% \CASE{\NT{ctype}}
356% \CASE{\ttlb \NT{initialize\_list} \ttrb}
357% \CASE{\NT{toplevel\_seq\_start\_after\_dots\_init}}
358%
359% \RULE{\rt{toplevel\_seq\_start\_after\_dots\_init}}
360% \CASE{\NT{stmt\_dots} \NT{toplevel\_after\_dots}}
361% \CASE{\NT{expr} \opt{\NT{toplevel\_after\_exp}}}
362% \CASE{\NT{decl\_stmt\_expr} \opt{\NT{toplevel\_after\_stmt}}}
363%
364% \RULE{\rt{stmt\_dots}}
365% \CASE{... \any{\NT{when}}}
366% \CASE{<... \any{\NT{when}} \NT{nest\_after\_dots} ...>}
367% \CASE{<+... \any{\NT{when}} \NT{nest\_after\_dots} ...+>}
368
369 \RULE{\rt{when}}
370 \CASE{when != \NT{when\_code}}
371 \CASE{when = \NT{rule\_elem\_stmt}}
372 \CASE{when \NT{COMMA\_LIST}\mth{(}\NT{any\_strict}\mth{)}}
373 \CASE{when true != \NT{expr}}
374 \CASE{when false != \NT{expr}}
375
376 \RULE{\rt{when\_code}}
377 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
378 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
379
380 \RULE{\rt{rule\_elem\_stmt}}
381 \CASE{\NT{one\_decl}}
382 \CASE{\NT{expr};}
383 \CASE{return \opt{\NT{expr}};}
384 \CASE{break;}
385 \CASE{continue;}
386 \CASE{\bs(\NT{rule\_elem\_stmt} \SOME{\bs| \NT{rule\_elem\_stmt}}\bs)}
387
388 \RULE{\rt{any\_strict}}
389 \CASE{any}
390 \CASE{strict}
391 \CASE{forall}
392 \CASE{exists}
393
394% \RULE{\rt{nest\_after\_dots}}
395% \CASE{\NT{decl\_stmt\_exp} \opt{\NT{nest\_after\_stmt}}}
396% \CASE{\opt{\NT{exp}} \opt{\NT{nest\_after\_exp}}}
397%
398% \RULE{\rt{nest\_after\_stmt}}
399% \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
400% \CASE{\NT{decl\_stmt} \opt{\NT{nest\_after\_stmt}}}
401%
402% \RULE{\rt{nest\_after\_exp}}
403% \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
404%
405% \RULE{\rt{toplevel\_after\_dots}}
406% \CASE{\opt{\NT{toplevel\_after\_exp}}}
407% \CASE{\NT{exp} \opt{\NT{toplevel\_after\_exp}}}
408% \CASE{\NT{decl\_stmt\_expr} \NT{toplevel\_after\_stmt}}
409%
410% \RULE{\rt{toplevel\_after\_exp}}
411% \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
412%
413% \RULE{\rt{decl\_stmt\_expr}}
414% \CASE{TMetaStmList$^\ddag$}
415% \CASE{\NT{decl\_var}}
416% \CASE{\NT{stmt}}
417% \CASE{(\NT{stmt\_seq} \ANY{| \NT{stmt\_seq}})}
418%
419% \RULE{\rt{toplevel\_after\_stmt}}
420% \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
421% \CASE{\NT{decl\_stmt} \NT{toplevel\_after\_stmt}}
422
423\end{grammar}
424
425\begin{grammar}
426 \RULE{\rt{OPTDOTSEQ}\mth{(}\rt{grammar\_ds}, \rt{when\_ds}\mth{)}}
427 \CASE{}\multicolumn{3}{r}{\hspace{1cm}
428 \KW{\opt{... \opt{\NT{when\_ds}}} \NT{grammar\_ds}
429 \ANY{... \opt{\NT{when\_ds}} \NT{grammar\_ds}}
430 \opt{... \opt{\NT{when\_ds}}}}
431 }
432
433% \CASE{\opt{... \opt{\NT{when\_ds}}} \NT{grammar}
434% \ANY{... \opt{\NT{when\_ds}} \NT{grammar}}
435% \opt{... \opt{\NT{when\_ds}}}}
436% \CASE{<... \any{\NT{when\_ds}} \NT{grammar} ...>}
437% \CASE{<+... \any{\NT{when\_ds}} \NT{grammar} ...+>}
438
439\end{grammar}
440
441\noindent
442Lines may be annotated with an element of the set $\{\mtt{-}, \mtt{+},
443\mtt{*}\}$ or the singleton $\mtt{?}$, or one of each set. \mtt{?}
444represents at most one match of the given pattern. \mtt{*} is used for
445semantic match, \emph{i.e.}, a pattern that highlights the fragments
446annotated with \mtt{*}, but does not perform any modification of the
447matched code. \mtt{*} cannot be mixed with \mtt{-} and \mtt{+}. There are
448some constraints on the use of these annotations:
449\begin{itemize}
450\item Dots, {\em i.e.}, \texttt{...}, cannot occur on a line marked
451 \texttt{+}.
452\item Nested dots, {\em i.e.}, dots enclosed in {\tt <} and {\tt >}, cannot
453 occur on a line with any marking.
454\end{itemize}
455
0708f913
C
456Each element of a disjunction must be a proper term like an
457expression, a statement, an identifier or a declaration. Thus, the
458rule on the left below is not a syntactically correct SmPL rule. One may
459use the rule on the right instead.
460
461\begin{center}
462 \begin{tabular}{l@{\hspace{5cm}}r}
463\begin{lstlisting}[language=Cocci]
464@@
465type T;
466T b;
467@@
468
469(
470 writeb(...,
471|
472 readb(
473)
474@--(T)
475 b)
476\end{lstlisting}
477 &
478\begin{lstlisting}[language=Cocci]
479@@
480type T;
481T b;
482@@
483
484(
485read
486|
487write
488)
489 (...,
490@-- (T)
491 b)
492\end{lstlisting}
493 \\
494 \end{tabular}
495\end{center}
496
faf9a90c
C
497\section{Types}
498\label{types}
499
500\begin{grammar}
501
502 \RULE{\rt{ctypes}}
503 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{ctype}\mth{)}}
504
505 \RULE{\rt{ctype}}
506 \CASE{\opt{\NT{const\_vol}} \NT{generic\_ctype} \any{*}}
507 \CASE{\opt{\NT{const\_vol}} void \some{*}}
508 \CASE{(\NT{ctype} \ANY{| \NT{ctype}})}
509
510 \RULE{\rt{const\_vol}}
511 \CASE{const}
512 \CASE{volatile}
513
514 \RULE{\rt{generic\_ctype}}
515 \CASE{\NT{ctype\_qualif}}
516 \CASE{\opt{\NT{ctype\_qualif}} char}
517 \CASE{\opt{\NT{ctype\_qualif}} short}
518 \CASE{\opt{\NT{ctype\_qualif}} int}
519 \CASE{\opt{\NT{ctype\_qualif}} long}
520 \CASE{\opt{\NT{ctype\_qualif}} long long}
521 \CASE{double}
522 \CASE{float}
523 \CASE{\OPT{struct\OR union} \T{id} \OPT{\{ \any{\NT{struct\_decl\_list}} \}}}
524
525 \RULE{\rt{ctype\_qualif}}
526 \CASE{unsigned}
527 \CASE{signed}
528
529 \RULE{\rt{struct\_decl\_list}}
530 \CASE{\NT{struct\_decl\_list\_start}}
531
532 \RULE{\rt{struct\_decl\_list\_start}}
533 \CASE{\NT{struct\_decl}}
534 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
535 \CASE{... \opt{when != \NT{struct\_decl}}$^\dag$ \opt{\NT{continue\_struct\_decl\_list}}}
536
537 \RULE{\rt{continue\_struct\_decl\_list}}
538 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
539 \CASE{\NT{struct\_decl}}
540
541 \RULE{\rt{struct\_decl}}
542 \CASE{\NT{ctype} \NT{d\_ident};}
543 \CASE{\NT{fn\_ctype} (* \NT{d\_ident}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)});}
544 \CASE{\opt{\NT{const\_vol}} \T{id} \NT{d\_ident};}
545
546 \RULE{\rt{d\_ident}}
547 \CASE{\NT{id} \any{[\opt{\NT{expr}}]}}
548
549 \RULE{\rt{fn\_ctype}}
550 \CASE{\NT{generic\_ctype} \any{*}}
551 \CASE{void \any{*}}
552
553 \RULE{\rt{name\_opt\_decl}}
554 \CASE{\NT{decl}}
555 \CASE{\NT{ctype}}
556 \CASE{\NT{fn\_ctype}}
557\end{grammar}
558
559$^\dag$ The optional \texttt{when} construct ends at the end of the line.
560
561\section{Function declarations}
562
563\begin{grammar}
564
565 \RULE{\rt{fundecl}}
566 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
567 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}})
568 \ttlb~\opt{\NT{stmt\_seq}} \ttrb}
569
570 \RULE{\rt{funproto}}
571 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
572 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}});}
573
574 \RULE{\rt{funinfo}}
575 \CASE{inline}
576 \CASE{\NT{storage}}
577% \CASE{\NT{attr}}
578
579 \RULE{\rt{storage}}
580 \CASE{static}
581 \CASE{auto}
582 \CASE{register}
583 \CASE{extern}
584
585 \RULE{\rt{funid}}
586 \CASE{\T{id}}
587 \CASE{\mth{\T{metaid}^{\ssf{Id}}}}
588% \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
589% \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
590
591 \RULE{\rt{param}}
592 \CASE{\NT{type} \T{id}}
593 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
594 \CASE{\mth{\T{metaid}^{\ssf{ParamList}}}}
595
596 \RULE{\rt{decl}}
597 \CASE{\NT{ctype} \NT{id}}
598 \CASE{\NT{fn\_ctype} (* \NT{id}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)})}
599 \CASE{void}
600 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
601\end{grammar}
602
603\begin{grammar}
604 \RULE{\rt{PARAMSEQ}\mth{(}\rt{gram\_p}, \rt{when\_p}\mth{)}}
605 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{gram\_p} \OR \ldots \opt{\NT{when\_p}}\mth{)}}
606\end{grammar}
607
90aeb998
C
608To match a function it is not necessary to provide all of the annotations
609that appear before the function name. For example, the following semantic
610patch:
611
612\begin{lstlisting}[language=Cocci]
613@@
614@@
615
616foo() { ... }
617\end{lstlisting}
618
619\noindent
620matches a function declared as follows:
621
622\begin{lstlisting}[language=C]
623static int foo() { return 12; }
624\end{lstlisting}
625
626\noindent
627This behavior can be turned off by disabling the \KW{optional\_storage}
628isomorphism. If one adds code before a function declaration, then the
629effect depends on the kind of code that is added. If the added code is a
630function definition or CPP code, then the new code is placed before
631all information associated with the function definition, including any
632comments preceding the function definition. On the other hand, if the new
633code is associated with the function, such as the addition of the keyword
634{\tt static}, the new code is placed exactly where it appears with respect
635to the rest of the function definition in the semantic patch. For example,
636
637\begin{lstlisting}[language=Cocci]
638@@
639@@
640
641+ static
642foo() { ... }
643\end{lstlisting}
644
645\noindent
646causes static to be placed just before the function name. The following
647causes it to be placed just before the type:
648
649\begin{lstlisting}[language=Cocci]
650@@
651type T;
652@@
653
654+ static
655T foo() { ... }
656\end{lstlisting}
657
658\noindent
659It may be necessary to consider several cases to ensure that the added code
660is placed in the right position. For example, one may need one pattern
661that considers that the function is declared {\tt inline} and another that
662considers that it is not.
663
faf9a90c
C
664%\newpage
665
666\section{Declarations}
667
668\begin{grammar}
669 \RULE{\rt{decl\_var}}
670% \CASE{\NT{type} \opt{\NT{id} \opt{[\opt{\NT{dot\_expr}}]}
671% \ANY{, \NT{id} \opt{[ \opt{\NT{dot\_expr}}]}}};}
672 \CASE{\NT{common\_decl}}
673 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
674 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
675 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) = \NT{initialize} ;}
676 \CASE{typedef \NT{ctype} \NT{typedef\_ident} ;}
677
678 \RULE{\rt{one\_decl}}
679 \CASE{\NT{common\_decl}}
680 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{id};}
681% \CASE{\NT{storage} \NT{ctype} \NT{id} \opt{[\opt{\NT{dot\\_expr}}]} = \NT{nest\\_expr};}
682 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} ;}
683
684 \RULE{\rt{common\_decl}}
685 \CASE{\NT{ctype};}
686 \CASE{\NT{funproto}}
687 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{d\_ident} = \NT{initialize} ;}
688 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} = \NT{initialize} ;}
689 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) ;}
690 \CASE{\NT{decl\_ident} ( \OPT{\NT{COMMA\_LIST}\mth{(}\NT{expr}\mth{)}} ) ;}
691
692 \RULE{\rt{initialize}}
693 \CASE{\NT{dot\_expr}}
694 \CASE{\ttlb~\opt{\NT{COMMA\_LIST}\mth{(}\NT{dot\_expr}\mth{)}}~\ttrb}
695
696 \RULE{\rt{decl\_ident}}
697 \CASE{\T{DeclarerId}}
698 \CASE{\mth{\T{metaid}^{\ssf{Declarer}}}}
699\end{grammar}
700
701\section{Statements}
702
703The first rule {\em stmt} describes the various forms of a statement.
704The remaining rules implement the constraints that are sensitive to the
705context in which the statement occurs: {\em single\_stmt} for a
706context in which only one statement is allowed, and {\em decl\_stmt}
707for a context in which a declaration, statement, or sequence thereof is
708allowed.
709
710\begin{grammar}
711 \RULE{\rt{stmt}}
712 \CASE{\NT{include}}
713 \CASE{\mth{\T{metaid}^{\ssf{Stmt}}}}
714 \CASE{\NT{expr};}
715 \CASE{if (\NT{dot\_expr}) \NT{single\_stmt} \opt{else \NT{single\_stmt}}}
716 \CASE{for (\opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}})
717 \NT{single\_stmt}}
718 \CASE{while (\NT{dot\_expr}) \NT{single\_stmt}}
719 \CASE{do \NT{single\_stmt} while (\NT{dot\_expr});}
720 \CASE{\NT{iter\_ident} (\any{\NT{dot\_expr}}) \NT{single\_stmt}}
721 \CASE{switch (\opt{\NT{dot\_expr}}) \ttlb \any{\NT{case\_line}} \ttrb}
722 \CASE{return \opt{\NT{dot\_expr}};}
723 \CASE{\ttlb~\opt{\NT{stmt\_seq}} \ttrb}
724 \CASE{\NT{NEST}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
725 \CASE{\NT{NEST}\mth{(}\NT{expr}, \NT{when}\mth{)}}
726 \CASE{break;}
727 \CASE{continue;}
728 \CASE{\NT{id}:}
729 \CASE{goto \NT{id};}
730 \CASE{\ttlb \NT{stmt\_seq} \ttrb}
731
732 \RULE{\rt{single\_stmt}}
733 \CASE{\NT{stmt}}
734 \CASE{\NT{OR}\mth{(}\NT{stmt}\mth{)}}
735
736 \RULE{\rt{decl\_stmt}}
737 \CASE{\mth{\T{metaid}^{\ssf{StmtList}}}}
738 \CASE{\NT{decl\_var}}
739 \CASE{\NT{stmt}}
740 \CASE{\NT{OR}\mth{(}\NT{stmt\_seq}\mth{)}}
741
742 \RULE{\rt{stmt\_seq}}
743 \CASE{\any{\NT{decl\_stmt}}
744 \opt{\NT{DOTSEQ}\mth{(}\some{\NT{decl\_stmt}},
745 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
746 \CASE{\any{\NT{decl\_stmt}}
747 \opt{\NT{DOTSEQ}\mth{(}\NT{expr},
748 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
749
750 \RULE{\rt{case\_line}}
751 \CASE{default :~\NT{stmt\_seq}}
752 \CASE{case \NT{dot\_expr} :~\NT{stmt\_seq}}
753
754 \RULE{\rt{iter\_ident}}
755 \CASE{\T{IteratorId}}
756 \CASE{\mth{\T{metaid}^{\ssf{Iterator}}}}
757\end{grammar}
758
759\begin{grammar}
760 \RULE{\rt{OR}\mth{(}\rt{gram\_o}\mth{)}}
761 \CASE{( \NT{gram\_o} \ANY{\ttmid \NT{gram\_o}})}
762
763 \RULE{\rt{DOTSEQ}\mth{(}\rt{gram\_d}, \rt{when\_d}\mth{)}}
764 \CASE{\ldots \opt{\NT{when\_d}} \ANY{\NT{gram\_d} \ldots \opt{\NT{when\_d}}}}
765
766 \RULE{\rt{NEST}\mth{(}\rt{gram\_n}, \rt{when\_n}\mth{)}}
767 \CASE{<\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots>}
768 \CASE{<+\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots+>}
769\end{grammar}
770
771\noindent
772OR is a macro that generates a disjunction of patterns. The three
773tokens \T{(}, \T{\ttmid}, and \T{)} must appear in the leftmost
774column, to differentiate them from the parentheses and bit-or tokens
775that can appear within expressions (and cannot appear in the leftmost
776column). These tokens may also be preceded by \texttt{\bs}
777when they are used in another column. These tokens are furthermore
778different from (, \(\mid\), and ), which are part of the grammar
779metalanguage.
780
781\section{Expressions}
782
783A nest or a single ellipsis is allowed in some expression contexts, and
784causes ambiguity in others. For example, in a sequence \mtt{\ldots
785\mita{expr} \ldots}, the nonterminal \mita{expr} must be instantiated as an
786explicit C-language expression, while in an array reference,
787\mtt{\mth{\mita{expr}_1} \mtt{[} \mth{\mita{expr}_2} \mtt{]}}, the
788nonterminal \mth{\mita{expr}_2}, because it is delimited by brackets, can
789be also instantiated as \mtt{\ldots}, representing an arbitrary expression. To
790distinguish between the various possibilities, we define three nonterminals
791for expressions: {\em expr} does not allow either top-level nests or
792ellipses, {\em nest\_expr} allows a nest but not an ellipsis, and {\em
793dot\_expr} allows both. The EXPR macro is used to express these variants
794in a concise way.
795
796\begin{grammar}
797 \RULE{\rt{expr}}
798 \CASE{\NT{EXPR}\mth{(}\NT{expr}\mth{)}}
799
800 \RULE{\rt{nest\_expr}}
801 \CASE{\NT{EXPR}\mth{(}\NT{nest\_expr}\mth{)}}
802 \CASE{\NT{NEST}\mth{(}\NT{nest\_expr}, \NT{exp\_whencode}\mth{)}}
803
804 \RULE{\rt{dot\_expr}}
805 \CASE{\NT{EXPR}\mth{(}\NT{dot\_expr}\mth{)}}
806 \CASE{\NT{NEST}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)}}
807 \CASE{...~\opt{\NT{exp\_whencode}}}
808
809 \RULE{\rt{EXPR}\mth{(}\rt{exp}\mth{)}}
810 \CASE{\NT{exp} \NT{assign\_op} \NT{exp}}
811 \CASE{\NT{exp}++}
812 \CASE{\NT{exp}--}
813 \CASE{\NT{unary\_op} \NT{exp}}
814 \CASE{\NT{exp} \NT{bin\_op} \NT{exp}}
815 \CASE{\NT{exp} ?~\NT{dot\_expr} :~\NT{exp}}
816 \CASE{(\NT{type}) \NT{exp}}
817 \CASE{\NT{exp} [\NT{dot\_expr}]}
818 \CASE{\NT{exp} .~\NT{id}}
819 \CASE{\NT{exp} -> \NT{id}}
820 \CASE{\NT{exp}(\opt{\NT{PARAMSEQ}\mth{(}\NT{arg}, \NT{exp\_whencode}\mth{)}})}
821 \CASE{\NT{id}}
822% \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
823% \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
824 \CASE{\mth{\T{metaid}^{\ssf{Exp}}}}
825% \CASE{\mth{\T{metaid}^{\ssf{Err}}}}
826 \CASE{\mth{\T{metaid}^{\ssf{Const}}}}
827 \CASE{\NT{const}}
828 \CASE{(\NT{dot\_expr})}
829 \CASE{\NT{OR}\mth{(}\NT{exp}\mth{)}}
830
831 \RULE{\rt{arg}}
832 \CASE{\NT{nest\_expr}}
833 \CASE{\mth{\T{metaid}^{\ssf{ExpList}}}}
834
835 \RULE{\rt{exp\_whencode}}
836 \CASE{when != \NT{expr}}
837
838 \RULE{\rt{assign\_op}}
839 \CASE{= \OR -= \OR += \OR *= \OR /= \OR \%=}
840 \CASE{\&= \OR |= \OR \caret= \OR \lt\lt= \OR \gt\gt=}
841
842 \RULE{\rt{bin\_op}}
843 \CASE{* \OR / \OR \% \OR + \OR -}
844 \CASE{\lt\lt \OR \gt\gt \OR \caret\xspace \OR \& \OR \ttmid}
845 \CASE{< \OR > \OR <= \OR >= \OR == \OR != \OR \&\& \OR \ttmid\ttmid}
846
847 \RULE{\rt{unary\_op}}
848 \CASE{++ \OR -- \OR \& \OR * \OR + \OR - \OR !}
849
850\end{grammar}
851
852\section{Constants, Identifiers and Types for Transformations}
853
854\begin{grammar}
855 \RULE{\rt{const}}
856 \CASE{\NT{string}}
857 \CASE{[0-9]+}
858 \CASE{\mth{\cdots}}
859
860 \RULE{\rt{string}}
861 \CASE{"\any{[\^{}"]}"}
862
863 \RULE{\rt{id}}
864 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Id}}}}
865
866 \RULE{\rt{typedef\_ident}}
867 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Type}}}}
868
869 \RULE{\rt{type}}
870 \CASE{\NT{ctype} \OR \mth{\T{metaid}^{\ssf{Type}}}}
871
872 \RULE{\rt{pathToIsoFile}}
873 \CASE{<.*>}
951c7801
C
874
875 \RULE{\rt{regexp}}
876 \CASE{"\any{[\^{}"]}"}
faf9a90c
C
877\end{grammar}
878
faf9a90c
C
879
880%%% Local Variables:
881%%% mode: LaTeX
708f4980 882%%% TeX-master: "main_grammar"
5636bb2c 883%%% coding: utf-8
faf9a90c
C
884%%% TeX-PDF-mode: t
885%%% ispell-local-dictionary: "american"
886%%% End: