98dab47f87a5b0ea3e7af229ee810cda390769d0
[bpt/coccinelle.git] / docs / manual / cocci_syntax.tex
1
2 %\section{The SmPL Grammar}
3
4 % This section presents the SmPL grammar. This definition follows closely
5 % our implementation using the Menhir parser generator \cite{menhir}.
6
7 This document presents the grammar of the SmPL language used by the
8 \href{http://coccinelle.lip6.fr/}{Coccinelle tool}. For the most
9 part, the grammar is written using standard notation. In some rules,
10 however, the left-hand side is in all uppercase letters. These are
11 macros, which take one or more grammar rule right-hand-sides as
12 arguments. The grammar also uses some unspecified nonterminals, such
13 as \T{id}, \T{const}, etc. These refer to the sets suggested by
14 the name, {\em i.e.}, \T{id} refers to the set of possible
15 C-language identifiers, while \T{const} refers to the set of
16 possible C-language constants.
17 %
18 \ifhevea
19 A PDF version of this documentation is available at
20 \url{http://coccinelle.lip6.fr/docs/main_grammar.pdf}.
21 \else
22 An HTML version of this documentation is available online at
23 \url{http://coccinelle.lip6.fr/docs/main_grammar.html}.
24 \fi
25
26 \section{Program}
27
28 \begin{grammar}
29 \RULE{\rt{program}}
30 \CASE{\any{\NT{include\_cocci}} \some{\NT{changeset}}}
31
32 \RULE{\rt{include\_cocci}}
33 \CASE{using \NT{string}}
34 \CASE{using \NT{pathToIsoFile}}
35 \CASE{virtual \T{id} \ANY{, \T{id}}}
36
37 \RULE{\rt{changeset}}
38 \CASE{\NT{metavariables} \NT{transformation}}
39 \CASE{\NT{script\_metavariables} \T{script\_code}}
40 % \CASE{\NT{metavariables} \ANY{--- filename +++ filename} \NT{transformation}}
41 \end{grammar}
42
43 \noindent
44 \T{script\_code} is any code in the chosen scripting language. Parsing of
45 the semantic patch does not check the validity of this code; any errors are
46 first detected when the code is executed. Furthermore, \texttt{@} should
47 not be used in this code. Spatch scans the script code for the next
48 \texttt{@} and considers that to be the beginning of the next rule, even if
49 \texttt{@} occurs within, e.g., a string or a comment.
50
51 The \texttt{virtual} keyword is used to declare virtual rules. Virtual
52 rules may be subsequently used as a dependency for the rules in the
53 SmPL file. Whether a virtual rule is defined or not is controlled by
54 the \texttt{-D} option on the command line.
55
56 % Between the metavariables and the transformation rule, there can be a
57 % specification of constraints on the names of the old and new files,
58 % analogous to the filename specifications in the standard patch syntax.
59 % (see Figure \ref{scsiglue_patch}).
60
61 \section{Metavariables for transformations}
62
63 The \NT{rulename} portion of the metavariable declaration can specify
64 properties of a rule such as its name, the names of the rules that it
65 depends on, the isomorphisms to be used in processing the rule, and whether
66 quantification over paths should be universal or existential. The optional
67 annotation {\tt expression} indicates that the pattern is to be considered
68 as matching an expression, and thus can be used to avoid some parsing
69 problems.
70
71 The \NT{metadecl} portion of the metavariable declaration defines various
72 types of metavariables that will be used for matching in the transformation
73 section.
74
75 \begin{grammar}
76 \RULE{\rt{metavariables}}
77 \CASE{@@ \any{\NT{metadecl}} @@}
78 \CASE{@ \NT{rulename} @ \any{\NT{metadecl}} @@}
79
80 \RULE{\rt{rulename}}
81 \CASE{\T{id} \OPT{extends \T{id}} \OPT{depends on \NT{dep}} \opt{\NT{iso}}
82 \opt{\NT{disable-iso}} \opt{\NT{exists}} \opt{expression}}
83
84 \RULE{\rt{dep}}
85 \CASE{\NT{pnrule}}
86 \CASE{\NT{dep} \&\& \NT{dep}}
87 \CASE{\NT{dep} || \NT{dep}}
88
89 \RULE{\rt{pnrule}}
90 \CASE{\T{id}}
91 \CASE{!\T{id}}
92 \CASE{ever \T{id}}
93 \CASE{never \T{id}}
94 \CASE{(\NT{dep})}
95
96 \RULE{\rt{iso}}
97 \CASE{using \NT{string} \ANY{, \NT{string}}}
98
99 \RULE{\rt{disable-iso}}
100 \CASE{disable \NT{COMMA\_LIST}\mth{(}\T{id}\mth{)}}
101
102 \RULE{\rt{exists}}
103 \CASE{exists}
104 \CASE{forall}
105 % \CASE{\opt{reverse} forall}
106
107 \RULE{\rt{COMMA\_LIST}\mth{(}\rt{elem}\mth{)}}
108 \CASE{\NT{elem} \ANY{, \NT{elem}}}
109 \end{grammar}
110
111 The keyword \KW{disable} is normally used with the names of
112 isomorphisms defined in standard.iso or whatever isomorphism file has been
113 included. There are, however, some other isomorphisms that are built into
114 the implementation of Coccinelle and that can be disabled as well. Their
115 names are given below. In each case, the text describes the standard
116 behavior. Using \NT{disable-iso} with the given name disables this behavior.
117
118 \begin{itemize}
119 \item \KW{optional\_storage}: A SmPL function definition that does not
120 specify any visibility (i.e., static or extern), or a SmPL variable
121 declaration that does not specify any storage (i.e., auto, static,
122 register, or extern), matches a function declaration or variable
123 declaration with any visibility or storage, respectively.
124 \item \KW{optional\_qualifier}: This is similar to \KW{optional\_storage},
125 except that here it is the qualifier (i.e., const or volatile) that does
126 not have to be specified in the SmPL code, but may be present in the C code.
127 \item \KW{value\_format}: Integers in various formats, e.g., 1 and 0x1, are
128 considered to be equivalent in the matching process.
129 \item \KW{comm\_assoc}: An expression of the form \NT{exp} \NT{bin\_op}
130 \KW{...}, where \NT{bin\_op} is commutative and associative, is
131 considered to match any top-level sequence of \NT{bin\_op} operators
132 containing \NT{exp} as the top-level argument.
133 \end{itemize}
134
135 The possible types of metavariable declarations are defined by the grammar
136 rule below. Metavariables should occur at least once in the transformation
137 immediately following their declaration. Fresh metavariables must only be
138 used in {\tt +} code. These properties are not expressed in the grammar,
139 but are checked by a subsequent analysis. The metavariables are designated
140 according to the kind of terms they can match, such as a statement, an
141 identifier, or an expression. An expression metavariable can be further
142 constrained by its type. A declaration metavariable matches the
143 declaration of one or more variables, all sharing the same type
144 specification ({\em e.g.}, {\tt int a,b,c=3;}). A field metavariable does
145 the same, but for structure fields.
146
147 \begin{grammar}
148 \RULE{\rt{metadecl}}
149 \CASE{fresh identifier \NT{ids} ;}
150 \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
151 \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_virt\_or\_not\_eq}\mth{)} ;}
152 \CASE{parameter \opt{list} \NT{ids} ;}
153 \CASE{parameter list [ \NT{id} ] \NT{ids} ;}
154 \CASE{parameter list [ \NT{const} ] \NT{ids} ;}
155 \CASE{type \NT{ids} ;}
156 \CASE{statement \opt{list} \NT{ids} ;}
157 \CASE{declaration \opt{list} \NT{ids} ;}
158 \CASE{field \opt{list} \NT{ids} ;}
159 \CASE{typedef \NT{ids} ;}
160 \CASE{declarer name \NT{ids} ;}
161 % \CASE{\opt{local} function \NT{pmid\_with\_not\_eq\_list} ;}
162 \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
163 \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
164 \CASE{iterator name \NT{ids} ;}
165 \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
166 \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
167 % \CASE{error \NT{pmid\_with\_not\_eq\_list} ; }
168 \CASE{\opt{local} idexpression \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
169 \CASE{\opt{local} idexpression \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
170 \CASE{\opt{local} idexpression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
171 \CASE{expression list \NT{ids} ;}
172 \CASE{expression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
173 \CASE{expression enum \any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
174 \CASE{expression struct \any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
175 \CASE{expression union \any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
176 \CASE{expression \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
177 \CASE{expression list [ \NT{id} ] \NT{ids} ;}
178 \CASE{expression list [ \NT{const} ] \NT{ids} ;}
179 \CASE{\NT{ctype} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
180 \CASE{\NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
181 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
182 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
183 \CASE{constant \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
184 \CASE{constant \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
185 \CASE{position \opt{any} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq\_mid}\mth{)} ;}
186 \end{grammar}
187
188 It is possible to specify that an expression list or a parameter list
189 metavariable should match a specific number of expressions or parameters.
190
191 \begin{grammar}
192 \RULE{\rt{ids}}
193 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{pmid}\mth{)}}
194
195 \RULE{\rt{pmid}}
196 \CASE{\T{id}}
197 \CASE{\NT{mid}}
198 % \CASE{list}
199 % \CASE{error}
200 % \CASE{type}
201
202 \RULE{\rt{mid}} \CASE{\T{rulename\_id}.\T{id}}
203
204 \RULE{\rt{pmid\_with\_regexp}}
205 \CASE{\NT{pmid} \~{}= \NT{regexp}}
206
207 \RULE{\rt{pmid\_with\_not\_eq}}
208 \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_meta}}}
209 \CASE{\NT{pmid}
210 \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_meta}\mth{)} \ttrb}}
211
212 \RULE{\rt{pmid\_with\_virt\_or\_not\_eq}}
213 \CASE{virtual.\T{id}}
214 \CASE{\NT{pmid\_with\_not\_eq}}
215
216 \RULE{\rt{pmid\_with\_not\_ceq}}
217 \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_cst}}}
218 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_cst}\mth{)} \ttrb}}
219
220 \RULE{\rt{id\_or\_cst}}
221 \CASE{\T{id}}
222 \CASE{\T{integer}}
223
224 \RULE{\rt{id\_or\_meta}}
225 \CASE{\T{id}}
226 \CASE{\T{rulename\_id}.\T{id}}
227
228 \RULE{\rt{pmid\_with\_not\_eq\_mid}}
229 \CASE{\NT{pmid} \OPT{!= \NT{mid}}}
230 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{mid}\mth{)} \ttrb}}
231 \end{grammar}
232
233 Subsequently, we refer to arbitrary metavariables as
234 \mth{\msf{metaid}^{\mbox{\scriptsize{\it{ty}}}}}, where {\it{ty}}
235 indicates the {\it metakind} used in the declaration of the variable.
236 For example, \mth{\msf{metaid}^{\ssf{Type}}} refers to a metavariable
237 that was declared using \texttt{type} and stands for any type.
238
239 The \NT{ctype} and \NT{ctypes} nonterminals are used by both the grammar of
240 metavariable declarations and the grammar of transformations, and are
241 defined on page~\pageref{types}.
242
243 An identifier metavariable with {\tt virtual} as its ``rule name'' is given
244 a value on the command line. For example, if a semantic patch contains a
245 rule that declares an identifier metavariable with the name {\tt
246 virtual.alloc}, then the command line could contain {\tt -D
247 alloc=kmalloc}. There should not be space around the {\tt =}. An
248 example is in {\tt demos/vm.cocci} and {\tt demos/vm.c}.
249
250 \section{Metavariables for scripts}
251
252 Metavariables for scripts can only be inherited from transformation rules.
253 In the spirit of scripting languages such as Python that use dynamic
254 typing, metavariables for scripts do not include type declarations.
255
256 \begin{grammar}
257 \RULE{\rt{script\_metavariables}}
258 \CASE{@ script:\NT{language} \OPT{\NT{rulename}} \OPT{depends on \NT{dep}} @
259 \any{\NT{script\_metadecl}} @@}
260 \CASE{@ initialize:\NT{language} \OPT{depends on \NT{dep}} @}
261 \CASE{@ finalize:\NT{language} \OPT{depends on \NT{dep}} @}
262
263 \RULE{\rt{language}} \CASE{python} \CASE{ocaml}
264
265 \RULE{\rt{script\_metadecl}}
266 \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;}
267 \CASE{\T{id} ;}
268 \end{grammar}
269
270 Currently, the only scripting languages that are supported are Python and
271 OCaml, indicated using {\tt python} and {\tt ocaml}, respectively. The
272 set of available scripting languages may be extended at some point.
273
274 Script rules declared with \KW{initialize} are run before the treatment of
275 any file. Script rules declared with \KW{finalize} are run when the
276 treatment of all of the files has completed. There can be at most one of
277 each per scripting language (thus currently at most one of each).
278 Initialize and finalize script rules do not have access to SmPL
279 metavariables. Nevertheless, a finalize script rule can access any
280 variables initialized by the other script rules, allowing information to be
281 transmitted from the matching process to the finalize rule.
282
283 A script metavariable that does not specify an origin, using \texttt{<<},
284 is newly declared by the script. This metavariable should be assigned to a
285 string and can be inherited by subsequent rules as an identifier. In
286 Python, the assignment of such a metavariable $x$ should refer to the
287 metavariable as {\tt coccinelle.\(x\)}. Examples are in the files
288 \texttt{demos/pythontococci.cocci} and \texttt{demos/camltococci.cocci}.
289
290 In an OCaml script, the following extended form of \textit{script\_metadecl}
291 may be used:
292
293 \begin{grammar}
294 \RULE{\rt{script\_metadecl}}
295 \CASE{(\T{id},\T{id}) <{}< \T{rulename\_id}.\T{id} ;}
296 \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;}
297 \CASE{\T{id} ;}
298 \end{grammar}
299
300 \noindent
301 In a declaration of the form \texttt{(\T{id},\T{id}) <{}<
302 \T{rulename\_id}.\T{id} ;}, the left component of \texttt{(\T{id},\T{id})}
303 receives a string representation of the value of the inherited metavariable
304 while the right component receives its abstract syntax tree. The file
305 \texttt{parsing\_c/ast\_c.ml} in the Coccinelle implementation gives some
306 information about the structure of the abstract syntax tree. Either the
307 left or right component may be replaced by \verb+_+, indicating that the
308 string representation or abstract syntax tree representation is not
309 wanted, respectively.
310
311 \section{Transformation}
312
313 The transformation specification essentially has the form of C code,
314 except that lines to remove are annotated with \verb+-+ in the first
315 column, and lines to add are annotated with \verb-+-. A
316 transformation specification can also use {\em dots}, ``\verb-...-'',
317 describing an arbitrary sequence of function arguments or instructions
318 within a control-flow path. Dots may be modified with a {\tt when}
319 clause, indicating a pattern that should not occur anywhere within the
320 matched sequence. Finally, a transformation can specify a disjunction
321 of patterns, of the form \mtt{( \mth{\mita{pat}_1} | \mita{\ldots} |
322 \mth{\mita{pat}_n} )} where each \texttt{(}, \texttt{|} or
323 \texttt{)} is in column 0 or preceded by \texttt{\textbackslash}.
324
325 The grammar that we present for the transformation is not actually the
326 grammar of the SmPL code that can be written by the programmer, but is
327 instead the grammar of the slice of this consisting of the {\tt -}
328 annotated and the unannotated code (the context of the transformed lines),
329 or the {\tt +} annotated code and the unannotated code. For example, for
330 parsing purposes, the following transformation
331 %presented in Section \ref{sec:seq2}
332 is split into the two variants shown below and each is parsed
333 separately.
334
335 \begin{center}
336 \begin{tabular}{c}
337 \begin{lstlisting}[language=Cocci]
338 proc_info_func(...) {
339 <...
340 @-- hostno
341 @++ hostptr->host_no
342 ...>
343 }
344 \end{lstlisting}\\
345 \end{tabular}
346 \end{center}
347
348 {%\sizecodebis
349 \begin{center}
350 \begin{tabular}{p{5cm}p{3cm}p{5cm}}
351 \begin{lstlisting}[language=Cocci]
352 proc_info_func(...) {
353 <...
354 @-- hostno
355 ...>
356 }
357 \end{lstlisting}
358 &&
359 \begin{lstlisting}[language=Cocci]
360 proc_info_func(...) {
361 <...
362 @++ hostptr->host_no
363 ...>
364 }
365 \end{lstlisting}
366 \end{tabular}
367 \end{center}
368 }
369
370 \noindent
371 Requiring that both slices parse correctly ensures that the rule matches
372 syntactically valid C code and that it produces syntactically valid C code.
373 The generated parse trees are then merged for use in the subsequent
374 matching and transformation process.
375
376 The grammar for the minus or plus slice of a transformation is as follows:
377
378 \begin{grammar}
379
380 \RULE{\rt{transformation}}
381 \CASE{\some{\NT{include}}}
382 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
383 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
384 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{fundecl}, \NT{when}\mth{)}}
385
386 \RULE{\rt{include}}
387 \CASE{\#include \T{include\_string}}
388
389 % \RULE{\rt{fun\_decl\_stmt}}
390 % \CASE{\NT{decl\_stmt}}
391 % \CASE{\NT{fundecl}}
392
393 % \CASE{\NT{ctype}}
394 % \CASE{\ttlb \NT{initialize\_list} \ttrb}
395 % \CASE{\NT{toplevel\_seq\_start\_after\_dots\_init}}
396 %
397 % \RULE{\rt{toplevel\_seq\_start\_after\_dots\_init}}
398 % \CASE{\NT{stmt\_dots} \NT{toplevel\_after\_dots}}
399 % \CASE{\NT{expr} \opt{\NT{toplevel\_after\_exp}}}
400 % \CASE{\NT{decl\_stmt\_expr} \opt{\NT{toplevel\_after\_stmt}}}
401 %
402 % \RULE{\rt{stmt\_dots}}
403 % \CASE{... \any{\NT{when}}}
404 % \CASE{<... \any{\NT{when}} \NT{nest\_after\_dots} ...>}
405 % \CASE{<+... \any{\NT{when}} \NT{nest\_after\_dots} ...+>}
406
407 \RULE{\rt{when}}
408 \CASE{when != \NT{when\_code}}
409 \CASE{when = \NT{rule\_elem\_stmt}}
410 \CASE{when \NT{COMMA\_LIST}\mth{(}\NT{any\_strict}\mth{)}}
411 \CASE{when true != \NT{expr}}
412 \CASE{when false != \NT{expr}}
413
414 \RULE{\rt{when\_code}}
415 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
416 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
417
418 \RULE{\rt{rule\_elem\_stmt}}
419 \CASE{\NT{one\_decl}}
420 \CASE{\NT{expr};}
421 \CASE{return \opt{\NT{expr}};}
422 \CASE{break;}
423 \CASE{continue;}
424 \CASE{\bs(\NT{rule\_elem\_stmt} \SOME{\bs| \NT{rule\_elem\_stmt}}\bs)}
425
426 \RULE{\rt{any\_strict}}
427 \CASE{any}
428 \CASE{strict}
429 \CASE{forall}
430 \CASE{exists}
431
432 % \RULE{\rt{nest\_after\_dots}}
433 % \CASE{\NT{decl\_stmt\_exp} \opt{\NT{nest\_after\_stmt}}}
434 % \CASE{\opt{\NT{exp}} \opt{\NT{nest\_after\_exp}}}
435 %
436 % \RULE{\rt{nest\_after\_stmt}}
437 % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
438 % \CASE{\NT{decl\_stmt} \opt{\NT{nest\_after\_stmt}}}
439 %
440 % \RULE{\rt{nest\_after\_exp}}
441 % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
442 %
443 % \RULE{\rt{toplevel\_after\_dots}}
444 % \CASE{\opt{\NT{toplevel\_after\_exp}}}
445 % \CASE{\NT{exp} \opt{\NT{toplevel\_after\_exp}}}
446 % \CASE{\NT{decl\_stmt\_expr} \NT{toplevel\_after\_stmt}}
447 %
448 % \RULE{\rt{toplevel\_after\_exp}}
449 % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
450 %
451 % \RULE{\rt{decl\_stmt\_expr}}
452 % \CASE{TMetaStmList$^\ddag$}
453 % \CASE{\NT{decl\_var}}
454 % \CASE{\NT{stmt}}
455 % \CASE{(\NT{stmt\_seq} \ANY{| \NT{stmt\_seq}})}
456 %
457 % \RULE{\rt{toplevel\_after\_stmt}}
458 % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
459 % \CASE{\NT{decl\_stmt} \NT{toplevel\_after\_stmt}}
460
461 \end{grammar}
462
463 \begin{grammar}
464 \RULE{\rt{OPTDOTSEQ}\mth{(}\rt{grammar\_ds}, \rt{when\_ds}\mth{)}}
465 \CASE{}\multicolumn{3}{r}{\hspace{1cm}
466 \KW{\opt{... \opt{\NT{when\_ds}}} \NT{grammar\_ds}
467 \ANY{... \opt{\NT{when\_ds}} \NT{grammar\_ds}}
468 \opt{... \opt{\NT{when\_ds}}}}
469 }
470
471 % \CASE{\opt{... \opt{\NT{when\_ds}}} \NT{grammar}
472 % \ANY{... \opt{\NT{when\_ds}} \NT{grammar}}
473 % \opt{... \opt{\NT{when\_ds}}}}
474 % \CASE{<... \any{\NT{when\_ds}} \NT{grammar} ...>}
475 % \CASE{<+... \any{\NT{when\_ds}} \NT{grammar} ...+>}
476
477 \end{grammar}
478
479 \noindent
480 Lines may be annotated with an element of the set $\{\mtt{-}, \mtt{+},
481 \mtt{*}\}$ or the singleton $\mtt{?}$, or one of each set. \mtt{?}
482 represents at most one match of the given pattern. \mtt{*} is used for
483 semantic match, \emph{i.e.}, a pattern that highlights the fragments
484 annotated with \mtt{*}, but does not perform any modification of the
485 matched code. \mtt{*} cannot be mixed with \mtt{-} and \mtt{+}. There are
486 some constraints on the use of these annotations:
487 \begin{itemize}
488 \item Dots, {\em i.e.} \texttt{...}, cannot occur on a line marked
489 \texttt{+}.
490 \item Nested dots, {\em i.e.}, dots enclosed in {\tt <} and {\tt >}, cannot
491 occur on a line with any marking.
492 \end{itemize}
493
494 Each element of a disjunction must be a proper term like an
495 expression, a statement, an identifier or a declaration. Thus, the
496 rule on the left below is not a syntactically correct SmPL rule. One may
497 use the rule on the right instead.
498
499 \begin{center}
500 \begin{tabular}{l@{\hspace{5cm}}r}
501 \begin{lstlisting}[language=Cocci]
502 @@
503 type T;
504 T b;
505 @@
506
507 (
508 writeb(...,
509 |
510 readb(
511 )
512 @--(T)
513 b)
514 \end{lstlisting}
515 &
516 \begin{lstlisting}[language=Cocci]
517 @@
518 type T;
519 T b;
520 @@
521
522 (
523 read
524 |
525 write
526 )
527 (...,
528 @-- (T)
529 b)
530 \end{lstlisting}
531 \\
532 \end{tabular}
533 \end{center}
534
535 \section{Types}
536 \label{types}
537
538 \begin{grammar}
539
540 \RULE{\rt{ctypes}}
541 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{ctype}\mth{)}}
542
543 \RULE{\rt{ctype}}
544 \CASE{\opt{\NT{const\_vol}} \NT{generic\_ctype} \any{*}}
545 \CASE{\opt{\NT{const\_vol}} void \some{*}}
546 \CASE{(\NT{ctype} \ANY{| \NT{ctype}})}
547
548 \RULE{\rt{const\_vol}}
549 \CASE{const}
550 \CASE{volatile}
551
552 \RULE{\rt{generic\_ctype}}
553 \CASE{\NT{ctype\_qualif}}
554 \CASE{\opt{\NT{ctype\_qualif}} char}
555 \CASE{\opt{\NT{ctype\_qualif}} short}
556 \CASE{\opt{\NT{ctype\_qualif}} int}
557 \CASE{\opt{\NT{ctype\_qualif}} long}
558 \CASE{\opt{\NT{ctype\_qualif}} long long}
559 \CASE{double}
560 \CASE{float}
561 \CASE{enum \NT{id} \{ \NT{PARAMSEQ}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)} \OPT{,} \}}
562 \CASE{\OPT{struct\OR union} \T{id} \OPT{\{ \any{\NT{struct\_decl\_list}} \}}}
563
564 \RULE{\rt{ctype\_qualif}}
565 \CASE{unsigned}
566 \CASE{signed}
567
568 \RULE{\rt{struct\_decl\_list}}
569 \CASE{\NT{struct\_decl\_list\_start}}
570
571 \RULE{\rt{struct\_decl\_list\_start}}
572 \CASE{\NT{struct\_decl}}
573 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
574 \CASE{... \opt{when != \NT{struct\_decl}}$^\dag$ \opt{\NT{continue\_struct\_decl\_list}}}
575
576 \RULE{\rt{continue\_struct\_decl\_list}}
577 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
578 \CASE{\NT{struct\_decl}}
579
580 \RULE{\rt{struct\_decl}}
581 \CASE{\NT{ctype} \NT{d\_ident};}
582 \CASE{\NT{fn\_ctype} (* \NT{d\_ident}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)});}
583 \CASE{\opt{\NT{const\_vol}} \T{id} \NT{d\_ident};}
584
585 \RULE{\rt{d\_ident}}
586 \CASE{\T{id} \any{[\opt{\NT{expr}}]}}
587
588 \RULE{\rt{fn\_ctype}}
589 \CASE{\NT{generic\_ctype} \any{*}}
590 \CASE{void \any{*}}
591
592 \RULE{\rt{name\_opt\_decl}}
593 \CASE{\NT{decl}}
594 \CASE{\NT{ctype}}
595 \CASE{\NT{fn\_ctype}}
596 \end{grammar}
597
598 $^\dag$ The optional \texttt{when} construct ends at the end of the line.
599
600 \section{Function declarations}
601
602 \begin{grammar}
603
604 \RULE{\rt{fundecl}}
605 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
606 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}})
607 \ttlb~\opt{\NT{stmt\_seq}} \ttrb}
608
609 \RULE{\rt{funproto}}
610 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
611 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}});}
612
613 \RULE{\rt{funinfo}}
614 \CASE{inline}
615 \CASE{\NT{storage}}
616 % \CASE{\NT{attr}}
617
618 \RULE{\rt{storage}}
619 \CASE{static}
620 \CASE{auto}
621 \CASE{register}
622 \CASE{extern}
623
624 \RULE{\rt{funid}}
625 \CASE{\T{id}}
626 \CASE{\mth{\T{metaid}^{\ssf{Id}}}}
627 % \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
628 % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
629
630 \RULE{\rt{param}}
631 \CASE{\NT{type} \T{id}}
632 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
633 \CASE{\mth{\T{metaid}^{\ssf{ParamList}}}}
634
635 \RULE{\rt{decl}}
636 \CASE{\NT{ctype} \NT{id}}
637 \CASE{\NT{fn\_ctype} (* \NT{id}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)})}
638 \CASE{void}
639 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
640 \end{grammar}
641
642 \begin{grammar}
643 \RULE{\rt{PARAMSEQ}\mth{(}\rt{gram\_p}, \rt{when\_p}\mth{)}}
644 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{gram\_p} \OR \ldots \opt{\NT{when\_p}}\mth{)}}
645 \end{grammar}
646
647 To match a function it is not necessary to provide all of the annotations
648 that appear before the function name. For example, the following semantic
649 patch:
650
651 \begin{lstlisting}[language=Cocci]
652 @@
653 @@
654
655 foo() { ... }
656 \end{lstlisting}
657
658 \noindent
659 matches a function declared as follows:
660
661 \begin{lstlisting}[language=C]
662 static int foo() { return 12; }
663 \end{lstlisting}
664
665 \noindent
666 This behavior can be turned off by disabling the \KW{optional\_storage}
667 isomorphism. If one adds code before a function declaration, then the
668 effect depends on the kind of code that is added. If the added code is a
669 function definition or CPP code, then the new code is placed before
670 all information associated with the function definition, including any
671 comments preceding the function definition. On the other hand, if the new
672 code is associated with the function, such as the addition of the keyword
673 {\tt static}, the new code is placed exactly where it appears with respect
674 to the rest of the function definition in the semantic patch. For example,
675
676 \begin{lstlisting}[language=Cocci]
677 @@
678 @@
679
680 + static
681 foo() { ... }
682 \end{lstlisting}
683
684 \noindent
685 causes {\tt static} to be placed just before the function name. The following
686 causes it to be placed just before the type:
687
688 \begin{lstlisting}[language=Cocci]
689 @@
690 type T;
691 @@
692
693 + static
694 T foo() { ... }
695 \end{lstlisting}
696
697 \noindent
698 It may be necessary to consider several cases to ensure that the added code
699 is placed in the right position. For example, one may need one pattern
700 that considers that the function is declared {\tt inline} and another that
701 considers that it is not.
702
703 %\newpage
704
705 \section{Declarations}
706
707 \begin{grammar}
708 \RULE{\rt{decl\_var}}
709 % \CASE{\NT{type} \opt{\NT{id} \opt{[\opt{\NT{dot\_expr}}]}
710 % \ANY{, \NT{id} \opt{[ \opt{\NT{dot\_expr}}]}}};}
711 \CASE{\NT{common\_decl}}
712 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
713 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
714 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) = \NT{initialize} ;}
715 \CASE{typedef \NT{ctype} \NT{typedef\_ident} ;}
716
717 \RULE{\rt{one\_decl}}
718 \CASE{\NT{common\_decl}}
719 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{id};}
720 % \CASE{\NT{storage} \NT{ctype} \NT{id} \opt{[\opt{\NT{dot\\_expr}}]} = \NT{nest\\_expr};}
721 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} ;}
722
723 \RULE{\rt{common\_decl}}
724 \CASE{\NT{ctype};}
725 \CASE{\NT{funproto}}
726 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{d\_ident} = \NT{initialize} ;}
727 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} = \NT{initialize} ;}
728 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) ;}
729 \CASE{\NT{decl\_ident} ( \OPT{\NT{COMMA\_LIST}\mth{(}\NT{expr}\mth{)}} ) ;}
730
731 \RULE{\rt{initialize}}
732 \CASE{\NT{dot\_expr}}
733 \CASE{\mth{\T{metaid}^{\ssf{Initialiser}}}}
734 \CASE{\ttlb~\opt{\NT{COMMA\_LIST}\mth{(}\NT{init\_list\_elem}\mth{)}}~\ttrb}
735
736 \RULE{\rt{init\_list\_elem}}
737 \CASE{\NT{dot\_expr}}
738 \CASE{\NT{designator} = \NT{dot\_expr}}
739 \CASE{\NT{id} : \NT{dot\_expr}}
740
741 \RULE{\rt{designator}}
742 \CASE{. \NT{id}}
743 \CASE{[ \NT{dot\_expr} ]}
744 \CASE{[ \NT{dot\_expr} ... \NT{dot\_expr} ]}
745
746 \RULE{\rt{decl\_ident}}
747 \CASE{\T{DeclarerId}}
748 \CASE{\mth{\T{metaid}^{\ssf{Declarer}}}}
749 \end{grammar}
750
751 \section{Statements}
752
753 The first rule {\em stmt} describes the various forms of a statement.
754 The remaining rules implement the constraints that are sensitive to the
755 context in which the statement occurs: {\em single\_stmt} for a
756 context in which only one statement is allowed, and {\em decl\_stmt}
757 for a context in which a declaration, statement, or sequence thereof is
758 allowed.
759
760 \begin{grammar}
761 \RULE{\rt{stmt}}
762 \CASE{\NT{include}}
763 \CASE{\mth{\T{metaid}^{\ssf{Stmt}}}}
764 \CASE{\NT{expr};}
765 \CASE{if (\NT{dot\_expr}) \NT{single\_stmt} \opt{else \NT{single\_stmt}}}
766 \CASE{for (\opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}})
767 \NT{single\_stmt}}
768 \CASE{while (\NT{dot\_expr}) \NT{single\_stmt}}
769 \CASE{do \NT{single\_stmt} while (\NT{dot\_expr});}
770 \CASE{\NT{iter\_ident} (\any{\NT{dot\_expr}}) \NT{single\_stmt}}
771 \CASE{switch (\opt{\NT{dot\_expr}}) \ttlb \any{\NT{case\_line}} \ttrb}
772 \CASE{return \opt{\NT{dot\_expr}};}
773 \CASE{\ttlb~\opt{\NT{stmt\_seq}} \ttrb}
774 \CASE{\NT{NEST}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
775 \CASE{\NT{NEST}\mth{(}\NT{expr}, \NT{when}\mth{)}}
776 \CASE{break;}
777 \CASE{continue;}
778 \CASE{\NT{id}:}
779 \CASE{goto \NT{id};}
780 \CASE{\ttlb \NT{stmt\_seq} \ttrb}
781
782 \RULE{\rt{single\_stmt}}
783 \CASE{\NT{stmt}}
784 \CASE{\NT{OR}\mth{(}\NT{stmt}\mth{)}}
785
786 \RULE{\rt{decl\_stmt}}
787 \CASE{\mth{\T{metaid}^{\ssf{StmtList}}}}
788 \CASE{\NT{decl\_var}}
789 \CASE{\NT{stmt}}
790 \CASE{\NT{OR}\mth{(}\NT{stmt\_seq}\mth{)}}
791
792 \RULE{\rt{stmt\_seq}}
793 \CASE{\any{\NT{decl\_stmt}}
794 \opt{\NT{DOTSEQ}\mth{(}\some{\NT{decl\_stmt}},
795 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
796 \CASE{\any{\NT{decl\_stmt}}
797 \opt{\NT{DOTSEQ}\mth{(}\NT{expr},
798 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
799
800 \RULE{\rt{case\_line}}
801 \CASE{default :~\NT{stmt\_seq}}
802 \CASE{case \NT{dot\_expr} :~\NT{stmt\_seq}}
803
804 \RULE{\rt{iter\_ident}}
805 \CASE{\T{IteratorId}}
806 \CASE{\mth{\T{metaid}^{\ssf{Iterator}}}}
807 \end{grammar}
808
809 \begin{grammar}
810 \RULE{\rt{OR}\mth{(}\rt{gram\_o}\mth{)}}
811 \CASE{( \NT{gram\_o} \ANY{\ttmid \NT{gram\_o}})}
812
813 \RULE{\rt{DOTSEQ}\mth{(}\rt{gram\_d}, \rt{when\_d}\mth{)}}
814 \CASE{\ldots \opt{\NT{when\_d}} \ANY{\NT{gram\_d} \ldots \opt{\NT{when\_d}}}}
815
816 \RULE{\rt{NEST}\mth{(}\rt{gram\_n}, \rt{when\_n}\mth{)}}
817 \CASE{<\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots>}
818 \CASE{<+\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots+>}
819 \end{grammar}
820
821 \noindent
822 OR is a macro that generates a disjunction of patterns. The three
823 tokens \T{(}, \T{\ttmid}, and \T{)} must appear in the leftmost
824 column, to differentiate them from the parentheses and bit-or tokens
825 that can appear within expressions (and cannot appear in the leftmost
826 column). These tokens may also be preceded by \texttt{\bs}
827 when they are used in another column. These tokens are furthermore
828 different from (, \(\mid\), and ), which are part of the grammar
829 metalanguage.
830
831 \section{Expressions}
832
833 A nest or a single ellipsis is allowed in some expression contexts, and
834 causes ambiguity in others. For example, in a sequence \mtt{\ldots
835 \mita{expr} \ldots}, the nonterminal \mita{expr} must be instantiated as an
836 explicit C-language expression, while in an array reference,
837 \mtt{\mth{\mita{expr}_1} \mtt{[} \mth{\mita{expr}_2} \mtt{]}}, the
838 nonterminal \mth{\mita{expr}_2}, because it is delimited by brackets, can
839 also be instantiated as \mtt{\ldots}, representing an arbitrary expression. To
840 distinguish between the various possibilities, we define three nonterminals
841 for expressions: {\em expr} does not allow either top-level nests or
842 ellipses, {\em nest\_expr} allows a nest but not an ellipsis, and {\em
843 dot\_expr} allows both. The EXPR macro is used to express these variants
844 in a concise way.
845
846 \begin{grammar}
847 \RULE{\rt{expr}}
848 \CASE{\NT{EXPR}\mth{(}\NT{expr}\mth{)}}
849
850 \RULE{\rt{nest\_expr}}
851 \CASE{\NT{EXPR}\mth{(}\NT{nest\_expr}\mth{)}}
852 \CASE{\NT{NEST}\mth{(}\NT{nest\_expr}, \NT{exp\_whencode}\mth{)}}
853
854 \RULE{\rt{dot\_expr}}
855 \CASE{\NT{EXPR}\mth{(}\NT{dot\_expr}\mth{)}}
856 \CASE{\NT{NEST}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)}}
857 \CASE{...~\opt{\NT{exp\_whencode}}}
858
859 \RULE{\rt{EXPR}\mth{(}\rt{exp}\mth{)}}
860 \CASE{\NT{exp} \NT{assign\_op} \NT{exp}}
861 \CASE{\NT{exp}++}
862 \CASE{\NT{exp}--}
863 \CASE{\NT{unary\_op} \NT{exp}}
864 \CASE{\NT{exp} \NT{bin\_op} \NT{exp}}
865 \CASE{\NT{exp} ?~\NT{dot\_expr} :~\NT{exp}}
866 \CASE{(\NT{type}) \NT{exp}}
867 \CASE{\NT{exp} [\NT{dot\_expr}]}
868 \CASE{\NT{exp} .~\NT{id}}
869 \CASE{\NT{exp} -> \NT{id}}
870 \CASE{\NT{exp}(\opt{\NT{PARAMSEQ}\mth{(}\NT{arg}, \NT{exp\_whencode}\mth{)}})}
871 \CASE{\NT{id}}
872 % \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
873 % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
874 \CASE{\mth{\T{metaid}^{\ssf{Exp}}}}
875 % \CASE{\mth{\T{metaid}^{\ssf{Err}}}}
876 \CASE{\mth{\T{metaid}^{\ssf{Const}}}}
877 \CASE{\NT{const}}
878 \CASE{(\NT{dot\_expr})}
879 \CASE{\NT{OR}\mth{(}\NT{exp}\mth{)}}
880
881 \RULE{\rt{arg}}
882 \CASE{\NT{nest\_expr}}
883 \CASE{\mth{\T{metaid}^{\ssf{ExpList}}}}
884
885 \RULE{\rt{exp\_whencode}}
886 \CASE{when != \NT{expr}}
887
888 \RULE{\rt{assign\_op}}
889 \CASE{= \OR -= \OR += \OR *= \OR /= \OR \%=}
890 \CASE{\&= \OR |= \OR \caret= \OR \lt\lt= \OR \gt\gt=}
891
892 \RULE{\rt{bin\_op}}
893 \CASE{* \OR / \OR \% \OR + \OR -}
894 \CASE{\lt\lt \OR \gt\gt \OR \caret\xspace \OR \& \OR \ttmid}
895 \CASE{< \OR > \OR <= \OR >= \OR == \OR != \OR \&\& \OR \ttmid\ttmid}
896
897 \RULE{\rt{unary\_op}}
898 \CASE{++ \OR -- \OR \& \OR * \OR + \OR - \OR !}
899
900 \end{grammar}
901
902 \section{Constants, Identifiers and Types for Transformations}
903
904 \begin{grammar}
905 \RULE{\rt{const}}
906 \CASE{\NT{string}}
907 \CASE{[0-9]+}
908 \CASE{\mth{\cdots}}
909
910 \RULE{\rt{string}}
911 \CASE{"\any{[\^{}"]}"}
912
913 \RULE{\rt{id}}
914 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Id}}}}
915
916 \RULE{\rt{typedef\_ident}}
917 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Type}}}}
918
919 \RULE{\rt{type}}
920 \CASE{\NT{ctype} \OR \mth{\T{metaid}^{\ssf{Type}}}}
921
922 \RULE{\rt{pathToIsoFile}}
923 \CASE{<.*>}
924
925 \RULE{\rt{regexp}}
926 \CASE{"\any{[\^{}"]}"}
927 \end{grammar}
928
929
930 %%% Local Variables:
931 %%% mode: LaTeX
932 %%% TeX-master: "main_grammar"
933 %%% coding: utf-8
934 %%% TeX-PDF-mode: t
935 %%% ispell-local-dictionary: "american"
936 %%% End: