Release coccinelle-0.2.3rc3
[bpt/coccinelle.git] / docs / manual / cocci_syntax.tex
1
2 %\section{The SmPL Grammar}
3
4 % This section presents the SmPL grammar. This definition follows closely
5 % our implementation using the Menhir parser generator \cite{menhir}.
6
7 This document presents the grammar of the SmPL language used by the
8 \href{http://coccinelle.lip6.fr/}{Coccinelle tool}. For the most
9 part, the grammar is written using standard notation. In some rules,
10 however, the left-hand side is in all uppercase letters. These are
11 macros, which take one or more grammar rule right-hand-sides as
12 arguments. The grammar also uses some unspecified nonterminals, such
13 as \T{id}, \T{const}, etc. These refer to the sets suggested by
14 the name, {\em i.e.}, \T{id} refers to the set of possible
15 C-language identifiers, while \T{const} refers to the set of
16 possible C-language constants.
17 %
18 \ifhevea
19 A PDF version of this documentation is available at
20 \url{http://coccinelle.lip6.fr/docs/main_grammar.pdf}.
21 \else
22 An HTML version of this documentation is available online at
23 \url{http://coccinelle.lip6.fr/docs/main_grammar.html}.
24 \fi
25
26 \section{Program}
27
28 \begin{grammar}
29 \RULE{\rt{program}}
30 \CASE{\any{\NT{include\_cocci}} \some{\NT{changeset}}}
31
32 \RULE{\rt{include\_cocci}}
33 \CASE{using \NT{string}}
34 \CASE{using \NT{pathToIsoFile}}
35 \CASE{virtual \T{id} \ANY{, \T{id}}}
36
37 \RULE{\rt{changeset}}
38 \CASE{\NT{metavariables} \NT{transformation}}
39 \CASE{\NT{script\_metavariables} \T{script\_code}}
40 % \CASE{\NT{metavariables} \ANY{--- filename +++ filename} \NT{transformation}}
41 \end{grammar}
42
43 \noindent
44 \T{script\_code} is any code in the chosen scripting language. Parsing of
45 the semantic patch does not check the validity of this code; any errors are
46 first detected when the code is executed. Furthermore, \texttt{@} should
47 not be used in this code. Spatch scans the script code for the next
48 \texttt{@} and considers that to be the beginning of the next rule, even if
49 \texttt{@} occurs within, e.g., a string or a comment.
50
51 The \texttt{virtual} keyword is used to declare virtual rules. Virtual
52 rules may be subsequently used as a dependency for the rules in the
53 SmPL file. Whether a virtual rule is defined or not is controlled by
54 the \texttt{-D} option on the command line.
55
56 % Between the metavariables and the transformation rule, there can be a
57 % specification of constraints on the names of the old and new files,
58 % analogous to the filename specifications in the standard patch syntax.
59 % (see Figure \ref{scsiglue_patch}).
60
61 \section{Metavariables for transformations}
62
63 The \NT{rulename} portion of the metavariable declaration can specify
64 properties of a rule such as its name, the names of the rules that it
65 depends on, the isomorphisms to be used in processing the rule, and whether
66 quantification over paths should be universal or existential. The optional
67 annotation {\tt expression} indicates that the pattern is to be considered
68 as matching an expression, and thus can be used to avoid some parsing
69 problems.
70
71 The \NT{metadecl} portion of the metavariable declaration defines various
72 types of metavariables that will be used for matching in the transformation
73 section.
74
75 \begin{grammar}
76 \RULE{\rt{metavariables}}
77 \CASE{@@ \any{\NT{metadecl}} @@}
78 \CASE{@ \NT{rulename} @ \any{\NT{metadecl}} @@}
79
80 \RULE{\rt{rulename}}
81 \CASE{\T{id} \OPT{extends \T{id}} \OPT{depends on \NT{dep}} \opt{\NT{iso}}
82 \opt{\NT{disable-iso}} \opt{\NT{exists}} \opt{expression}}
83
84 \RULE{\rt{dep}}
85 \CASE{\NT{pnrule}}
86 \CASE{\NT{dep} \&\& \NT{dep}}
87 \CASE{\NT{dep} || \NT{dep}}
88
89 \RULE{\rt{pnrule}}
90 \CASE{\T{id}}
91 \CASE{!\T{id}}
92 \CASE{ever \T{id}}
93 \CASE{never \T{id}}
94 \CASE{(\NT{dep})}
95
96 \RULE{\rt{iso}}
97 \CASE{using \NT{string} \ANY{, \NT{string}}}
98
99 \RULE{\rt{disable-iso}}
100 \CASE{disable \NT{COMMA\_LIST}\mth{(}\T{id}\mth{)}}
101
102 \RULE{\rt{exists}}
103 \CASE{exists}
104 \CASE{forall}
105 % \CASE{\opt{reverse} forall}
106
107 \RULE{\rt{COMMA\_LIST}\mth{(}\rt{elem}\mth{)}}
108 \CASE{\NT{elem} \ANY{, \NT{elem}}}
109 \end{grammar}
110
111 The keyword \KW{disable} is normally used with the names of
112 isomorphisms defined in standard.iso or whatever isomorphism file has been
113 included. There are, however, some other isomorphisms that are built into
114 the implementation of Coccinelle and that can be disabled as well. Their
115 names are given below. In each case, the text describes the standard
116 behavior. Using \NT{disable-iso} with the given name disables this behavior.
117
118 \begin{itemize}
119 \item \KW{optional\_storage}: A SmPL function definition that does not
120 specify any visibility (i.e., static or extern), or a SmPL variable
121 declaration that does not specify any storage (i.e., auto, static,
122 register, or extern), matches a function declaration or variable
123 declaration with any visibility or storage, respectively.
124 \item \KW{optional\_qualifier}: This is similar to \KW{optional\_storage},
125 except that here it is the qualifier (i.e., const or volatile) that does
126 not have to be specified in the SmPL code, but may be present in the C code.
127 \item \KW{value\_format}: Integers in various formats, e.g., 1 and 0x1, are
128 considered to be equivalent in the matching process.
129 \item \KW{comm\_assoc}: An expression of the form \NT{exp} \NT{bin\_op}
130 \KW{...}, where \NT{bin\_op} is commutative and associative, is
131 considered to match any top-level sequence of \NT{bin\_op} operators
132 containing \NT{exp} as the top-level argument.
133 \end{itemize}
134
135 The possible types of metavariable declarations are defined by the grammar
136 rule below. Metavariables should occur at least once in the transformation
137 immediately following their declaration. Fresh metavariables must only be
138 used in {\tt +} code. These properties are not expressed in the grammar,
139 but are checked by a subsequent analysis. The metavariables are designated
140 according to the kind of terms they can match, such as a statement, an
141 identifier, or an expression. An expression metavariable can be further
142 constrained by its type.
143
144 \begin{grammar}
145 \RULE{\rt{metadecl}}
146 \CASE{fresh identifier \NT{ids} ;}
147 \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
148 \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_virt\_or\_not\_eq}\mth{)} ;}
149 \CASE{parameter \opt{list} \NT{ids} ;}
150 \CASE{parameter list [ \NT{id} ] \NT{ids} ;}
151 \CASE{type \NT{ids} ;}
152 \CASE{statement \opt{list} \NT{ids} ;}
153 \CASE{typedef \NT{ids} ;}
154 \CASE{declarer name \NT{ids} ;}
155 % \CASE{\opt{local} function \NT{pmid\_with\_not\_eq\_list} ;}
156 \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
157 \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
158 \CASE{iterator name \NT{ids} ;}
159 \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
160 \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
161 % \CASE{error \NT{pmid\_with\_not\_eq\_list} ; }
162 \CASE{\opt{local} idexpression \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
163 \CASE{\opt{local} idexpression \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
164 \CASE{\opt{local} idexpression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
165 \CASE{expression list \NT{ids} ;}
166 \CASE{expression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
167 \CASE{expression \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
168 \CASE{expression list [ ident ] \NT{ids} ;}
169 \CASE{\NT{ctype} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
170 \CASE{\NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
171 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
172 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
173 \CASE{constant \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
174 \CASE{constant \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
175 \CASE{position \opt{any} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq\_mid}\mth{)} ;}
176 \end{grammar}
177
178 \begin{grammar}
179 \RULE{\rt{ids}}
180 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{pmid}\mth{)}}
181
182 \RULE{\rt{pmid}}
183 \CASE{\T{id}}
184 \CASE{\NT{mid}}
185 % \CASE{list}
186 % \CASE{error}
187 % \CASE{type}
188
189 \RULE{\rt{mid}} \CASE{\T{rulename\_id}.\T{id}}
190
191 \RULE{\rt{pmid\_with\_regexp}}
192 \CASE{\NT{pmid} \~{}= \NT{regexp}}
193
194 \RULE{\rt{pmid\_with\_not\_eq}}
195 \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_meta}}}
196 \CASE{\NT{pmid}
197 \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_meta}\mth{)} \ttrb}}
198
199 \RULE{\rt{pmid\_with\_virt\_or\_not\_eq}}
200 \CASE{virtual.\T{id}}
201 \CASE{\NT{pmid\_with\_not\_eq}}
202
203 \RULE{\rt{pmid\_with\_not\_ceq}}
204 \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_cst}}}
205 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_cst}\mth{)} \ttrb}}
206
207 \RULE{\rt{id\_or\_cst}}
208 \CASE{\T{id}}
209 \CASE{\T{integer}}
210
211 \RULE{\rt{id\_or\_meta}}
212 \CASE{\T{id}}
213 \CASE{\T{rulename\_id}.\T{id}}
214
215 \RULE{\rt{pmid\_with\_not\_eq\_mid}}
216 \CASE{\NT{pmid} \OPT{!= \NT{mid}}}
217 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{mid}\mth{)} \ttrb}}
218 \end{grammar}
219
220 Subsequently, we refer to arbitrary metavariables as
221 \mth{\msf{metaid}^{\mbox{\scriptsize{\it{ty}}}}}, where {\it{ty}}
222 indicates the {\it metakind} used in the declaration of the variable.
223 For example, \mth{\msf{metaid}^{\ssf{Type}}} refers to a metavariable
224 that was declared using \texttt{type} and stands for any type.
225
226 The \NT{ctype} and \NT{ctypes} nonterminals are used by both the grammar of
227 metavariable declarations and the grammar of transformations, and are
228 defined on page~\pageref{types}.
229
230 An identifier metavariable with {\tt virtual} as its ``rule name'' is given
231 a value on the command line. For example, if a semantic patch contains a
232 rule that declares an identifier metavariable with the name {\tt
233 virtual.alloc}, then the command line could contain {\tt -D
234 alloc=kmalloc}. There should not be space around the {\tt =}. An
235 example is in {\tt demos/vm.cocci} and {\tt demos/vm.c}.
236
237 \section{Metavariables for scripts}
238
239 Metavariables for scripts can only be inherited from transformation rules.
240 In the spirit of scripting languages such as Python that use dynamic
241 typing, metavariables for scripts do not include type declarations.
242
243 \begin{grammar}
244 \RULE{\rt{script\_metavariables}}
245 \CASE{@ script:\NT{language} \OPT{depends on \NT{dep}} @
246 \any{\NT{script\_metadecl}} @@}
247 \CASE{@ initialize:\NT{language} \OPT{depends on \NT{dep}} @}
248 \CASE{@ finalize:\NT{language} \OPT{depends on \NT{dep}} @}
249
250 \RULE{\rt{language}} \CASE{python} \CASE{ocaml}
251
252 \RULE{\rt{script\_metadecl}} \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;}
253 \end{grammar}
254
255 Currently, the only scripting languages that are supported are Python and
256 OCaml, indicated using {\tt python} and {\tt ocaml}, respectively. The
257 set of available scripting languages may be extended at some point.
258
259 Script rules declared with \KW{initialize} are run before the treatment of
260 any file. Script rules declared with \KW{finalize} are run when the
261 treatment of all of the files has completed. There can be at most one of
262 each per scripting language (thus currently at most one of each).
263 Initialize and finalize script rules do not have access to SmPL
264 metavariables. Nevertheless, a finalize script rule can access any
265 variables initialized by the other script rules, allowing information to be
266 transmitted from the matching process to the finalize rule.
267
268 \section{Transformation}
269
270 The transformation specification essentially has the form of C code,
271 except that lines to remove are annotated with \verb+-+ in the first
272 column, and lines to add are annotated with \verb-+-. A
273 transformation specification can also use {\em dots}, ``\verb-...-'',
274 describing an arbitrary sequence of function arguments or instructions
275 within a control-flow path. Dots may be modified with a {\tt when}
276 clause, indicating a pattern that should not occur anywhere within the
277 matched sequence. Finally, a transformation can specify a disjunction
278 of patterns, of the form \mtt{( \mth{\mita{pat}_1} | \mita{\ldots} |
279 \mth{\mita{pat}_n} )} where each \texttt{(}, \texttt{|} or
280 \texttt{)} is in column 0 or preceded by \texttt{\textbackslash}.
281
282 The grammar that we present for the transformation is not actually the
283 grammar of the SmPL code that can be written by the programmer, but is
284 instead the grammar of the slice of this consisting of the {\tt -}
285 annotated and the unannotated code (the context of the transformed lines),
286 or the {\tt +} annotated code and the unannotated code. For example, for
287 parsing purposes, the following transformation
288 %presented in Section \ref{sec:seq2}
289 is split into the two variants shown below and each is parsed
290 separately.
291
292 \begin{center}
293 \begin{tabular}{c}
294 \begin{lstlisting}[language=Cocci]
295 proc_info_func(...) {
296 <...
297 @-- hostno
298 @++ hostptr->host_no
299 ...>
300 }
301 \end{lstlisting}\\
302 \end{tabular}
303 \end{center}
304
305 {%\sizecodebis
306 \begin{center}
307 \begin{tabular}{p{5cm}p{3cm}p{5cm}}
308 \begin{lstlisting}[language=Cocci]
309 proc_info_func(...) {
310 <...
311 @-- hostno
312 ...>
313 }
314 \end{lstlisting}
315 &&
316 \begin{lstlisting}[language=Cocci]
317 proc_info_func(...) {
318 <...
319 @++ hostptr->host_no
320 ...>
321 }
322 \end{lstlisting}
323 \end{tabular}
324 \end{center}
325 }
326
327 \noindent
328 Requiring that both slices parse correctly ensures that the rule matches
329 syntactically valid C code and that it produces syntactically valid C code.
330 The generated parse trees are then merged for use in the subsequent
331 matching and transformation process.
332
333 The grammar for the minus or plus slice of a transformation is as follows:
334
335 \begin{grammar}
336
337 \RULE{\rt{transformation}}
338 \CASE{\some{\NT{include}}}
339 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
340 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
341 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{fundecl}, \NT{when}\mth{)}}
342
343 \RULE{\rt{include}}
344 \CASE{\#include \T{include\_string}}
345
346 % \RULE{\rt{fun\_decl\_stmt}}
347 % \CASE{\NT{decl\_stmt}}
348 % \CASE{\NT{fundecl}}
349
350 % \CASE{\NT{ctype}}
351 % \CASE{\ttlb \NT{initialize\_list} \ttrb}
352 % \CASE{\NT{toplevel\_seq\_start\_after\_dots\_init}}
353 %
354 % \RULE{\rt{toplevel\_seq\_start\_after\_dots\_init}}
355 % \CASE{\NT{stmt\_dots} \NT{toplevel\_after\_dots}}
356 % \CASE{\NT{expr} \opt{\NT{toplevel\_after\_exp}}}
357 % \CASE{\NT{decl\_stmt\_expr} \opt{\NT{toplevel\_after\_stmt}}}
358 %
359 % \RULE{\rt{stmt\_dots}}
360 % \CASE{... \any{\NT{when}}}
361 % \CASE{<... \any{\NT{when}} \NT{nest\_after\_dots} ...>}
362 % \CASE{<+... \any{\NT{when}} \NT{nest\_after\_dots} ...+>}
363
364 \RULE{\rt{when}}
365 \CASE{when != \NT{when\_code}}
366 \CASE{when = \NT{rule\_elem\_stmt}}
367 \CASE{when \NT{COMMA\_LIST}\mth{(}\NT{any\_strict}\mth{)}}
368 \CASE{when true != \NT{expr}}
369 \CASE{when false != \NT{expr}}
370
371 \RULE{\rt{when\_code}}
372 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
373 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
374
375 \RULE{\rt{rule\_elem\_stmt}}
376 \CASE{\NT{one\_decl}}
377 \CASE{\NT{expr};}
378 \CASE{return \opt{\NT{expr}};}
379 \CASE{break;}
380 \CASE{continue;}
381 \CASE{\bs(\NT{rule\_elem\_stmt} \SOME{\bs| \NT{rule\_elem\_stmt}}\bs)}
382
383 \RULE{\rt{any\_strict}}
384 \CASE{any}
385 \CASE{strict}
386 \CASE{forall}
387 \CASE{exists}
388
389 % \RULE{\rt{nest\_after\_dots}}
390 % \CASE{\NT{decl\_stmt\_exp} \opt{\NT{nest\_after\_stmt}}}
391 % \CASE{\opt{\NT{exp}} \opt{\NT{nest\_after\_exp}}}
392 %
393 % \RULE{\rt{nest\_after\_stmt}}
394 % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
395 % \CASE{\NT{decl\_stmt} \opt{\NT{nest\_after\_stmt}}}
396 %
397 % \RULE{\rt{nest\_after\_exp}}
398 % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
399 %
400 % \RULE{\rt{toplevel\_after\_dots}}
401 % \CASE{\opt{\NT{toplevel\_after\_exp}}}
402 % \CASE{\NT{exp} \opt{\NT{toplevel\_after\_exp}}}
403 % \CASE{\NT{decl\_stmt\_expr} \NT{toplevel\_after\_stmt}}
404 %
405 % \RULE{\rt{toplevel\_after\_exp}}
406 % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
407 %
408 % \RULE{\rt{decl\_stmt\_expr}}
409 % \CASE{TMetaStmList$^\ddag$}
410 % \CASE{\NT{decl\_var}}
411 % \CASE{\NT{stmt}}
412 % \CASE{(\NT{stmt\_seq} \ANY{| \NT{stmt\_seq}})}
413 %
414 % \RULE{\rt{toplevel\_after\_stmt}}
415 % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
416 % \CASE{\NT{decl\_stmt} \NT{toplevel\_after\_stmt}}
417
418 \end{grammar}
419
420 \begin{grammar}
421 \RULE{\rt{OPTDOTSEQ}\mth{(}\rt{grammar\_ds}, \rt{when\_ds}\mth{)}}
422 \CASE{}\multicolumn{3}{r}{\hspace{1cm}
423 \KW{\opt{... \opt{\NT{when\_ds}}} \NT{grammar\_ds}
424 \ANY{... \opt{\NT{when\_ds}} \NT{grammar\_ds}}
425 \opt{... \opt{\NT{when\_ds}}}}
426 }
427
428 % \CASE{\opt{... \opt{\NT{when\_ds}}} \NT{grammar}
429 % \ANY{... \opt{\NT{when\_ds}} \NT{grammar}}
430 % \opt{... \opt{\NT{when\_ds}}}}
431 % \CASE{<... \any{\NT{when\_ds}} \NT{grammar} ...>}
432 % \CASE{<+... \any{\NT{when\_ds}} \NT{grammar} ...+>}
433
434 \end{grammar}
435
436 \noindent
437 Lines may be annotated with an element of the set $\{\mtt{-}, \mtt{+},
438 \mtt{*}\}$ or the singleton $\mtt{?}$, or one of each set. \mtt{?}
439 represents at most one match of the given pattern. \mtt{*} is used for
440 semantic match, \emph{i.e.}, a pattern that highlights the fragments
441 annotated with \mtt{*}, but does not perform any modification of the
442 matched code. \mtt{*} cannot be mixed with \mtt{-} and \mtt{+}. There are
443 some constraints on the use of these annotations:
444 \begin{itemize}
445 \item Dots, {\em i.e.}, \texttt{...}, cannot occur on a line marked
446 \texttt{+}.
447 \item Nested dots, {\em i.e.}, dots enclosed in {\tt <} and {\tt >}, cannot
448 occur on a line with any marking.
449 \end{itemize}
450
451 Each element of a disjunction must be a proper term like an
452 expression, a statement, an identifier or a declaration. Thus, the
453 rule on the left below is not a syntactically correct SmPL rule. One may
454 use the rule on the right instead.
455
456 \begin{center}
457 \begin{tabular}{l@{\hspace{5cm}}r}
458 \begin{lstlisting}[language=Cocci]
459 @@
460 type T;
461 T b;
462 @@
463
464 (
465 writeb(...,
466 |
467 readb(
468 )
469 @--(T)
470 b)
471 \end{lstlisting}
472 &
473 \begin{lstlisting}[language=Cocci]
474 @@
475 type T;
476 T b;
477 @@
478
479 (
480 read
481 |
482 write
483 )
484 (...,
485 @-- (T)
486 b)
487 \end{lstlisting}
488 \\
489 \end{tabular}
490 \end{center}
491
492 \section{Types}
493 \label{types}
494
495 \begin{grammar}
496
497 \RULE{\rt{ctypes}}
498 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{ctype}\mth{)}}
499
500 \RULE{\rt{ctype}}
501 \CASE{\opt{\NT{const\_vol}} \NT{generic\_ctype} \any{*}}
502 \CASE{\opt{\NT{const\_vol}} void \some{*}}
503 \CASE{(\NT{ctype} \ANY{| \NT{ctype}})}
504
505 \RULE{\rt{const\_vol}}
506 \CASE{const}
507 \CASE{volatile}
508
509 \RULE{\rt{generic\_ctype}}
510 \CASE{\NT{ctype\_qualif}}
511 \CASE{\opt{\NT{ctype\_qualif}} char}
512 \CASE{\opt{\NT{ctype\_qualif}} short}
513 \CASE{\opt{\NT{ctype\_qualif}} int}
514 \CASE{\opt{\NT{ctype\_qualif}} long}
515 \CASE{\opt{\NT{ctype\_qualif}} long long}
516 \CASE{double}
517 \CASE{float}
518 \CASE{\OPT{struct\OR union} \T{id} \OPT{\{ \any{\NT{struct\_decl\_list}} \}}}
519
520 \RULE{\rt{ctype\_qualif}}
521 \CASE{unsigned}
522 \CASE{signed}
523
524 \RULE{\rt{struct\_decl\_list}}
525 \CASE{\NT{struct\_decl\_list\_start}}
526
527 \RULE{\rt{struct\_decl\_list\_start}}
528 \CASE{\NT{struct\_decl}}
529 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
530 \CASE{... \opt{when != \NT{struct\_decl}}$^\dag$ \opt{\NT{continue\_struct\_decl\_list}}}
531
532 \RULE{\rt{continue\_struct\_decl\_list}}
533 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
534 \CASE{\NT{struct\_decl}}
535
536 \RULE{\rt{struct\_decl}}
537 \CASE{\NT{ctype} \NT{d\_ident};}
538 \CASE{\NT{fn\_ctype} (* \NT{d\_ident}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)});}
539 \CASE{\opt{\NT{const\_vol}} \T{id} \NT{d\_ident};}
540
541 \RULE{\rt{d\_ident}}
542 \CASE{\NT{id} \any{[\opt{\NT{expr}}]}}
543
544 \RULE{\rt{fn\_ctype}}
545 \CASE{\NT{generic\_ctype} \any{*}}
546 \CASE{void \any{*}}
547
548 \RULE{\rt{name\_opt\_decl}}
549 \CASE{\NT{decl}}
550 \CASE{\NT{ctype}}
551 \CASE{\NT{fn\_ctype}}
552 \end{grammar}
553
554 $^\dag$ The optional \texttt{when} construct ends at the end of the line.
555
556 \section{Function declarations}
557
558 \begin{grammar}
559
560 \RULE{\rt{fundecl}}
561 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
562 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}})
563 \ttlb~\opt{\NT{stmt\_seq}} \ttrb}
564
565 \RULE{\rt{funproto}}
566 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
567 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}});}
568
569 \RULE{\rt{funinfo}}
570 \CASE{inline}
571 \CASE{\NT{storage}}
572 % \CASE{\NT{attr}}
573
574 \RULE{\rt{storage}}
575 \CASE{static}
576 \CASE{auto}
577 \CASE{register}
578 \CASE{extern}
579
580 \RULE{\rt{funid}}
581 \CASE{\T{id}}
582 \CASE{\mth{\T{metaid}^{\ssf{Id}}}}
583 % \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
584 % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
585
586 \RULE{\rt{param}}
587 \CASE{\NT{type} \T{id}}
588 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
589 \CASE{\mth{\T{metaid}^{\ssf{ParamList}}}}
590
591 \RULE{\rt{decl}}
592 \CASE{\NT{ctype} \NT{id}}
593 \CASE{\NT{fn\_ctype} (* \NT{id}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)})}
594 \CASE{void}
595 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
596 \end{grammar}
597
598 \begin{grammar}
599 \RULE{\rt{PARAMSEQ}\mth{(}\rt{gram\_p}, \rt{when\_p}\mth{)}}
600 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{gram\_p} \OR \ldots \opt{\NT{when\_p}}\mth{)}}
601 \end{grammar}
602
603 To match a function it is not necessary to provide all of the annotations
604 that appear before the function name. For example, the following semantic
605 patch:
606
607 \begin{lstlisting}[language=Cocci]
608 @@
609 @@
610
611 foo() { ... }
612 \end{lstlisting}
613
614 \noindent
615 matches a function declared as follows:
616
617 \begin{lstlisting}[language=C]
618 static int foo() { return 12; }
619 \end{lstlisting}
620
621 \noindent
622 This behavior can be turned off by disabling the \KW{optional\_storage}
623 isomorphism. If one adds code before a function declaration, then the
624 effect depends on the kind of code that is added. If the added code is a
625 function definition or CPP code, then the new code is placed before
626 all information associated with the function definition, including any
627 comments preceding the function definition. On the other hand, if the new
628 code is associated with the function, such as the addition of the keyword
629 {\tt static}, the new code is placed exactly where it appears with respect
630 to the rest of the function definition in the semantic patch. For example,
631
632 \begin{lstlisting}[language=Cocci]
633 @@
634 @@
635
636 + static
637 foo() { ... }
638 \end{lstlisting}
639
640 \noindent
641 causes static to be placed just before the function name. The following
642 causes it to be placed just before the type:
643
644 \begin{lstlisting}[language=Cocci]
645 @@
646 type T;
647 @@
648
649 + static
650 T foo() { ... }
651 \end{lstlisting}
652
653 \noindent
654 It may be necessary to consider several cases to ensure that the added code
655 is placed in the right position. For example, one may need one pattern
656 that considers that the function is declared {\tt inline} and another that
657 considers that it is not.
658
659 %\newpage
660
661 \section{Declarations}
662
663 \begin{grammar}
664 \RULE{\rt{decl\_var}}
665 % \CASE{\NT{type} \opt{\NT{id} \opt{[\opt{\NT{dot\_expr}}]}
666 % \ANY{, \NT{id} \opt{[ \opt{\NT{dot\_expr}}]}}};}
667 \CASE{\NT{common\_decl}}
668 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
669 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
670 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) = \NT{initialize} ;}
671 \CASE{typedef \NT{ctype} \NT{typedef\_ident} ;}
672
673 \RULE{\rt{one\_decl}}
674 \CASE{\NT{common\_decl}}
675 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{id};}
676 % \CASE{\NT{storage} \NT{ctype} \NT{id} \opt{[\opt{\NT{dot\\_expr}}]} = \NT{nest\\_expr};}
677 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} ;}
678
679 \RULE{\rt{common\_decl}}
680 \CASE{\NT{ctype};}
681 \CASE{\NT{funproto}}
682 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{d\_ident} = \NT{initialize} ;}
683 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} = \NT{initialize} ;}
684 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) ;}
685 \CASE{\NT{decl\_ident} ( \OPT{\NT{COMMA\_LIST}\mth{(}\NT{expr}\mth{)}} ) ;}
686
687 \RULE{\rt{initialize}}
688 \CASE{\NT{dot\_expr}}
689 \CASE{\ttlb~\opt{\NT{COMMA\_LIST}\mth{(}\NT{dot\_expr}\mth{)}}~\ttrb}
690
691 \RULE{\rt{decl\_ident}}
692 \CASE{\T{DeclarerId}}
693 \CASE{\mth{\T{metaid}^{\ssf{Declarer}}}}
694 \end{grammar}
695
696 \section{Statements}
697
698 The first rule, {\em stmt}, describes the various forms of a statement.
699 The remaining rules implement the constraints that are sensitive to the
700 context in which the statement occurs: {\em single\_stmt} for a
701 context in which only one statement is allowed, and {\em decl\_stmt}
702 for a context in which a declaration, statement, or sequence thereof is
703 allowed.
704
705 \begin{grammar}
706 \RULE{\rt{stmt}}
707 \CASE{\NT{include}}
708 \CASE{\mth{\T{metaid}^{\ssf{Stmt}}}}
709 \CASE{\NT{expr};}
710 \CASE{if (\NT{dot\_expr}) \NT{single\_stmt} \opt{else \NT{single\_stmt}}}
711 \CASE{for (\opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}})
712 \NT{single\_stmt}}
713 \CASE{while (\NT{dot\_expr}) \NT{single\_stmt}}
714 \CASE{do \NT{single\_stmt} while (\NT{dot\_expr});}
715 \CASE{\NT{iter\_ident} (\any{\NT{dot\_expr}}) \NT{single\_stmt}}
716 \CASE{switch (\opt{\NT{dot\_expr}}) \ttlb \any{\NT{case\_line}} \ttrb}
717 \CASE{return \opt{\NT{dot\_expr}};}
718 \CASE{\ttlb~\opt{\NT{stmt\_seq}} \ttrb}
719 \CASE{\NT{NEST}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
720 \CASE{\NT{NEST}\mth{(}\NT{expr}, \NT{when}\mth{)}}
721 \CASE{break;}
722 \CASE{continue;}
723 \CASE{\NT{id}:}
724 \CASE{goto \NT{id};}
725 \CASE{\ttlb \NT{stmt\_seq} \ttrb}
726
727 \RULE{\rt{single\_stmt}}
728 \CASE{\NT{stmt}}
729 \CASE{\NT{OR}\mth{(}\NT{stmt}\mth{)}}
730
731 \RULE{\rt{decl\_stmt}}
732 \CASE{\mth{\T{metaid}^{\ssf{StmtList}}}}
733 \CASE{\NT{decl\_var}}
734 \CASE{\NT{stmt}}
735 \CASE{\NT{OR}\mth{(}\NT{stmt\_seq}\mth{)}}
736
737 \RULE{\rt{stmt\_seq}}
738 \CASE{\any{\NT{decl\_stmt}}
739 \opt{\NT{DOTSEQ}\mth{(}\some{\NT{decl\_stmt}},
740 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
741 \CASE{\any{\NT{decl\_stmt}}
742 \opt{\NT{DOTSEQ}\mth{(}\NT{expr},
743 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
744
745 \RULE{\rt{case\_line}}
746 \CASE{default :~\NT{stmt\_seq}}
747 \CASE{case \NT{dot\_expr} :~\NT{stmt\_seq}}
748
749 \RULE{\rt{iter\_ident}}
750 \CASE{\T{IteratorId}}
751 \CASE{\mth{\T{metaid}^{\ssf{Iterator}}}}
752 \end{grammar}
753
754 \begin{grammar}
755 \RULE{\rt{OR}\mth{(}\rt{gram\_o}\mth{)}}
756 \CASE{( \NT{gram\_o} \ANY{\ttmid \NT{gram\_o}})}
757
758 \RULE{\rt{DOTSEQ}\mth{(}\rt{gram\_d}, \rt{when\_d}\mth{)}}
759 \CASE{\ldots \opt{\NT{when\_d}} \ANY{\NT{gram\_d} \ldots \opt{\NT{when\_d}}}}
760
761 \RULE{\rt{NEST}\mth{(}\rt{gram\_n}, \rt{when\_n}\mth{)}}
762 \CASE{<\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots>}
763 \CASE{<+\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots+>}
764 \end{grammar}
765
766 \noindent
767 OR is a macro that generates a disjunction of patterns. The three
768 tokens \T{(}, \T{\ttmid}, and \T{)} must appear in the leftmost
769 column, to differentiate them from the parentheses and bit-or tokens
770 that can appear within expressions (and cannot appear in the leftmost
771 column). These tokens may also be preceded by \texttt{\bs}
772 when they are used in another column. These tokens are furthermore
773 different from (, \(\mid\), and ), which are part of the grammar
774 metalanguage.
775
776 \section{Expressions}
777
778 A nest or a single ellipsis is allowed in some expression contexts, and
779 causes ambiguity in others. For example, in a sequence \mtt{\ldots
780 \mita{expr} \ldots}, the nonterminal \mita{expr} must be instantiated as an
781 explicit C-language expression, while in an array reference,
782 \mtt{\mth{\mita{expr}_1} \mtt{[} \mth{\mita{expr}_2} \mtt{]}}, the
783 nonterminal \mth{\mita{expr}_2}, because it is delimited by brackets, can
784 be also instantiated as \mtt{\ldots}, representing an arbitrary expression. To
785 distinguish between the various possibilities, we define three nonterminals
786 for expressions: {\em expr} does not allow either top-level nests or
787 ellipses, {\em nest\_expr} allows a nest but not an ellipsis, and {\em
788 dot\_expr} allows both. The EXPR macro is used to express these variants
789 in a concise way.
790
791 \begin{grammar}
792 \RULE{\rt{expr}}
793 \CASE{\NT{EXPR}\mth{(}\NT{expr}\mth{)}}
794
795 \RULE{\rt{nest\_expr}}
796 \CASE{\NT{EXPR}\mth{(}\NT{nest\_expr}\mth{)}}
797 \CASE{\NT{NEST}\mth{(}\NT{nest\_expr}, \NT{exp\_whencode}\mth{)}}
798
799 \RULE{\rt{dot\_expr}}
800 \CASE{\NT{EXPR}\mth{(}\NT{dot\_expr}\mth{)}}
801 \CASE{\NT{NEST}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)}}
802 \CASE{...~\opt{\NT{exp\_whencode}}}
803
804 \RULE{\rt{EXPR}\mth{(}\rt{exp}\mth{)}}
805 \CASE{\NT{exp} \NT{assign\_op} \NT{exp}}
806 \CASE{\NT{exp}++}
807 \CASE{\NT{exp}--}
808 \CASE{\NT{unary\_op} \NT{exp}}
809 \CASE{\NT{exp} \NT{bin\_op} \NT{exp}}
810 \CASE{\NT{exp} ?~\NT{dot\_expr} :~\NT{exp}}
811 \CASE{(\NT{type}) \NT{exp}}
812 \CASE{\NT{exp} [\NT{dot\_expr}]}
813 \CASE{\NT{exp} .~\NT{id}}
814 \CASE{\NT{exp} -> \NT{id}}
815 \CASE{\NT{exp}(\opt{\NT{PARAMSEQ}\mth{(}\NT{arg}, \NT{exp\_whencode}\mth{)}})}
816 \CASE{\NT{id}}
817 % \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
818 % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
819 \CASE{\mth{\T{metaid}^{\ssf{Exp}}}}
820 % \CASE{\mth{\T{metaid}^{\ssf{Err}}}}
821 \CASE{\mth{\T{metaid}^{\ssf{Const}}}}
822 \CASE{\NT{const}}
823 \CASE{(\NT{dot\_expr})}
824 \CASE{\NT{OR}\mth{(}\NT{exp}\mth{)}}
825
826 \RULE{\rt{arg}}
827 \CASE{\NT{nest\_expr}}
828 \CASE{\mth{\T{metaid}^{\ssf{ExpList}}}}
829
830 \RULE{\rt{exp\_whencode}}
831 \CASE{when != \NT{expr}}
832
833 \RULE{\rt{assign\_op}}
834 \CASE{= \OR -= \OR += \OR *= \OR /= \OR \%=}
835 \CASE{\&= \OR |= \OR \caret= \OR \lt\lt= \OR \gt\gt=}
836
837 \RULE{\rt{bin\_op}}
838 \CASE{* \OR / \OR \% \OR + \OR -}
839 \CASE{\lt\lt \OR \gt\gt \OR \caret\xspace \OR \& \OR \ttmid}
840 \CASE{< \OR > \OR <= \OR >= \OR == \OR != \OR \&\& \OR \ttmid\ttmid}
841
842 \RULE{\rt{unary\_op}}
843 \CASE{++ \OR -- \OR \& \OR * \OR + \OR - \OR !}
844
845 \end{grammar}
846
847 \section{Constants, Identifiers and Types for Transformations}
848
849 \begin{grammar}
850 \RULE{\rt{const}}
851 \CASE{\NT{string}}
852 \CASE{[0-9]+}
853 \CASE{\mth{\cdots}}
854
855 \RULE{\rt{string}}
856 \CASE{"\any{[\^{}"]}"}
857
858 \RULE{\rt{id}}
859 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Id}}}}
860
861 \RULE{\rt{typedef\_ident}}
862 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Type}}}}
863
864 \RULE{\rt{type}}
865 \CASE{\NT{ctype} \OR \mth{\T{metaid}^{\ssf{Type}}}}
866
867 \RULE{\rt{pathToIsoFile}}
868 \CASE{<.*>}
869
870 \RULE{\rt{regexp}}
871 \CASE{"\any{[\^{}"]}"}
872 \end{grammar}
873
874
875 %%% Local Variables:
876 %%% mode: LaTeX
877 %%% TeX-master: "main_grammar"
878 %%% coding: utf-8
879 %%% TeX-PDF-mode: t
880 %%% ispell-local-dictionary: "american"
881 %%% End: