Release coccinelle-0.1.7
[bpt/coccinelle.git] / docs / grammar / cocci_syntax.tex
1 \documentclass{article}
2 %\usepackage[latin9]{inputenc}
3 \usepackage{hevea}
4 \usepackage{fullpage}
5 \usepackage{alltt}
6 \usepackage{xspace}
7 \usepackage[pdfborder={0 0 0}]{hyperref}
8 \usepackage{listings}
9 \usepackage[usenames,dvipsnames]{color}
10 \usepackage{times}
11 \usepackage[T1]{fontenc}
12 \usepackage{multirow}
13
14 \lstset{basicstyle=\ttfamily,numbers=left, numberstyle=\tiny, stepnumber=1, numbersep=5pt,language=C,commentstyle=\color{OliveGreen},keywordstyle=\color{blue},stringstyle=\color{BrickRed}}
15
16 %
17 % You must prefix the +/- lines of
18 % cocci files with @+/@- respectively.
19 % This will enable the automatic coloration.
20 %
21 % Note: You need at least the following version of hevea
22 % http://hevea.inria.fr/distri/unstable/hevea-2008-12-17.tar.gz
23 %
24 \ifhevea % For HTML generation
25 \lstdefinelanguage{Cocci}{
26 morekeywords={idexpression,expression,statement,identifier,type,
27 parameter,list,when,strict,any,forall,local,position,typedef},
28 keywordstyle=\color{OliveGreen}\bfseries,
29 sensitive=false,
30 moredelim=[is][\color{blue}]{@M}{@M},
31 moredelim=[il][\color{OliveGreen}]{@+},
32 moredelim=[il][\color{BrickRed}]{@-}}
33
34 \lstdefinelanguage{PatchC}[ANSI]{C}{
35 stringstyle=\color{black},
36 moredelim=[il][\color{OliveGreen}]{@+},
37 moredelim=[il][\color{BrickRed}]{@-},
38 moredelim=[il][\color{Plum}]{@M}}
39
40 \else % For DVI/PS/PDF generation
41 \lstdefinelanguage{Cocci}{
42 morekeywords={idexpression,expression,statement,identifier,type,
43 parameter,list,when,strict,any,forall,local,position,typedef},
44 keywordstyle=\color{OliveGreen}\bfseries,
45 sensitive=false,
46 moredelim=*[is][\color{blue}]{@M}{@M},
47 moredelim=[il][\color{OliveGreen}]{@+},
48 moredelim=[il][\color{BrickRed}]{@-}}
49
50 \lstdefinelanguage{PatchC}[ANSI]{C}{
51 stringstyle=\color{black},
52 moredelim=[il][\color{OliveGreen}]{@+},
53 moredelim=[il][\color{BrickRed}]{@-},
54 moredelim=[il][\color{Plum}]{@M}}
55 \fi
56
57 \newif\iflanguagestyle
58 \languagestylefalse
59 \input{grammar}
60
61 \newcommand{\sizecodebis}[0]{\scriptsize}
62
63 \newcommand{\mita}[1]{\mbox{\it{{#1}}}}
64 \newcommand{\mtt}[1]{\mbox{\tt{{#1}}}}
65 \newcommand{\msf}[1]{\mbox{\sf{{#1}}}}
66 \newcommand{\stt}[1]{\mbox{\scriptsize\tt{{#1}}}}
67 \newcommand{\ssf}[1]{\mbox{\scriptsize\sf{{#1}}}}
68 \newcommand{\sita}[1]{\mbox{\scriptsize\it{{#1}}}}
69 \newcommand{\mrm}[1]{\mbox{\rm{{#1}}}}
70 \newcommand{\mth}[1]{\({#1}\)}
71 \newcommand{\entails}[2]{\begin{array}{@{}c@{}}{#1}\\\hline{#2}\end{array}}
72 \newcommand{\ttlb}{\mbox{\tt \char'173}}
73 \newcommand{\ttrb}{\mbox{\tt \char'175}}
74 \newcommand{\ttmid}{\mbox{\tt \char'174}}
75 \newcommand{\tttld}{\mbox{\tt \char'176}}
76
77 \newcommand{\fixme}[1]{{\color{red} #1}}
78
79 \ifhevea
80 \newcommand{\phantom}{}
81 \newcommand{\air}{ }
82 \else
83 \newcommand{\air}{\phantom{xxx}}
84 \fi
85
86 \title{The SmPL Grammar (version 0.1.4)}
87 \author{Research group on Coccinelle}
88 \date{\today}
89
90 \begin{document}
91 \maketitle
92
93 %\section{The SmPL Grammar}
94
95 % This section presents the SmPL grammar. This definition follows closely
96 % our implementation using the Menhir parser generator \cite{menhir}.
97
98 This document presents the grammar of the SmPL language used by the
99 \href{http://www.emn.fr/x-info/coccinelle}{Coccinelle tool}. For the most
100 part, the grammar is written using standard notation. In some rules,
101 however, the left-hand side is in all uppercase letters. These are
102 macros, which take one or more grammar rule right-hand-sides as
103 arguments. The grammar also uses some unspecified nonterminals, such
104 as \T{id}, \T{const}, etc. These refer to the sets suggested by
105 the name, {\em i.e.}, \T{id} refers to the set of possible
106 C-language identifiers, while \T{const} refers to the set of
107 possible C-language constants. \ifhevea A PDF version of this
108 documentation is available at
109 \url{http://www.emn.fr/x-info/coccinelle/docs/cocci_syntax.pdf}.\else
110 An HTML version of this documentation is available online at
111 \url{http://www.emn.fr/x-info/coccinelle/docs/cocci_syntax.html}. \fi
112
113 %% \ifhevea A PDF
114 %% version of this documentation is available at
115 %% \url{http://localhost:8080/coccinelle/cocci_syntax.pdf}.\else A HTML
116 %% version of this documentation is available online at
117 %% \url{http://localhost:8080/coccinelle/cocci_syntax.html}. \fi
118
119 \section{Program}
120
121 \begin{grammar}
122 \RULE{\rt{program}}
123 \CASE{\any{\NT{include\_cocci}} \some{\NT{changeset}}}
124
125 \RULE{\rt{include\_cocci}}
126 \CASE{using \NT{string}}
127 \CASE{using \NT{pathToIsoFile}}
128
129 \RULE{\rt{changeset}}
130 \CASE{\NT{metavariables} \NT{transformation}}
131 \CASE{\NT{script\_metavariables} \T{script\_code}}
132 % \CASE{\NT{metavariables} \ANY{--- filename +++ filename} \NT{transformation}}
133 \end{grammar}
134
135 \noindent
136 \T{script\_code} is any code in the chosen scripting language. Parsing of
137 the semantic patch does not check the validity of this code; any errors are
138 first detected when the code is executed.
139
140 % Between the metavariables and the transformation rule, there can be a
141 % specification of constraints on the names of the old and new files,
142 % analogous to the filename specifications in the standard patch syntax.
143 % (see Figure \ref{scsiglue_patch}).
144
145 \section{Metavariables for transformations}
146
147 The \NT{rulename} portion of the metavariable declaration can specify
148 properties of a rule such as its name, the names of the rules that it
149 depends on, the isomorphisms to be used in processing the rule, and whether
150 quantification over paths should be universal or existential. The optional
151 annotation {\tt expression} indicates that the pattern is to be considered
152 as matching an expression, and thus can be used to avoid some parsing
153 problems.
154
155 The \NT{metadecl} portion of the metavariable declaration defines various
156 types of metavariables that will be used for matching in the transformation
157 section.
158
159 \begin{grammar}
160 \RULE{\rt{metavariables}}
161 \CASE{@@ \any{\NT{metadecl}} @@}
162 \CASE{@ \NT{rulename} @ \any{\NT{metadecl}} @@}
163
164 \RULE{\rt{rulename}}
165 \CASE{\T{id} \OPT{extends \T{id}} \OPT{depends on \NT{dep}} \opt{\NT{iso}}
166 \opt{\NT{disable-iso}} \opt{\NT{exists}} \opt{expression}}
167 \CASE{script:\T{language} \OPT{depends on \NT{dep}}}
168
169 \RULE{\rt{script\_init\_final}}
170 \CASE{initialize:\T{language}}
171 \CASE{finalize:\T{language}}
172
173 \RULE{\rt{dep}}
174 \CASE{\NT{pnrule}}
175 \CASE{\NT{dep} \&\& \NT{dep}}
176 \CASE{\NT{dep} || \NT{dep}}
177
178 \RULE{\rt{pnrule}}
179 \CASE{\T{id}}
180 \CASE{!\T{id}}
181 \CASE{ever \T{id}}
182 \CASE{never \T{id}}
183 \CASE{(\NT{dep})}
184
185 \RULE{\rt{iso}}
186 \CASE{using \NT{string} \ANY{, \NT{string}}}
187
188 \RULE{\rt{disable-iso}}
189 \CASE{disable \NT{COMMA\_LIST}\mth{(}\T{id}\mth{)}}
190
191 \RULE{\rt{exists}}
192 \CASE{exists}
193 \CASE{forall}
194 % \CASE{\opt{reverse} forall}
195
196 \RULE{\rt{COMMA\_LIST}\mth{(}\rt{elem}\mth{)}}
197 \CASE{\NT{elem} \ANY{, \NT{elem}}}
198 \end{grammar}
199
200 The keyword \KW{disable} is normally used with the names of
201 isomorphisms defined in standard.iso or whatever isomorphism file has been
202 included. There are, however, some other isomorphisms that are built into
203 the implementation of Coccinelle and that can be disabled as well. Their
204 names are given below. In each case, the text describes the standard
205 behavior. Using \NT{disable-iso} with the given name disables this behavior.
206
207 \begin{itemize}
208 \item \KW{optional\_storage}: A SmPL function definition that does not
209 specify any visibility (i.e., static or extern), or a SmPL variable
210 declaration that does not specify any storage (i.e., auto, static,
211 register, or extern), matches a function declaration or variable
212 declaration with any visibility or storage, respectively.
213 \item \KW{optional\_qualifier}: This is similar to \KW{optional\_storage},
214 except that here it is the qualifier (i.e., const or volatile) that does
215 not have to be specified in the SmPL code, but may be present in the C code.
216 \item \KW{value\_format}: Integers in various formats, e.g., 1 and 0x1, are
217 considered to be equivalent in the matching process.
218 \item \KW{comm\_assoc}: An expression of the form \NT{exp} \NT{bin\_op}
219 \KW{...}, where \NT{bin\_op} is commutative and associative, is
220 considered to match any top-level sequence of \NT{bin\_op} operators
221 containing \NT{exp} as the top-level argument.
222 \end{itemize}
223
224 The possible types of metavariable declarations are defined by the grammar
225 rule below. Metavariables should occur at least once in the transformation
226 immediately following their declaration. Fresh metavariables must only be
227 used in {\tt +} code. These properties are not expressed in the grammar,
228 but are checked by a subsequent analysis. The metavariables are designated
229 according to the kind of terms they can match, such as a statement, an
230 identifier, or an expression. An expression metavariable can be further
231 constrained by its type.
232
233 \begin{grammar}
234 \RULE{\rt{metadecl}}
235 \CASE{fresh identifier \NT{ids} ;}
236 \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
237 \CASE{parameter \opt{list} \NT{ids} ;}
238 \CASE{parameter list [ \NT{id} ] \NT{ids} ;}
239 \CASE{type \NT{ids} ;}
240 \CASE{statement \opt{list} \NT{ids} ;}
241 \CASE{typedef \NT{ids} ;}
242 \CASE{declarer name \NT{ids} ;}
243 % \CASE{\opt{local} function \NT{pmid\_with\_not\_eq\_list} ;}
244 \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
245 \CASE{iterator name \NT{ids} ;}
246 \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
247 % \CASE{error \NT{pmid\_with\_not\_eq\_list} ; }
248 \CASE{\opt{local} idexpression \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
249 \CASE{\opt{local} idexpression \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
250 \CASE{\opt{local} idexpression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
251 \CASE{expression list \NT{ids} ;}
252 \CASE{expression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
253 \CASE{expression \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
254 \CASE{expression list [ ident ] \NT{ids} ;}
255 \CASE{\NT{ctype} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
256 \CASE{\NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
257 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
258 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
259 \CASE{constant \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
260 \CASE{constant \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
261 \CASE{position \opt{any} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq\_mid}\mth{)} ;}
262 \end{grammar}
263
264 \begin{grammar}
265 \RULE{\rt{ids}}
266 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{pmid}\mth{)}}
267
268 \RULE{\rt{pmid}}
269 \CASE{\T{id}}
270 \CASE{\NT{mid}}
271 % \CASE{list}
272 % \CASE{error}
273 % \CASE{type}
274
275 \RULE{\rt{mid}} \CASE{\T{rulename\_id}.\T{id}}
276
277 \RULE{\rt{pmid\_with\_not\_eq}}
278 \CASE{\NT{pmid} \OPT{!= \T{id}}}
279 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\T{id}\mth{)} \ttrb}}
280
281 \RULE{\rt{pmid\_with\_not\_ceq}}
282 \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_cst}}}
283 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_cst}\mth{)} \ttrb}}
284
285 \RULE{\rt{id\_or\_cst}}
286 \CASE{\T{id}}
287 \CASE{\T{integer}}
288
289 \RULE{\rt{pmid\_with\_not\_eq\_mid}}
290 \CASE{\NT{pmid} \OPT{!= \NT{mid}}}
291 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{mid}\mth{)} \ttrb}}
292 \end{grammar}
293
294 Subsequently, we refer to arbitrary metavariables as
295 \mth{\msf{metaid}^{\mbox{\scriptsize{\it{ty}}}}}, where {\it{ty}}
296 indicates the {\it metakind} used in the declaration of the variable.
297 For example, \mth{\msf{metaid}^{\ssf{Type}}} refers to a metavariable
298 that was declared using \texttt{type} and stands for any type.
299
300 The \NT{ctype} and \NT{ctypes} nonterminals are used by both the grammar of
301 metavariable declarations and the grammar of transformations, and are
302 defined on page~\pageref{types}.
303
304 \section{Metavariables for scripts}
305
306 Metavariables for scripts can only be inherited from transformation rules.
307 In the spirit of scripting languages such as Python that use dynamic
308 typing, metavariables for scripts do not include type declarations.
309
310 \begin{grammar}
311 \RULE{\rt{script\_metavariables}}
312 \CASE{@ script:\NT{language} \OPT{depends on \NT{dep}} @
313 \any{\NT{script\_metadecl}} @@}
314 \CASE{@ initialize:\NT{language} @}
315 \CASE{@ finalize:\NT{language} @}
316
317 \RULE{\rt{language}} \CASE{python}
318
319 \RULE{\rt{script\_metadecl}} \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;}
320 \end{grammar}
321
322 Currently, the only scripting language that is supported is Python. The
323 set of available scripting languages may be extended at some point.
324
325 Script rules declared with \KW{initialize} are run before the treatment of
326 any file. Script rules declared with \KW{finalize} are run when the
327 treatment of all of the files has completed. There can be at most one of
328 each per scripting language (thus currently at most one of each).
329 Initialize and finalize script rules do not have access to SmPL
330 metavariables. Nevertheless, a finalize script rule can access any
331 variables initialized by the other script rules, allowing information to be
332 transmitted from the matching process to the finalize rule.
333
334 \section{Transformation}
335
336 The transformation specification essentially has the form of C code,
337 except that lines to remove are annotated with \verb+-+ in the first
338 column, and lines to add are annotated with \verb-+-. A
339 transformation specification can also use {\em dots}, ``\verb-...-'',
340 describing an arbitrary sequence of function arguments or instructions
341 within a control-flow path. Dots may be modified with a {\tt when}
342 clause, indicating a pattern that should not occur anywhere within the
343 matched sequence. Finally, a transformation can specify a disjunction
344 of patterns, of the form \mtt{( \mth{\mita{pat}_1} | \mita{\ldots} |
345 \mth{\mita{pat}_n} )} where each \texttt{(}, \texttt{|} or
346 \texttt{)} is in column 0 or preceded by \texttt{\textbackslash}.
347
348 The grammar that we present for the transformation is not actually the
349 grammar of the SmPL code that can be written by the programmer, but is
350 instead the grammar of the slice of this consisting of the {\tt -}
351 annotated and the unannotated code (the context of the transformed lines),
352 or the {\tt +} annotated code and the unannotated code. For example, for
353 parsing purposes, the following transformation
354 %presented in Section \ref{sec:seq2}
355 is split into the two variants shown below and each is parsed
356 separately.
357
358 \begin{center}
359 \begin{tabular}{c}
360 \begin{lstlisting}[language=Cocci]
361 proc_info_func(...) {
362 <...
363 @-- hostno
364 @++ hostptr->host_no
365 ...>
366 }
367 \end{lstlisting}\\
368 \end{tabular}
369 \end{center}
370
371 {%\sizecodebis
372 \begin{center}
373 \begin{tabular}{p{5cm}p{3cm}p{5cm}}
374 \begin{lstlisting}[language=Cocci]
375 proc_info_func(...) {
376 <...
377 @-- hostno
378 ...>
379 }
380 \end{lstlisting}
381 &&
382 \begin{lstlisting}[language=Cocci]
383 proc_info_func(...) {
384 <...
385 @++ hostptr->host_no
386 ...>
387 }
388 \end{lstlisting}
389 \end{tabular}
390 \end{center}
391 }
392
393 \noindent
394 Requiring that both slices parse correctly ensures that the rule matches
395 syntactically valid C code and that it produces syntactically valid C code.
396 The generated parse trees are then merged for use in the subsequent
397 matching and transformation process.
398
399 The grammar for the minus or plus slice of a transformation is as follows:
400
401 \begin{grammar}
402
403 \RULE{\rt{transformation}}
404 \CASE{\some{\NT{include}}}
405 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
406 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
407 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{fundecl}, \NT{when}\mth{)}}
408
409 \RULE{\rt{include}}
410 \CASE{\#include \T{include\_string}}
411
412 % \RULE{\rt{fun\_decl\_stmt}}
413 % \CASE{\NT{decl\_stmt}}
414 % \CASE{\NT{fundecl}}
415
416 % \CASE{\NT{ctype}}
417 % \CASE{\ttlb \NT{initialize\_list} \ttrb}
418 % \CASE{\NT{toplevel\_seq\_start\_after\_dots\_init}}
419 %
420 % \RULE{\rt{toplevel\_seq\_start\_after\_dots\_init}}
421 % \CASE{\NT{stmt\_dots} \NT{toplevel\_after\_dots}}
422 % \CASE{\NT{expr} \opt{\NT{toplevel\_after\_exp}}}
423 % \CASE{\NT{decl\_stmt\_expr} \opt{\NT{toplevel\_after\_stmt}}}
424 %
425 % \RULE{\rt{stmt\_dots}}
426 % \CASE{... \any{\NT{when}}}
427 % \CASE{<... \any{\NT{when}} \NT{nest\_after\_dots} ...>}
428 % \CASE{<+... \any{\NT{when}} \NT{nest\_after\_dots} ...+>}
429
430 \RULE{\rt{when}}
431 \CASE{when != \NT{when\_code}}
432 \CASE{when = \NT{rule\_elem\_stmt}}
433 \CASE{when \NT{COMMA\_LIST}\mth{(}\NT{any\_strict}\mth{)}}
434 \CASE{when true != \NT{expr}}
435 \CASE{when false != \NT{expr}}
436
437 \RULE{\rt{when\_code}}
438 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
439 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
440
441 \RULE{\rt{rule\_elem\_stmt}}
442 \CASE{\NT{one\_decl}}
443 \CASE{\NT{expr};}
444 \CASE{return \opt{\NT{expr}};}
445 \CASE{break;}
446 \CASE{continue;}
447 \CASE{\bs(\NT{rule\_elem\_stmt} \SOME{\bs| \NT{rule\_elem\_stmt}}\bs)}
448
449 \RULE{\rt{any\_strict}}
450 \CASE{any}
451 \CASE{strict}
452 \CASE{forall}
453 \CASE{exists}
454
455 % \RULE{\rt{nest\_after\_dots}}
456 % \CASE{\NT{decl\_stmt\_exp} \opt{\NT{nest\_after\_stmt}}}
457 % \CASE{\opt{\NT{exp}} \opt{\NT{nest\_after\_exp}}}
458 %
459 % \RULE{\rt{nest\_after\_stmt}}
460 % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
461 % \CASE{\NT{decl\_stmt} \opt{\NT{nest\_after\_stmt}}}
462 %
463 % \RULE{\rt{nest\_after\_exp}}
464 % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
465 %
466 % \RULE{\rt{toplevel\_after\_dots}}
467 % \CASE{\opt{\NT{toplevel\_after\_exp}}}
468 % \CASE{\NT{exp} \opt{\NT{toplevel\_after\_exp}}}
469 % \CASE{\NT{decl\_stmt\_expr} \NT{toplevel\_after\_stmt}}
470 %
471 % \RULE{\rt{toplevel\_after\_exp}}
472 % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
473 %
474 % \RULE{\rt{decl\_stmt\_expr}}
475 % \CASE{TMetaStmList$^\ddag$}
476 % \CASE{\NT{decl\_var}}
477 % \CASE{\NT{stmt}}
478 % \CASE{(\NT{stmt\_seq} \ANY{| \NT{stmt\_seq}})}
479 %
480 % \RULE{\rt{toplevel\_after\_stmt}}
481 % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
482 % \CASE{\NT{decl\_stmt} \NT{toplevel\_after\_stmt}}
483
484 \end{grammar}
485
486 \begin{grammar}
487 \RULE{\rt{OPTDOTSEQ}\mth{(}\rt{grammar\_ds}, \rt{when\_ds}\mth{)}}
488 \CASE{}\multicolumn{3}{r}{\hspace{1cm}
489 \KW{\opt{... \opt{\NT{when\_ds}}} \NT{grammar\_ds}
490 \ANY{... \opt{\NT{when\_ds}} \NT{grammar\_ds}}
491 \opt{... \opt{\NT{when\_ds}}}}
492 }
493
494 % \CASE{\opt{... \opt{\NT{when\_ds}}} \NT{grammar}
495 % \ANY{... \opt{\NT{when\_ds}} \NT{grammar}}
496 % \opt{... \opt{\NT{when\_ds}}}}
497 % \CASE{<... \any{\NT{when\_ds}} \NT{grammar} ...>}
498 % \CASE{<+... \any{\NT{when\_ds}} \NT{grammar} ...+>}
499
500 \end{grammar}
501
502 \noindent
503 Lines may be annotated with an element of the set $\{\mtt{-}, \mtt{+},
504 \mtt{*}\}$ or the singleton $\mtt{?}$, or one of each set. \mtt{?}
505 represents at most one match of the given pattern. \mtt{*} is used for
506 semantic match, \emph{i.e.}, a pattern that highlights the fragments
507 annotated with \mtt{*}, but does not perform any modification of the
508 matched code. \mtt{*} cannot be mixed with \mtt{-} and \mtt{+}. There are
509 some constraints on the use of these annotations:
510 \begin{itemize}
511 \item Dots, {\em i.e.} \texttt{...}, cannot occur on a line marked
512 \texttt{+}.
513 \item Nested dots, {\em i.e.}, dots enclosed in {\tt <} and {\tt >}, cannot
514 occur on a line with any marking.
515 \end{itemize}
516
517 Each element of a disjunction must be a proper term like an
518 expression, a statement, an identifier or a declaration. Thus, the
519 rule on the left below is not a syntactically correct SmPL rule. One may
520 use the rule on the right instead.
521
522 \begin{center}
523 \begin{tabular}{l@{\hspace{5cm}}r}
524 \begin{lstlisting}[language=Cocci]
525 @@
526 type T;
527 T b;
528 @@
529
530 (
531 writeb(...,
532 |
533 readb(
534 )
535 @--(T)
536 b)
537 \end{lstlisting}
538 &
539 \begin{lstlisting}[language=Cocci]
540 @@
541 type T;
542 T b;
543 @@
544
545 (
546 read
547 |
548 write
549 )
550 (...,
551 @-- (T)
552 b)
553 \end{lstlisting}
554 \\
555 \end{tabular}
556 \end{center}
557
558 \section{Types}
559 \label{types}
560
561 \begin{grammar}
562
563 \RULE{\rt{ctypes}}
564 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{ctype}\mth{)}}
565
566 \RULE{\rt{ctype}}
567 \CASE{\opt{\NT{const\_vol}} \NT{generic\_ctype} \any{*}}
568 \CASE{\opt{\NT{const\_vol}} void \some{*}}
569 \CASE{(\NT{ctype} \ANY{| \NT{ctype}})}
570
571 \RULE{\rt{const\_vol}}
572 \CASE{const}
573 \CASE{volatile}
574
575 \RULE{\rt{generic\_ctype}}
576 \CASE{\NT{ctype\_qualif}}
577 \CASE{\opt{\NT{ctype\_qualif}} char}
578 \CASE{\opt{\NT{ctype\_qualif}} short}
579 \CASE{\opt{\NT{ctype\_qualif}} int}
580 \CASE{\opt{\NT{ctype\_qualif}} long}
581 \CASE{\opt{\NT{ctype\_qualif}} long long}
582 \CASE{double}
583 \CASE{float}
584 \CASE{\OPT{struct\OR union} \T{id} \OPT{\{ \any{\NT{struct\_decl\_list}} \}}}
585
586 \RULE{\rt{ctype\_qualif}}
587 \CASE{unsigned}
588 \CASE{signed}
589
590 \RULE{\rt{struct\_decl\_list}}
591 \CASE{\NT{struct\_decl\_list\_start}}
592
593 \RULE{\rt{struct\_decl\_list\_start}}
594 \CASE{\NT{struct\_decl}}
595 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
596 \CASE{... \opt{when != \NT{struct\_decl}}$^\dag$ \opt{\NT{continue\_struct\_decl\_list}}}
597
598 \RULE{\rt{continue\_struct\_decl\_list}}
599 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
600 \CASE{\NT{struct\_decl}}
601
602 \RULE{\rt{struct\_decl}}
603 \CASE{\NT{ctype} \NT{d\_ident};}
604 \CASE{\NT{fn\_ctype} (* \NT{d\_ident}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)});}
605 \CASE{\opt{\NT{const\_vol}} \T{id} \NT{d\_ident};}
606
607 \RULE{\rt{d\_ident}}
608 \CASE{\NT{id} \any{[\opt{\NT{expr}}]}}
609
610 \RULE{\rt{fn\_ctype}}
611 \CASE{\NT{generic\_ctype} \any{*}}
612 \CASE{void \any{*}}
613
614 \RULE{\rt{name\_opt\_decl}}
615 \CASE{\NT{decl}}
616 \CASE{\NT{ctype}}
617 \CASE{\NT{fn\_ctype}}
618 \end{grammar}
619
620 $^\dag$ The optional \texttt{when} construct ends at the end of the line.
621
622 \section{Function declarations}
623
624 \begin{grammar}
625
626 \RULE{\rt{fundecl}}
627 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
628 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}})
629 \ttlb~\opt{\NT{stmt\_seq}} \ttrb}
630
631 \RULE{\rt{funproto}}
632 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
633 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}});}
634
635 \RULE{\rt{funinfo}}
636 \CASE{inline}
637 \CASE{\NT{storage}}
638 % \CASE{\NT{attr}}
639
640 \RULE{\rt{storage}}
641 \CASE{static}
642 \CASE{auto}
643 \CASE{register}
644 \CASE{extern}
645
646 \RULE{\rt{funid}}
647 \CASE{\T{id}}
648 \CASE{\mth{\T{metaid}^{\ssf{Id}}}}
649 % \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
650 % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
651
652 \RULE{\rt{param}}
653 \CASE{\NT{type} \T{id}}
654 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
655 \CASE{\mth{\T{metaid}^{\ssf{ParamList}}}}
656
657 \RULE{\rt{decl}}
658 \CASE{\NT{ctype} \NT{id}}
659 \CASE{\NT{fn\_ctype} (* \NT{id}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)})}
660 \CASE{void}
661 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
662 \end{grammar}
663
664 \begin{grammar}
665 \RULE{\rt{PARAMSEQ}\mth{(}\rt{gram\_p}, \rt{when\_p}\mth{)}}
666 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{gram\_p} \OR \ldots \opt{\NT{when\_p}}\mth{)}}
667 \end{grammar}
668
669 %\newpage
670
671 \section{Declarations}
672
673 \begin{grammar}
674 \RULE{\rt{decl\_var}}
675 % \CASE{\NT{type} \opt{\NT{id} \opt{[\opt{\NT{dot\_expr}}]}
676 % \ANY{, \NT{id} \opt{[ \opt{\NT{dot\_expr}}]}}};}
677 \CASE{\NT{common\_decl}}
678 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
679 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
680 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) = \NT{initialize} ;}
681 \CASE{typedef \NT{ctype} \NT{typedef\_ident} ;}
682
683 \RULE{\rt{one\_decl}}
684 \CASE{\NT{common\_decl}}
685 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{id};}
686 % \CASE{\NT{storage} \NT{ctype} \NT{id} \opt{[\opt{\NT{dot\\_expr}}]} = \NT{nest\\_expr};}
687 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} ;}
688
689 \RULE{\rt{common\_decl}}
690 \CASE{\NT{ctype};}
691 \CASE{\NT{funproto}}
692 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{d\_ident} = \NT{initialize} ;}
693 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} = \NT{initialize} ;}
694 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) ;}
695 \CASE{\NT{decl\_ident} ( \OPT{\NT{COMMA\_LIST}\mth{(}\NT{expr}\mth{)}} ) ;}
696
697 \RULE{\rt{initialize}}
698 \CASE{\NT{dot\_expr}}
699 \CASE{\ttlb~\opt{\NT{COMMA\_LIST}\mth{(}\NT{dot\_expr}\mth{)}}~\ttrb}
700
701 \RULE{\rt{decl\_ident}}
702 \CASE{\T{DeclarerId}}
703 \CASE{\mth{\T{metaid}^{\ssf{Declarer}}}}
704 \end{grammar}
705
706 \section{Statements}
707
708 The first rule {\em stmt} describes the various forms of a statement.
709 The remaining rules implement the constraints that are sensitive to the
710 context in which the statement occurs: {\em single\_stmt} for a
711 context in which only one statement is allowed, and {\em decl\_stmt}
712 for a context in which a declaration, statement, or sequence thereof is
713 allowed.
714
715 \begin{grammar}
716 \RULE{\rt{stmt}}
717 \CASE{\NT{include}}
718 \CASE{\mth{\T{metaid}^{\ssf{Stmt}}}}
719 \CASE{\NT{expr};}
720 \CASE{if (\NT{dot\_expr}) \NT{single\_stmt} \opt{else \NT{single\_stmt}}}
721 \CASE{for (\opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}})
722 \NT{single\_stmt}}
723 \CASE{while (\NT{dot\_expr}) \NT{single\_stmt}}
724 \CASE{do \NT{single\_stmt} while (\NT{dot\_expr});}
725 \CASE{\NT{iter\_ident} (\any{\NT{dot\_expr}}) \NT{single\_stmt}}
726 \CASE{switch (\opt{\NT{dot\_expr}}) \ttlb \any{\NT{case\_line}} \ttrb}
727 \CASE{return \opt{\NT{dot\_expr}};}
728 \CASE{\ttlb~\opt{\NT{stmt\_seq}} \ttrb}
729 \CASE{\NT{NEST}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
730 \CASE{\NT{NEST}\mth{(}\NT{expr}, \NT{when}\mth{)}}
731 \CASE{break;}
732 \CASE{continue;}
733 \CASE{\NT{id}:}
734 \CASE{goto \NT{id};}
735 \CASE{\ttlb \NT{stmt\_seq} \ttrb}
736
737 \RULE{\rt{single\_stmt}}
738 \CASE{\NT{stmt}}
739 \CASE{\NT{OR}\mth{(}\NT{stmt}\mth{)}}
740
741 \RULE{\rt{decl\_stmt}}
742 \CASE{\mth{\T{metaid}^{\ssf{StmtList}}}}
743 \CASE{\NT{decl\_var}}
744 \CASE{\NT{stmt}}
745 \CASE{\NT{OR}\mth{(}\NT{stmt\_seq}\mth{)}}
746
747 \RULE{\rt{stmt\_seq}}
748 \CASE{\any{\NT{decl\_stmt}}
749 \opt{\NT{DOTSEQ}\mth{(}\some{\NT{decl\_stmt}},
750 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
751 \CASE{\any{\NT{decl\_stmt}}
752 \opt{\NT{DOTSEQ}\mth{(}\NT{expr},
753 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
754
755 \RULE{\rt{case\_line}}
756 \CASE{default :~\NT{stmt\_seq}}
757 \CASE{case \NT{dot\_expr} :~\NT{stmt\_seq}}
758
759 \RULE{\rt{iter\_ident}}
760 \CASE{\T{IteratorId}}
761 \CASE{\mth{\T{metaid}^{\ssf{Iterator}}}}
762 \end{grammar}
763
764 \begin{grammar}
765 \RULE{\rt{OR}\mth{(}\rt{gram\_o}\mth{)}}
766 \CASE{( \NT{gram\_o} \ANY{\ttmid \NT{gram\_o}})}
767
768 \RULE{\rt{DOTSEQ}\mth{(}\rt{gram\_d}, \rt{when\_d}\mth{)}}
769 \CASE{\ldots \opt{\NT{when\_d}} \ANY{\NT{gram\_d} \ldots \opt{\NT{when\_d}}}}
770
771 \RULE{\rt{NEST}\mth{(}\rt{gram\_n}, \rt{when\_n}\mth{)}}
772 \CASE{<\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots>}
773 \CASE{<+\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots+>}
774 \end{grammar}
775
776 \noindent
777 OR is a macro that generates a disjunction of patterns. The three
778 tokens \T{(}, \T{\ttmid}, and \T{)} must appear in the leftmost
779 column, to differentiate them from the parentheses and bit-or tokens
780 that can appear within expressions (and cannot appear in the leftmost
781 column). These tokens may also be preceded by \texttt{\bs}
782 when they are used in another column. These tokens are furthermore
783 different from (, \(\mid\), and ), which are part of the grammar
784 metalanguage.
785
786 \section{Expressions}
787
788 A nest or a single ellipsis is allowed in some expression contexts, and
789 causes ambiguity in others. For example, in a sequence \mtt{\ldots
790 \mita{expr} \ldots}, the nonterminal \mita{expr} must be instantiated as an
791 explicit C-language expression, while in an array reference,
792 \mtt{\mth{\mita{expr}_1} \mtt{[} \mth{\mita{expr}_2} \mtt{]}}, the
793 nonterminal \mth{\mita{expr}_2}, because it is delimited by brackets, can
794 be also instantiated as \mtt{\ldots}, representing an arbitrary expression. To
795 distinguish between the various possibilities, we define three nonterminals
796 for expressions: {\em expr} does not allow either top-level nests or
797 ellipses, {\em nest\_expr} allows a nest but not an ellipsis, and {\em
798 dot\_expr} allows both. The EXPR macro is used to express these variants
799 in a concise way.
800
801 \begin{grammar}
802 \RULE{\rt{expr}}
803 \CASE{\NT{EXPR}\mth{(}\NT{expr}\mth{)}}
804
805 \RULE{\rt{nest\_expr}}
806 \CASE{\NT{EXPR}\mth{(}\NT{nest\_expr}\mth{)}}
807 \CASE{\NT{NEST}\mth{(}\NT{nest\_expr}, \NT{exp\_whencode}\mth{)}}
808
809 \RULE{\rt{dot\_expr}}
810 \CASE{\NT{EXPR}\mth{(}\NT{dot\_expr}\mth{)}}
811 \CASE{\NT{NEST}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)}}
812 \CASE{...~\opt{\NT{exp\_whencode}}}
813
814 \RULE{\rt{EXPR}\mth{(}\rt{exp}\mth{)}}
815 \CASE{\NT{exp} \NT{assign\_op} \NT{exp}}
816 \CASE{\NT{exp}++}
817 \CASE{\NT{exp}--}
818 \CASE{\NT{unary\_op} \NT{exp}}
819 \CASE{\NT{exp} \NT{bin\_op} \NT{exp}}
820 \CASE{\NT{exp} ?~\NT{dot\_expr} :~\NT{exp}}
821 \CASE{(\NT{type}) \NT{exp}}
822 \CASE{\NT{exp} [\NT{dot\_expr}]}
823 \CASE{\NT{exp} .~\NT{id}}
824 \CASE{\NT{exp} -> \NT{id}}
825 \CASE{\NT{exp}(\opt{\NT{PARAMSEQ}\mth{(}\NT{arg}, \NT{exp\_whencode}\mth{)}})}
826 \CASE{\NT{id}}
827 % \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
828 % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
829 \CASE{\mth{\T{metaid}^{\ssf{Exp}}}}
830 % \CASE{\mth{\T{metaid}^{\ssf{Err}}}}
831 \CASE{\mth{\T{metaid}^{\ssf{Const}}}}
832 \CASE{\NT{const}}
833 \CASE{(\NT{dot\_expr})}
834 \CASE{\NT{OR}\mth{(}\NT{exp}\mth{)}}
835
836 \RULE{\rt{arg}}
837 \CASE{\NT{nest\_expr}}
838 \CASE{\mth{\T{metaid}^{\ssf{ExpList}}}}
839
840 \RULE{\rt{exp\_whencode}}
841 \CASE{when != \NT{expr}}
842
843 \RULE{\rt{assign\_op}}
844 \CASE{= \OR -= \OR += \OR *= \OR /= \OR \%=}
845 \CASE{\&= \OR |= \OR \caret= \OR \lt\lt= \OR \gt\gt=}
846
847 \RULE{\rt{bin\_op}}
848 \CASE{* \OR / \OR \% \OR + \OR -}
849 \CASE{\lt\lt \OR \gt\gt \OR \caret\xspace \OR \& \OR \ttmid}
850 \CASE{< \OR > \OR <= \OR >= \OR == \OR != \OR \&\& \OR \ttmid\ttmid}
851
852 \RULE{\rt{unary\_op}}
853 \CASE{++ \OR -- \OR \& \OR * \OR + \OR - \OR !}
854
855 \end{grammar}
856
857 \section{Constants, Identifiers and Types for Transformations}
858
859 \begin{grammar}
860 \RULE{\rt{const}}
861 \CASE{\NT{string}}
862 \CASE{[0-9]+}
863 \CASE{\mth{\cdots}}
864
865 \RULE{\rt{string}}
866 \CASE{"\any{[\^{}"]}"}
867
868 \RULE{\rt{id}}
869 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Id}}}}
870
871 \RULE{\rt{typedef\_ident}}
872 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Type}}}}
873
874 \RULE{\rt{type}}
875 \CASE{\NT{ctype} \OR \mth{\T{metaid}^{\ssf{Type}}}}
876
877 \RULE{\rt{pathToIsoFile}}
878 \CASE{<.*>}
879 \end{grammar}
880
881 \include{examples}
882 \include{tips}
883 \end{document}
884
885 %%% Local Variables:
886 %%% mode: LaTeX
887 %%% TeX-master: "cocci_syntax"
888 %%% coding: latin-9
889 %%% TeX-PDF-mode: t
890 %%% ispell-local-dictionary: "american"
891 %%% End: