docs/manual/cocci_syntax.tex

   1
   2 %\section{The SmPL Grammar}
   3
   4 % This section presents the SmPL grammar.  This definition follows closely
   5 % our implementation using the Menhir parser generator \cite{menhir}.
   6
   7 This document presents the grammar of the SmPL language used by the
   8 \href{http://coccinelle.lip6.fr/}{Coccinelle tool}.  For the most
   9 part, the grammar is written using standard notation.  In some rules,
  10 however, the left-hand side is in all uppercase letters.  These are
  11 macros, which take one or more grammar rule right-hand-sides as
  12 arguments.  The grammar also uses some unspecified nonterminals, such
  13 as \T{id}, \T{const}, etc.  These refer to the sets suggested by
  14 the name, {\em i.e.}, \T{id} refers to the set of possible
  15 C-language identifiers, while \T{const} refers to the set of
  16 possible C-language constants.
  17 %
  18 \ifhevea
  19 A PDF version of this documentation is available at
  20 \url{http://coccinelle.lip6.fr/docs/main_grammar.pdf}.
  21 \else
  22 An HTML version of this documentation is available online at
  23 \url{http://coccinelle.lip6.fr/docs/main_grammar.html}.
  24 \fi
  25
  26 \section{Program}
  27
  28 \begin{grammar}
  29   \RULE{\rt{program}}
  30   \CASE{\any{\NT{include\_cocci}} \some{\NT{changeset}}}
  31
  32   \RULE{\rt{include\_cocci}}
  33   \CASE{using \NT{string}}
  34   \CASE{using \NT{pathToIsoFile}}
  35   \CASE{virtual \T{id} \ANY{, \T{id}}}
  36
  37   \RULE{\rt{changeset}}
  38   \CASE{\NT{metavariables} \NT{transformation}}
  39   \CASE{\NT{script\_metavariables} \T{script\_code}}
  40 %  \CASE{\NT{metavariables} \ANY{--- filename +++ filename} \NT{transformation}}
  41 \end{grammar}
  42
  43 \noindent
  44 \T{script\_code} is any code in the chosen scripting language.  Parsing of
  45 the semantic patch does not check the validity of this code; any errors are
  46 first detected when the code is executed.  Furthermore, \texttt{@} should
  47 not be used in this code.  Spatch scans the script code for the next
  48 \texttt{@} and considers that to be the beginning of the next rule, even if
  49 \texttt{@} occurs within e.g., a string or a comment.
  50
  51 The \texttt{virtual} keyword is used to declare virtual rules. Virtual
  52 rules may be subsequently used as a dependency for the rules in the
  53 SmPL file. Whether a virtual rule is defined or not is controlled by
  54 the \texttt{-D} option on the command line.
  55
  56 % Between the metavariables and the transformation rule, there can be a
  57 % specification of constraints on the names of the old and new files,
  58 % analogous to the filename specifications in the standard patch syntax.
  59 % (see Figure \ref{scsiglue_patch}).
  60
  61 \section{Metavariables for transformations}
  62
  63 The \NT{rulename} portion of the metavariable declaration can specify
  64 properties of a rule such as its name, the names of the rules that it
  65 depends on, the isomorphisms to be used in processing the rule, and whether
  66 quantification over paths should be universal or existential.  The optional
  67 annotation {\tt expression} indicates that the pattern is to be considered
  68 as matching an expression, and thus can be used to avoid some parsing
  69 problems.
  70
  71 The \NT{metadecl} portion of the metavariable declaration defines various
  72 types of metavariables that will be used for matching in the transformation
  73 section.
  74
  75 \begin{grammar}
  76   \RULE{\rt{metavariables}}
  77   \CASE{@@ \any{\NT{metadecl}} @@}
  78   \CASE{@ \NT{rulename} @ \any{\NT{metadecl}} @@}
  79
  80   \RULE{\rt{rulename}}
  81   \CASE{\T{id} \OPT{extends \T{id}} \OPT{depends on \NT{dep}} \opt{\NT{iso}}
  82     \opt{\NT{disable-iso}} \opt{\NT{exists}} \opt{expression}}
  83
  84   \RULE{\rt{dep}}
  85   \CASE{\NT{pnrule}}
  86   \CASE{\NT{dep} \&\& \NT{dep}}
  87   \CASE{\NT{dep} || \NT{dep}}
  88
  89   \RULE{\rt{pnrule}}
  90   \CASE{\T{id}}
  91   \CASE{!\T{id}}
  92   \CASE{ever \T{id}}
  93   \CASE{never \T{id}}
  94   \CASE{(\NT{dep})}
  95
  96   \RULE{\rt{iso}}
  97   \CASE{using \NT{string} \ANY{, \NT{string}}}
  98
  99   \RULE{\rt{disable-iso}}
 100   \CASE{disable \NT{COMMA\_LIST}\mth{(}\T{id}\mth{)}}
 101
 102   \RULE{\rt{exists}}
 103   \CASE{exists}
 104   \CASE{forall}
 105 %  \CASE{\opt{reverse} forall}
 106
 107   \RULE{\rt{COMMA\_LIST}\mth{(}\rt{elem}\mth{)}}
 108   \CASE{\NT{elem} \ANY{, \NT{elem}}}
 109 \end{grammar}
 110
 111 The keyword \KW{disable} is normally used with the names of
 112 isomorphisms defined in standard.iso or whatever isomorphism file has been
 113 included.  There are, however, some other isomorphisms that are built into
 114 the implementation of Coccinelle and that can be disabled as well.  Their
 115 names are given below.  In each case, the text describes the standard
 116 behavior.  Using \NT{disable-iso} with the given name disables this behavior.
 117
 118 \begin{itemize}
 119 \item \KW{optional\_storage}: A SmPL function definition that does not
 120   specify any visibility (i.e., static or extern), or a SmPL variable
 121   declaration that does not specify any storage (i.e., auto, static,
 122   register, or extern), matches a function declaration or variable
 123   declaration with any visibility or storage, respectively.
 124 \item \KW{optional\_qualifier}: This is similar to \KW{optional\_storage},
 125   except that here it is the qualifier (i.e., const or volatile) that does
 126   not have to be specified in the SmPL code, but may be present in the C code.
 127 \item \KW{value\_format}: Integers in various formats, e.g., 1 and 0x1, are
 128   considered to be equivalent in the matching process.
 129 \item \KW{comm\_assoc}: An expression of the form \NT{exp} \NT{bin\_op}
 130   \KW{...}, where \NT{bin\_op} is commutative and associative, is
 131   considered to match any top-level sequence of \NT{bin\_op} operators
 132   containing \NT{exp} as the top-level argument.
 133 \end{itemize}
 134
 135 The possible types of metavariable declarations are defined by the grammar
 136 rule below.  Metavariables should occur at least once in the transformation
 137 immediately following their declaration.  Fresh metavariables must only be
 138 used in {\tt +} code.  These properties are not expressed in the grammar,
 139 but are checked by a subsequent analysis.  The metavariables are designated
 140 according to the kind of terms they can match, such as a statement, an
 141 identifier, or an expression.  An expression metavariable can be further
 142 constrained by its type.
 143
 144 \begin{grammar}
 145   \RULE{\rt{metadecl}}
 146   \CASE{fresh identifier \NT{ids} ;}
 147   \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
 148   \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_virt\_or\_not\_eq}\mth{)} ;}
 149   \CASE{parameter \opt{list} \NT{ids} ;}
 150   \CASE{parameter list [ \NT{id} ] \NT{ids} ;}
 151   \CASE{type \NT{ids} ;}
 152   \CASE{statement \opt{list} \NT{ids} ;}
 153   \CASE{typedef \NT{ids} ;}
 154   \CASE{declarer name \NT{ids} ;}
 155 %  \CASE{\opt{local} function \NT{pmid\_with\_not\_eq\_list} ;}
 156   \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
 157   \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 158   \CASE{iterator name \NT{ids} ;}
 159   \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
 160   \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 161 %  \CASE{error \NT{pmid\_with\_not\_eq\_list} ; }
 162   \CASE{\opt{local} idexpression \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 163   \CASE{\opt{local} idexpression \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 164   \CASE{\opt{local} idexpression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 165   \CASE{expression list \NT{ids} ;}
 166   \CASE{expression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 167   \CASE{expression \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
 168   \CASE{expression list [ ident ] \NT{ids} ;}
 169   \CASE{\NT{ctype} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 170   \CASE{\NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
 171   \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
 172   \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 173   \CASE{constant \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 174   \CASE{constant \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 175   \CASE{position \opt{any} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq\_mid}\mth{)} ;}
 176 \end{grammar}
 177
 178 \begin{grammar}
 179   \RULE{\rt{ids}}
 180   \CASE{\NT{COMMA\_LIST}\mth{(}\NT{pmid}\mth{)}}
 181
 182   \RULE{\rt{pmid}}
 183   \CASE{\T{id}}
 184   \CASE{\NT{mid}}
 185 %   \CASE{list}
 186 %   \CASE{error}
 187 %   \CASE{type}
 188
 189   \RULE{\rt{mid}}  \CASE{\T{rulename\_id}.\T{id}}
 190
 191   \RULE{\rt{pmid\_with\_regexp}}
 192   \CASE{\NT{pmid} \~{}= \NT{regexp}}
 193
 194   \RULE{\rt{pmid\_with\_not\_eq}}
 195   \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_meta}}}
 196   \CASE{\NT{pmid}
 197      \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_meta}\mth{)} \ttrb}}
 198
 199   \RULE{\rt{pmid\_with\_virt\_or\_not\_eq}}
 200   \CASE{virtual.\T{id}}
 201   \CASE{\NT{pmid\_with\_not\_eq}}
 202
 203   \RULE{\rt{pmid\_with\_not\_ceq}}
 204   \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_cst}}}
 205   \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_cst}\mth{)} \ttrb}}
 206
 207   \RULE{\rt{id\_or\_cst}}
 208   \CASE{\T{id}}
 209   \CASE{\T{integer}}
 210
 211   \RULE{\rt{id\_or\_meta}}
 212   \CASE{\T{id}}
 213   \CASE{\T{rulename\_id}.\T{id}}
 214
 215   \RULE{\rt{pmid\_with\_not\_eq\_mid}}
 216   \CASE{\NT{pmid} \OPT{!= \NT{mid}}}
 217   \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{mid}\mth{)} \ttrb}}
 218 \end{grammar}
 219
 220 Subsequently, we refer to arbitrary metavariables as
 221 \mth{\msf{metaid}^{\mbox{\scriptsize{\it{ty}}}}}, where {\it{ty}}
 222 indicates the {\it metakind} used in the declaration of the variable.
 223 For example, \mth{\msf{metaid}^{\ssf{Type}}} refers to a metavariable
 224 that was declared using \texttt{type} and stands for any type.
 225
 226 The \NT{ctype} and \NT{ctypes} nonterminals are used by both the grammar of
 227 metavariable declarations and the grammar of transformations, and are
 228 defined on page~\pageref{types}.
 229
 230 An identifier metavariable with {\tt virtual} as its ``rule name'' is given
 231 a value on the command line.  For example, if a semantic patch contains a
 232 rule that declares an identifier metavariable with the name {\tt
 233   virtual.alloc}, then the command line could contain {\tt -D
 234   alloc=kmalloc}.  There should not be space around the {\tt =}.  An
 235 example is in {\tt demos/vm.cocci} and {\tt demos/vm.c}.
 236
 237 \section{Metavariables for scripts}
 238
 239 Metavariables for scripts can only be inherited from transformation rules.
 240 In the spirit of scripting languages such as Python that use dynamic
 241 typing, metavariables for scripts do not include type declarations.
 242
 243 \begin{grammar}
 244   \RULE{\rt{script\_metavariables}}
 245   \CASE{@ script:\NT{language} \OPT{depends on \NT{dep}} @
 246         \any{\NT{script\_metadecl}} @@}
 247   \CASE{@ initialize:\NT{language} \OPT{depends on \NT{dep}} @}
 248   \CASE{@ finalize:\NT{language} \OPT{depends on \NT{dep}} @}
 249
 250   \RULE{\rt{language}} \CASE{python} \CASE{ocaml}
 251
 252   \RULE{\rt{script\_metadecl}} \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;}
 253 \end{grammar}
 254
 255 Currently, the only scripting languages that are supported are Python and
 256 OCaml, indicated using {\tt python} and {\tt ocaml}, respectively.  The
 257 set of available scripting languages may be extended at some point.
 258
 259 Script rules declared with \KW{initialize} are run before the treatment of
 260 any file.  Script rules declared with \KW{finalize} are run when the
 261 treatment of all of the files has completed.  There can be at most one of
 262 each per scripting language (thus currently at most two of each).
 263 Initialize and finalize script rules do not have access to SmPL
 264 metavariables.  Nevertheless, a finalize script rule can access any
 265 variables initialized by the other script rules, allowing information to be
 266 transmitted from the matching process to the finalize rule.
 267
 268 \section{Transformation}
 269
 270 The transformation specification essentially has the form of C code,
 271 except that lines to remove are annotated with \verb+-+ in the first
 272 column, and lines to add are annotated with \verb-+-.  A
 273 transformation specification can also use {\em dots}, ``\verb-...-'',
 274 describing an arbitrary sequence of function arguments or instructions
 275 within a control-flow path.  Dots may be modified with a {\tt when}
 276 clause, indicating a pattern that should not occur anywhere within the
 277 matched sequence.  Finally, a transformation can specify a disjunction
 278 of patterns, of the form \mtt{( \mth{\mita{pat}_1} | \mita{\ldots} |
 279   \mth{\mita{pat}_n} )} where each \texttt{(}, \texttt{|} or
 280 \texttt{)} is in column 0 or preceded by \texttt{\textbackslash}.
 281
 282 The grammar that we present for the transformation is not actually the
 283 grammar of the SmPL code that can be written by the programmer, but is
 284 instead the grammar of the slice of this consisting of the {\tt -}
 285 annotated and the unannotated code (the context of the transformed lines),
 286 or the {\tt +} annotated code and the unannotated code.  For example, for
 287 parsing purposes, the following transformation
 288 %presented in Section \ref{sec:seq2}
 289 is split into the two variants shown below and each is parsed
 290 separately.
 291
 292 \begin{center}
 293 \begin{tabular}{c}
 294 \begin{lstlisting}[language=Cocci]
 295   proc_info_func(...) {
 296     <...
 297 @--    hostno
 298 @++    hostptr->host_no
 299     ...>
 300  }
 301 \end{lstlisting}\\
 302 \end{tabular}
 303 \end{center}
 304
 305 {%\sizecodebis
 306 \begin{center}
 307 \begin{tabular}{p{5cm}p{3cm}p{5cm}}
 308 \begin{lstlisting}[language=Cocci]
 309   proc_info_func(...) {
 310     <...
 311 @--    hostno
 312     ...>
 313  }
 314 \end{lstlisting}
 315 &&
 316 \begin{lstlisting}[language=Cocci]
 317   proc_info_func(...) {
 318     <...
 319 @++    hostptr->host_no
 320     ...>
 321  }
 322 \end{lstlisting}
 323 \end{tabular}
 324 \end{center}
 325 }
 326
 327 \noindent
 328 Requiring that both slices parse correctly ensures that the rule matches
 329 syntactically valid C code and that it produces syntactically valid C code.
 330 The generated parse trees are then merged for use in the subsequent
 331 matching and transformation process.
 332
 333 The grammar for the minus or plus slice of a transformation is as follows:
 334
 335 \begin{grammar}
 336
 337   \RULE{\rt{transformation}}
 338   \CASE{\some{\NT{include}}}
 339   \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
 340   \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
 341   \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{fundecl}, \NT{when}\mth{)}}
 342
 343   \RULE{\rt{include}}
 344   \CASE{\#include \T{include\_string}}
 345
 346 %  \RULE{\rt{fun\_decl\_stmt}}
 347 %  \CASE{\NT{decl\_stmt}}
 348 %  \CASE{\NT{fundecl}}
 349
 350 %  \CASE{\NT{ctype}}
 351 %  \CASE{\ttlb \NT{initialize\_list} \ttrb}
 352 %  \CASE{\NT{toplevel\_seq\_start\_after\_dots\_init}}
 353 %
 354 %  \RULE{\rt{toplevel\_seq\_start\_after\_dots\_init}}
 355 %  \CASE{\NT{stmt\_dots} \NT{toplevel\_after\_dots}}
 356 %  \CASE{\NT{expr} \opt{\NT{toplevel\_after\_exp}}}
 357 %  \CASE{\NT{decl\_stmt\_expr} \opt{\NT{toplevel\_after\_stmt}}}
 358 %
 359 %  \RULE{\rt{stmt\_dots}}
 360 %  \CASE{... \any{\NT{when}}}
 361 %  \CASE{<... \any{\NT{when}} \NT{nest\_after\_dots} ...>}
 362 %  \CASE{<+... \any{\NT{when}} \NT{nest\_after\_dots} ...+>}
 363
 364   \RULE{\rt{when}}
 365   \CASE{when != \NT{when\_code}}
 366   \CASE{when = \NT{rule\_elem\_stmt}}
 367   \CASE{when \NT{COMMA\_LIST}\mth{(}\NT{any\_strict}\mth{)}}
 368   \CASE{when true != \NT{expr}}
 369   \CASE{when false != \NT{expr}}
 370
 371   \RULE{\rt{when\_code}}
 372   \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
 373   \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
 374
 375   \RULE{\rt{rule\_elem\_stmt}}
 376   \CASE{\NT{one\_decl}}
 377   \CASE{\NT{expr};}
 378   \CASE{return \opt{\NT{expr}};}
 379   \CASE{break;}
 380   \CASE{continue;}
 381   \CASE{\bs(\NT{rule\_elem\_stmt} \SOME{\bs| \NT{rule\_elem\_stmt}}\bs)}
 382
 383   \RULE{\rt{any\_strict}}
 384   \CASE{any}
 385   \CASE{strict}
 386   \CASE{forall}
 387   \CASE{exists}
 388
 389 %  \RULE{\rt{nest\_after\_dots}}
 390 %  \CASE{\NT{decl\_stmt\_exp} \opt{\NT{nest\_after\_stmt}}}
 391 %  \CASE{\opt{\NT{exp}} \opt{\NT{nest\_after\_exp}}}
 392 %
 393 %  \RULE{\rt{nest\_after\_stmt}}
 394 %  \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
 395 %  \CASE{\NT{decl\_stmt} \opt{\NT{nest\_after\_stmt}}}
 396 %
 397 %  \RULE{\rt{nest\_after\_exp}}
 398 %  \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
 399 %
 400 %  \RULE{\rt{toplevel\_after\_dots}}
 401 %  \CASE{\opt{\NT{toplevel\_after\_exp}}}
 402 %  \CASE{\NT{exp} \opt{\NT{toplevel\_after\_exp}}}
 403 %  \CASE{\NT{decl\_stmt\_expr} \NT{toplevel\_after\_stmt}}
 404 %
 405 %  \RULE{\rt{toplevel\_after\_exp}}
 406 %  \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
 407 %
 408 %  \RULE{\rt{decl\_stmt\_expr}}
 409 %  \CASE{TMetaStmList$^\ddag$}
 410 %  \CASE{\NT{decl\_var}}
 411 %  \CASE{\NT{stmt}}
 412 %  \CASE{(\NT{stmt\_seq} \ANY{| \NT{stmt\_seq}})}
 413 %
 414 %  \RULE{\rt{toplevel\_after\_stmt}}
 415 %  \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
 416 %  \CASE{\NT{decl\_stmt} \NT{toplevel\_after\_stmt}}
 417
 418 \end{grammar}
 419
 420 \begin{grammar}
 421   \RULE{\rt{OPTDOTSEQ}\mth{(}\rt{grammar\_ds}, \rt{when\_ds}\mth{)}}
 422   \CASE{}\multicolumn{3}{r}{\hspace{1cm}
 423   \KW{\opt{... \opt{\NT{when\_ds}}} \NT{grammar\_ds}
 424     \ANY{... \opt{\NT{when\_ds}} \NT{grammar\_ds}}
 425     \opt{... \opt{\NT{when\_ds}}}}
 426   }
 427
 428 %  \CASE{\opt{... \opt{\NT{when\_ds}}} \NT{grammar}
 429 %    \ANY{... \opt{\NT{when\_ds}} \NT{grammar}}
 430 %    \opt{... \opt{\NT{when\_ds}}}}
 431 %  \CASE{<... \any{\NT{when\_ds}} \NT{grammar} ...>}
 432 %  \CASE{<+... \any{\NT{when\_ds}} \NT{grammar} ...+>}
 433
 434 \end{grammar}
 435
 436 \noindent
 437 Lines may be annotated with an element of the set $\{\mtt{-}, \mtt{+},
 438 \mtt{*}\}$ or the singleton $\mtt{?}$, or one of each set. \mtt{?}
 439 represents at most one match of the given pattern. \mtt{*} is used for
 440 semantic match, \emph{i.e.}, a pattern that highlights the fragments
 441 annotated with \mtt{*}, but does not perform any modification of the
 442 matched code. \mtt{*} cannot be mixed with \mtt{-} and \mtt{+}.  There are
 443 some constraints on the use of these annotations:
 444 \begin{itemize}
 445 \item Dots, {\em i.e.} \texttt{...}, cannot occur on a line marked
 446   \texttt{+}.
 447 \item Nested dots, {\em i.e.}, dots enclosed in {\tt <} and {\tt >}, cannot
 448   occur on a line with any marking.
 449 \end{itemize}
 450
 451 Each element of a disjunction must be a proper term like an
 452 expression, a statement, an identifier or a declaration. Thus, the
 453 rule on the left below is not a syntactically correct SmPL rule. One may
 454 use the rule on the right instead.
 455
 456 \begin{center}
 457   \begin{tabular}{l@{\hspace{5cm}}r}
 458 \begin{lstlisting}[language=Cocci]
 459 @@
 460 type T;
 461 T b;
 462 @@
 463
 464 (
 465  writeb(...,
 466 |
 467  readb(
 468 )
 469 @--(T)
 470  b)
 471 \end{lstlisting}
 472     &
 473 \begin{lstlisting}[language=Cocci]
 474 @@
 475 type T;
 476 T b;
 477 @@
 478
 479 (
 480 read
 481 |
 482 write
 483 )
 484  (...,
 485 @-- (T)
 486   b)
 487 \end{lstlisting}
 488     \\
 489   \end{tabular}
 490 \end{center}
 491
 492 \section{Types}
 493 \label{types}
 494
 495 \begin{grammar}
 496
 497   \RULE{\rt{ctypes}}
 498   \CASE{\NT{COMMA\_LIST}\mth{(}\NT{ctype}\mth{)}}
 499
 500   \RULE{\rt{ctype}}
 501   \CASE{\opt{\NT{const\_vol}} \NT{generic\_ctype} \any{*}}
 502   \CASE{\opt{\NT{const\_vol}} void \some{*}}
 503   \CASE{(\NT{ctype} \ANY{| \NT{ctype}})}
 504
 505   \RULE{\rt{const\_vol}}
 506   \CASE{const}
 507   \CASE{volatile}
 508
 509   \RULE{\rt{generic\_ctype}}
 510   \CASE{\NT{ctype\_qualif}}
 511   \CASE{\opt{\NT{ctype\_qualif}} char}
 512   \CASE{\opt{\NT{ctype\_qualif}} short}
 513   \CASE{\opt{\NT{ctype\_qualif}} int}
 514   \CASE{\opt{\NT{ctype\_qualif}} long}
 515   \CASE{\opt{\NT{ctype\_qualif}} long long}
 516   \CASE{double}
 517   \CASE{float}
 518   \CASE{\OPT{struct\OR union} \T{id} \OPT{\{ \any{\NT{struct\_decl\_list}} \}}}
 519
 520   \RULE{\rt{ctype\_qualif}}
 521   \CASE{unsigned}
 522   \CASE{signed}
 523
 524   \RULE{\rt{struct\_decl\_list}}
 525   \CASE{\NT{struct\_decl\_list\_start}}
 526
 527   \RULE{\rt{struct\_decl\_list\_start}}
 528   \CASE{\NT{struct\_decl}}
 529   \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
 530   \CASE{... \opt{when != \NT{struct\_decl}}$^\dag$ \opt{\NT{continue\_struct\_decl\_list}}}
 531
 532   \RULE{\rt{continue\_struct\_decl\_list}}
 533   \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
 534   \CASE{\NT{struct\_decl}}
 535
 536   \RULE{\rt{struct\_decl}}
 537   \CASE{\NT{ctype} \NT{d\_ident};}
 538   \CASE{\NT{fn\_ctype} (* \NT{d\_ident}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)});}
 539   \CASE{\opt{\NT{const\_vol}} \T{id} \NT{d\_ident};}
 540
 541   \RULE{\rt{d\_ident}}
 542   \CASE{\NT{id} \any{[\opt{\NT{expr}}]}}
 543
 544   \RULE{\rt{fn\_ctype}}
 545   \CASE{\NT{generic\_ctype} \any{*}}
 546   \CASE{void \any{*}}
 547
 548   \RULE{\rt{name\_opt\_decl}}
 549   \CASE{\NT{decl}}
 550   \CASE{\NT{ctype}}
 551   \CASE{\NT{fn\_ctype}}
 552 \end{grammar}
 553
 554 $^\dag$ The optional \texttt{when} construct ends at the end of the line.
 555
 556 \section{Function declarations}
 557
 558 \begin{grammar}
 559
 560   \RULE{\rt{fundecl}}
 561   \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
 562     (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}})
 563     \ttlb~\opt{\NT{stmt\_seq}} \ttrb}
 564
 565   \RULE{\rt{funproto}}
 566   \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
 567     (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}});}
 568
 569   \RULE{\rt{funinfo}}
 570   \CASE{inline}
 571   \CASE{\NT{storage}}
 572 %   \CASE{\NT{attr}}
 573
 574   \RULE{\rt{storage}}
 575   \CASE{static}
 576   \CASE{auto}
 577   \CASE{register}
 578   \CASE{extern}
 579
 580   \RULE{\rt{funid}}
 581   \CASE{\T{id}}
 582   \CASE{\mth{\T{metaid}^{\ssf{Id}}}}
 583 %   \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
 584 %   \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
 585
 586   \RULE{\rt{param}}
 587   \CASE{\NT{type} \T{id}}
 588   \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
 589   \CASE{\mth{\T{metaid}^{\ssf{ParamList}}}}
 590
 591   \RULE{\rt{decl}}
 592   \CASE{\NT{ctype} \NT{id}}
 593   \CASE{\NT{fn\_ctype} (* \NT{id}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)})}
 594   \CASE{void}
 595   \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
 596 \end{grammar}
 597
 598 \begin{grammar}
 599   \RULE{\rt{PARAMSEQ}\mth{(}\rt{gram\_p}, \rt{when\_p}\mth{)}}
 600   \CASE{\NT{COMMA\_LIST}\mth{(}\NT{gram\_p} \OR \ldots \opt{\NT{when\_p}}\mth{)}}
 601 \end{grammar}
 602
 603 To match a function it is not necessary to provide all of the annotations
 604 that appear before the function name.  For example, the following semantic
 605 patch:
 606
 607 \begin{lstlisting}[language=Cocci]
 608 @@
 609 @@
 610
 611 foo() { ... }
 612 \end{lstlisting}
 613
 614 \noindent
 615 matches a function declared as follows:
 616
 617 \begin{lstlisting}[language=C]
 618 static int foo() { return 12; }
 619 \end{lstlisting}
 620
 621 \noindent
 622 This behavior can be turned off by disabling the \KW{optional\_storage}
 623 isomorphism.  If one adds code before a function declaration, then the
 624 effect depends on the kind of code that is added.  If the added code is a
 625 function definition or CPP code, then the new code is placed before
 626 all information associated with the function definition, including any
 627 comments preceding the function definition.  On the other hand, if the new
 628 code is associated with the function, such as the addition of the keyword
 629 {\tt static}, the new code is placed exactly where it appears with respect
 630 to the rest of the function definition in the semantic patch.  For example,
 631
 632 \begin{lstlisting}[language=Cocci]
 633 @@
 634 @@
 635
 636 + static
 637 foo() { ... }
 638 \end{lstlisting}
 639
 640 \noindent
 641 causes static to be placed just before the function name.  The following
 642 causes it to be placed just before the type:
 643
 644 \begin{lstlisting}[language=Cocci]
 645 @@
 646 type T;
 647 @@
 648
 649 + static
 650 T foo() { ... }
 651 \end{lstlisting}
 652
 653 \noindent
 654 It may be necessary to consider several cases to ensure that the added code
 655 is placed in the right position.  For example, one may need one pattern
 656 that considers that the function is declared {\tt inline} and another that
 657 considers that it is not.
 658
 659 %\newpage
 660
 661 \section{Declarations}
 662
 663 \begin{grammar}
 664   \RULE{\rt{decl\_var}}
 665 %  \CASE{\NT{type} \opt{\NT{id} \opt{[\opt{\NT{dot\_expr}}]}
 666 %      \ANY{, \NT{id} \opt{[ \opt{\NT{dot\_expr}}]}}};}
 667   \CASE{\NT{common\_decl}}
 668   \CASE{\opt{\NT{storage}} \NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
 669   \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
 670   \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) = \NT{initialize} ;}
 671   \CASE{typedef \NT{ctype} \NT{typedef\_ident} ;}
 672
 673   \RULE{\rt{one\_decl}}
 674   \CASE{\NT{common\_decl}}
 675   \CASE{\opt{\NT{storage}} \NT{ctype} \NT{id};}
 676 %  \CASE{\NT{storage} \NT{ctype} \NT{id} \opt{[\opt{\NT{dot\\_expr}}]} = \NT{nest\\_expr};}
 677   \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} ;}
 678
 679   \RULE{\rt{common\_decl}}
 680   \CASE{\NT{ctype};}
 681   \CASE{\NT{funproto}}
 682   \CASE{\opt{\NT{storage}} \NT{ctype} \NT{d\_ident} = \NT{initialize} ;}
 683   \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} = \NT{initialize} ;}
 684   \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) ;}
 685   \CASE{\NT{decl\_ident} ( \OPT{\NT{COMMA\_LIST}\mth{(}\NT{expr}\mth{)}} ) ;}
 686
 687   \RULE{\rt{initialize}}
 688   \CASE{\NT{dot\_expr}}
 689   \CASE{\ttlb~\opt{\NT{COMMA\_LIST}\mth{(}\NT{dot\_expr}\mth{)}}~\ttrb}
 690
 691   \RULE{\rt{decl\_ident}}
 692   \CASE{\T{DeclarerId}}
 693   \CASE{\mth{\T{metaid}^{\ssf{Declarer}}}}
 694 \end{grammar}
 695
 696 \section{Statements}
 697
 698 The first rule {\em stmt} describes the various forms of a statement.
 699 The remaining rules implement the constraints that are sensitive to the
 700 context in which the statement occurs: {\em single\_stmt} for a
 701 context in which only one statement is allowed, and {\em decl\_stmt}
 702 for a context in which a declaration, statement, or sequence thereof is
 703 allowed.
 704
 705 \begin{grammar}
 706   \RULE{\rt{stmt}}
 707   \CASE{\NT{include}}
 708   \CASE{\mth{\T{metaid}^{\ssf{Stmt}}}}
 709   \CASE{\NT{expr};}
 710   \CASE{if (\NT{dot\_expr}) \NT{single\_stmt} \opt{else \NT{single\_stmt}}}
 711   \CASE{for (\opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}})
 712     \NT{single\_stmt}}
 713   \CASE{while (\NT{dot\_expr}) \NT{single\_stmt}}
 714   \CASE{do \NT{single\_stmt} while (\NT{dot\_expr});}
 715   \CASE{\NT{iter\_ident} (\any{\NT{dot\_expr}}) \NT{single\_stmt}}
 716   \CASE{switch (\opt{\NT{dot\_expr}}) \ttlb \any{\NT{case\_line}} \ttrb}
 717   \CASE{return \opt{\NT{dot\_expr}};}
 718   \CASE{\ttlb~\opt{\NT{stmt\_seq}} \ttrb}
 719   \CASE{\NT{NEST}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
 720   \CASE{\NT{NEST}\mth{(}\NT{expr}, \NT{when}\mth{)}}
 721   \CASE{break;}
 722   \CASE{continue;}
 723   \CASE{\NT{id}:}
 724   \CASE{goto \NT{id};}
 725   \CASE{\ttlb \NT{stmt\_seq} \ttrb}
 726
 727   \RULE{\rt{single\_stmt}}
 728   \CASE{\NT{stmt}}
 729   \CASE{\NT{OR}\mth{(}\NT{stmt}\mth{)}}
 730
 731   \RULE{\rt{decl\_stmt}}
 732   \CASE{\mth{\T{metaid}^{\ssf{StmtList}}}}
 733   \CASE{\NT{decl\_var}}
 734   \CASE{\NT{stmt}}
 735   \CASE{\NT{OR}\mth{(}\NT{stmt\_seq}\mth{)}}
 736
 737   \RULE{\rt{stmt\_seq}}
 738   \CASE{\any{\NT{decl\_stmt}}
 739     \opt{\NT{DOTSEQ}\mth{(}\some{\NT{decl\_stmt}},
 740       \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
 741   \CASE{\any{\NT{decl\_stmt}}
 742     \opt{\NT{DOTSEQ}\mth{(}\NT{expr},
 743       \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
 744
 745   \RULE{\rt{case\_line}}
 746   \CASE{default :~\NT{stmt\_seq}}
 747   \CASE{case \NT{dot\_expr} :~\NT{stmt\_seq}}
 748
 749   \RULE{\rt{iter\_ident}}
 750   \CASE{\T{IteratorId}}
 751   \CASE{\mth{\T{metaid}^{\ssf{Iterator}}}}
 752 \end{grammar}
 753
 754 \begin{grammar}
 755   \RULE{\rt{OR}\mth{(}\rt{gram\_o}\mth{)}}
 756   \CASE{( \NT{gram\_o} \ANY{\ttmid \NT{gram\_o}})}
 757
 758   \RULE{\rt{DOTSEQ}\mth{(}\rt{gram\_d}, \rt{when\_d}\mth{)}}
 759   \CASE{\ldots \opt{\NT{when\_d}} \ANY{\NT{gram\_d} \ldots \opt{\NT{when\_d}}}}
 760
 761   \RULE{\rt{NEST}\mth{(}\rt{gram\_n}, \rt{when\_n}\mth{)}}
 762   \CASE{<\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots>}
 763   \CASE{<+\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots+>}
 764 \end{grammar}
 765
 766 \noindent
 767 OR is a macro that generates a disjunction of patterns.  The three
 768 tokens \T{(}, \T{\ttmid}, and \T{)} must appear in the leftmost
 769 column, to differentiate them from the parentheses and bit-or tokens
 770 that can appear within expressions (and cannot appear in the leftmost
 771 column). These tokens may also be preceded by \texttt{\bs}
 772 when they are used in another column.  These tokens are furthermore
 773 different from (, \(\mid\), and ), which are part of the grammar
 774 metalanguage.
 775
 776 \section{Expressions}
 777
 778 A nest or a single ellipsis is allowed in some expression contexts, and
 779 causes ambiguity in others.  For example, in a sequence \mtt{\ldots
 780 \mita{expr} \ldots}, the nonterminal \mita{expr} must be instantiated as an
 781 explicit C-language expression, while in an array reference,
 782 \mtt{\mth{\mita{expr}_1} \mtt{[} \mth{\mita{expr}_2} \mtt{]}}, the
 783 nonterminal \mth{\mita{expr}_2}, because it is delimited by brackets, can
 784 also be instantiated as \mtt{\ldots}, representing an arbitrary expression.  To
 785 distinguish between the various possibilities, we define three nonterminals
 786 for expressions: {\em expr} does not allow either top-level nests or
 787 ellipses, {\em nest\_expr} allows a nest but not an ellipsis, and {\em
 788 dot\_expr} allows both.  The EXPR macro is used to express these variants
 789 in a concise way.
 790
 791 \begin{grammar}
 792   \RULE{\rt{expr}}
 793   \CASE{\NT{EXPR}\mth{(}\NT{expr}\mth{)}}
 794
 795   \RULE{\rt{nest\_expr}}
 796   \CASE{\NT{EXPR}\mth{(}\NT{nest\_expr}\mth{)}}
 797   \CASE{\NT{NEST}\mth{(}\NT{nest\_expr}, \NT{exp\_whencode}\mth{)}}
 798
 799   \RULE{\rt{dot\_expr}}
 800   \CASE{\NT{EXPR}\mth{(}\NT{dot\_expr}\mth{)}}
 801   \CASE{\NT{NEST}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)}}
 802   \CASE{...~\opt{\NT{exp\_whencode}}}
 803
 804   \RULE{\rt{EXPR}\mth{(}\rt{exp}\mth{)}}
 805   \CASE{\NT{exp} \NT{assign\_op} \NT{exp}}
 806   \CASE{\NT{exp}++}
 807   \CASE{\NT{exp}--}
 808   \CASE{\NT{unary\_op} \NT{exp}}
 809   \CASE{\NT{exp} \NT{bin\_op} \NT{exp}}
 810   \CASE{\NT{exp} ?~\NT{dot\_expr} :~\NT{exp}}
 811   \CASE{(\NT{type}) \NT{exp}}
 812   \CASE{\NT{exp} [\NT{dot\_expr}]}
 813   \CASE{\NT{exp} .~\NT{id}}
 814   \CASE{\NT{exp} -> \NT{id}}
 815   \CASE{\NT{exp}(\opt{\NT{PARAMSEQ}\mth{(}\NT{arg}, \NT{exp\_whencode}\mth{)}})}
 816   \CASE{\NT{id}}
 817 %   \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
 818 %   \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
 819   \CASE{\mth{\T{metaid}^{\ssf{Exp}}}}
 820 %   \CASE{\mth{\T{metaid}^{\ssf{Err}}}}
 821   \CASE{\mth{\T{metaid}^{\ssf{Const}}}}
 822   \CASE{\NT{const}}
 823   \CASE{(\NT{dot\_expr})}
 824   \CASE{\NT{OR}\mth{(}\NT{exp}\mth{)}}
 825
 826   \RULE{\rt{arg}}
 827   \CASE{\NT{nest\_expr}}
 828   \CASE{\mth{\T{metaid}^{\ssf{ExpList}}}}
 829
 830   \RULE{\rt{exp\_whencode}}
 831   \CASE{when != \NT{expr}}
 832
 833   \RULE{\rt{assign\_op}}
 834   \CASE{= \OR -= \OR += \OR *= \OR /= \OR \%=}
 835   \CASE{\&= \OR |= \OR \caret= \OR \lt\lt= \OR \gt\gt=}
 836
 837   \RULE{\rt{bin\_op}}
 838   \CASE{* \OR / \OR \% \OR + \OR -}
 839   \CASE{\lt\lt \OR \gt\gt \OR \caret\xspace \OR \& \OR \ttmid}
 840   \CASE{< \OR > \OR <= \OR >= \OR == \OR != \OR \&\& \OR \ttmid\ttmid}
 841
 842   \RULE{\rt{unary\_op}}
 843   \CASE{++ \OR -- \OR \& \OR * \OR + \OR - \OR !}
 844
 845 \end{grammar}
 846
 847 \section{Constants, Identifiers and Types for Transformations}
 848
 849 \begin{grammar}
 850   \RULE{\rt{const}}
 851   \CASE{\NT{string}}
 852   \CASE{[0-9]+}
 853   \CASE{\mth{\cdots}}
 854
 855   \RULE{\rt{string}}
 856   \CASE{"\any{[\^{}"]}"}
 857
 858   \RULE{\rt{id}}
 859   \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Id}}}}
 860
 861   \RULE{\rt{typedef\_ident}}
 862   \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Type}}}}
 863
 864   \RULE{\rt{type}}
 865   \CASE{\NT{ctype} \OR \mth{\T{metaid}^{\ssf{Type}}}}
 866
 867   \RULE{\rt{pathToIsoFile}}
 868   \CASE{<.*>}
 869
 870   \RULE{\rt{regexp}}
 871   \CASE{"\any{[\^{}"]}"}
 872 \end{grammar}
 873
 874
 875 %%% Local Variables:
 876 %%% mode: LaTeX
 877 %%% TeX-master: "main_grammar"
 878 %%% coding: utf-8
 879 %%% TeX-PDF-mode: t
 880 %%% ispell-local-dictionary: "american"
 881 %%% End: