docs/manual/cocci_syntax.tex

   1
   2 %\section{The SmPL Grammar}
   3
   4 % This section presents the SmPL grammar.  This definition follows closely
   5 % our implementation using the Menhir parser generator \cite{menhir}.
   6
   7 This document presents the grammar of the SmPL language used by the
   8 \href{http://www.emn.fr/x-info/coccinelle}{Coccinelle tool}.  For the most
   9 part, the grammar is written using standard notation.  In some rules,
  10 however, the left-hand side is in all uppercase letters.  These are
  11 macros, which take one or more grammar rule right-hand-sides as
  12 arguments.  The grammar also uses some unspecified nonterminals, such
  13 as \T{id}, \T{const}, etc.  These refer to the sets suggested by
  14 the name, {\em i.e.}, \T{id} refers to the set of possible
  15 C-language identifiers, while \T{const} refers to the set of
  16 possible C-language constants.
  17 %
  18 \ifhevea
  19 A PDF version of this documentation is available at
  20 \url{http://www.emn.fr/x-info/coccinelle/docs/cocci_syntax.pdf}.
  21 \else
  22 An HTML version of this documentation is available online at
  23 \url{http://www.emn.fr/x-info/coccinelle/docs/cocci_syntax.html}.
  24 \fi
  25
  26 %% \ifhevea A PDF
  27 %% version of this documentation is available at
  28 %% \url{http://localhost:8080/coccinelle/cocci_syntax.pdf}.\else A HTML
  29 %% version of this documentation is available online at
  30 %% \url{http://localhost:8080/coccinelle/cocci_syntax.html}. \fi
  31
  32 \section{Program}
  33
  34 \begin{grammar}
  35   \RULE{\rt{program}}
  36   \CASE{\any{\NT{include\_cocci}} \some{\NT{changeset}}}
  37
  38   \RULE{\rt{include\_cocci}}
  39   \CASE{using \NT{string}}
  40   \CASE{using \NT{pathToIsoFile}}
  41
  42   \RULE{\rt{changeset}}
  43   \CASE{\NT{metavariables} \NT{transformation}}
  44   \CASE{\NT{script\_metavariables} \T{script\_code}}
  45 %  \CASE{\NT{metavariables} \ANY{--- filename +++ filename} \NT{transformation}}
  46 \end{grammar}
  47
  48 \noindent
  49 \T{script\_code} is any code in the chosen scripting language.  Parsing of
  50 the semantic patch does not check the validity of this code; any errors are
  51 first detected when the code is executed.
  52
  53 % Between the metavariables and the transformation rule, there can be a
  54 % specification of constraints on the names of the old and new files,
  55 % analogous to the filename specifications in the standard patch syntax.
  56 % (see Figure \ref{scsiglue_patch}).
  57
  58 \section{Metavariables for transformations}
  59
  60 The \NT{rulename} portion of the metavariable declaration can specify
  61 properties of a rule such as its name, the names of the rules that it
  62 depends on, the isomorphisms to be used in processing the rule, and whether
  63 quantification over paths should be universal or existential.  The optional
  64 annotation {\tt expression} indicates that the pattern is to be considered
  65 as matching an expression, and thus can be used to avoid some parsing
  66 problems.
  67
  68 The \NT{metadecl} portion of the metavariable declaration defines various
  69 types of metavariables that will be used for matching in the transformation
  70 section.
  71
  72 \begin{grammar}
  73   \RULE{\rt{metavariables}}
  74   \CASE{@@ \any{\NT{metadecl}} @@}
  75   \CASE{@ \NT{rulename} @ \any{\NT{metadecl}} @@}
  76
  77   \RULE{\rt{rulename}}
  78   \CASE{\T{id} \OPT{extends \T{id}} \OPT{depends on \NT{dep}} \opt{\NT{iso}}
  79     \opt{\NT{disable-iso}} \opt{\NT{exists}} \opt{expression}}
  80   \CASE{script:\T{language} \OPT{depends on \NT{dep}}}
  81
  82   \RULE{\rt{script\_init\_final}}
  83   \CASE{initialize:\T{language}}
  84   \CASE{finalize:\T{language}}
  85
  86   \RULE{\rt{dep}}
  87   \CASE{\NT{pnrule}}
  88   \CASE{\NT{dep} \&\& \NT{dep}}
  89   \CASE{\NT{dep} || \NT{dep}}
  90
  91   \RULE{\rt{pnrule}}
  92   \CASE{\T{id}}
  93   \CASE{!\T{id}}
  94   \CASE{ever \T{id}}
  95   \CASE{never \T{id}}
  96   \CASE{(\NT{dep})}
  97
  98   \RULE{\rt{iso}}
  99   \CASE{using \NT{string} \ANY{, \NT{string}}}
 100
 101   \RULE{\rt{disable-iso}}
 102   \CASE{disable \NT{COMMA\_LIST}\mth{(}\T{id}\mth{)}}
 103
 104   \RULE{\rt{exists}}
 105   \CASE{exists}
 106   \CASE{forall}
 107 %  \CASE{\opt{reverse} forall}
 108
 109   \RULE{\rt{COMMA\_LIST}\mth{(}\rt{elem}\mth{)}}
 110   \CASE{\NT{elem} \ANY{, \NT{elem}}}
 111 \end{grammar}
 112
 113 The keyword \KW{disable} is normally used with the names of
 114 isomorphisms defined in standard.iso or whatever isomorphism file has been
 115 included.  There are, however, some other isomorphisms that are built into
 116 the implementation of Coccinelle and that can be disabled as well.  Their
 117 names are given below.  In each case, the text describes the standard
 118 behavior.  Using \NT{disable-iso} with the given name disables this behavior.
 119
 120 \begin{itemize}
 121 \item \KW{optional\_storage}: A SmPL function definition that does not
 122   specify any visibility (i.e., static or extern), or a SmPL variable
 123   declaration that does not specify any storage (i.e., auto, static,
 124   register, or extern), matches a function declaration or variable
 125   declaration with any visibility or storage, respectively.
 126 \item \KW{optional\_qualifier}: This is similar to \KW{optional\_storage},
 127   except that here it is the qualifier (i.e., const or volatile) that does
 128   not have to be specified in the SmPL code, but may be present in the C code.
 129 \item \KW{value\_format}: Integers in various formats, e.g., 1 and 0x1, are
 130   considered to be equivalent in the matching process.
 131 \item \KW{comm\_assoc}: An expression of the form \NT{exp} \NT{bin\_op}
 132   \KW{...}, where \NT{bin\_op} is commutative and associative, is
 133   considered to match any top-level sequence of \NT{bin\_op} operators
 134   containing \NT{exp} as the top-level argument.
 135 \end{itemize}
 136
 137 The possible types of metavariable declarations are defined by the grammar
 138 rule below.  Metavariables should occur at least once in the transformation
 139 immediately following their declaration.  Fresh metavariables must only be
 140 used in {\tt +} code.  These properties are not expressed in the grammar,
 141 but are checked by a subsequent analysis.  The metavariables are designated
 142 according to the kind of terms they can match, such as a statement, an
 143 identifier, or an expression.  An expression metavariable can be further
 144 constrained by its type.
 145
 146 \begin{grammar}
 147   \RULE{\rt{metadecl}}
 148   \CASE{fresh identifier \NT{ids} ;}
 149   \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 150   \CASE{parameter \opt{list} \NT{ids} ;}
 151   \CASE{parameter list [ \NT{id} ] \NT{ids} ;}
 152   \CASE{type \NT{ids} ;}
 153   \CASE{statement \opt{list} \NT{ids} ;}
 154   \CASE{typedef \NT{ids} ;}
 155   \CASE{declarer name \NT{ids} ;}
 156 %  \CASE{\opt{local} function \NT{pmid\_with\_not\_eq\_list} ;}
 157   \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 158   \CASE{iterator name \NT{ids} ;}
 159   \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 160 %  \CASE{error \NT{pmid\_with\_not\_eq\_list} ; }
 161   \CASE{\opt{local} idexpression \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 162   \CASE{\opt{local} idexpression \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 163   \CASE{\opt{local} idexpression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 164   \CASE{expression list \NT{ids} ;}
 165   \CASE{expression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 166   \CASE{expression \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
 167   \CASE{expression list [ ident ] \NT{ids} ;}
 168   \CASE{\NT{ctype} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 169   \CASE{\NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
 170   \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
 171   \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 172   \CASE{constant \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 173   \CASE{constant \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 174   \CASE{position \opt{any} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq\_mid}\mth{)} ;}
 175 \end{grammar}
 176
 177 \begin{grammar}
 178   \RULE{\rt{ids}}
 179   \CASE{\NT{COMMA\_LIST}\mth{(}\NT{pmid}\mth{)}}
 180
 181   \RULE{\rt{pmid}}
 182   \CASE{\T{id}}
 183   \CASE{\NT{mid}}
 184 %   \CASE{list}
 185 %   \CASE{error}
 186 %   \CASE{type}
 187
 188   \RULE{\rt{mid}}  \CASE{\T{rulename\_id}.\T{id}}
 189
 190   \RULE{\rt{pmid\_with\_not\_eq}}
 191   \CASE{\NT{pmid} \OPT{!= \T{id}}}
 192   \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\T{id}\mth{)} \ttrb}}
 193
 194   \RULE{\rt{pmid\_with\_not\_ceq}}
 195   \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_cst}}}
 196   \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_cst}\mth{)} \ttrb}}
 197
 198   \RULE{\rt{id\_or\_cst}}
 199   \CASE{\T{id}}
 200   \CASE{\T{integer}}
 201
 202   \RULE{\rt{pmid\_with\_not\_eq\_mid}}
 203   \CASE{\NT{pmid} \OPT{!= \NT{mid}}}
 204   \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{mid}\mth{)} \ttrb}}
 205 \end{grammar}
 206
 207 Subsequently, we refer to arbitrary metavariables as
 208 \mth{\msf{metaid}^{\mbox{\scriptsize{\it{ty}}}}}, where {\it{ty}}
 209 indicates the {\it metakind} used in the declaration of the variable.
 210 For example, \mth{\msf{metaid}^{\ssf{Type}}} refers to a metavariable
 211 that was declared using \texttt{type} and stands for any type.
 212
 213 The \NT{ctype} and \NT{ctypes} nonterminals are used by both the grammar of
 214 metavariable declarations and the grammar of transformations, and are
 215 defined on page~\pageref{types}.
 216
 217 \section{Metavariables for scripts}
 218
 219 Metavariables for scripts can only be inherited from transformation rules.
 220 In the spirit of scripting languages such as Python that use dynamic
 221 typing, metavariables for scripts do not include type declarations.
 222
 223 \begin{grammar}
 224   \RULE{\rt{script\_metavariables}}
 225   \CASE{@ script:\NT{language} \OPT{depends on \NT{dep}} @
 226         \any{\NT{script\_metadecl}} @@}
 227   \CASE{@ initialize:\NT{language} @}
 228   \CASE{@ finalize:\NT{language} @}
 229
 230   \RULE{\rt{language}} \CASE{python}
 231
 232   \RULE{\rt{script\_metadecl}} \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;}
 233 \end{grammar}
 234
 235 Currently, the only scripting language that is supported is Python.  The
 236 set of available scripting languages may be extended at some point.
 237
 238 Script rules declared with \KW{initialize} are run before the treatment of
 239 any file.  Script rules declared with \KW{finalize} are run when the
 240 treatment of all of the files has completed.  There can be at most one of
 241 each per scripting language (thus currently at most one of each).
 242 Initialize and finalize script rules do not have access to SmPL
 243 metavariables.  Nevertheless, a finalize script rule can access any
 244 variables initialized by the other script rules, allowing information to be
 245 transmitted from the matching process to the finalize rule.
 246
 247 \section{Transformation}
 248
 249 The transformation specification essentially has the form of C code,
 250 except that lines to remove are annotated with \verb+-+ in the first
 251 column, and lines to add are annotated with \verb-+-.  A
 252 transformation specification can also use {\em dots}, ``\verb-...-'',
 253 describing an arbitrary sequence of function arguments or instructions
 254 within a control-flow path.  Dots may be modified with a {\tt when}
 255 clause, indicating a pattern that should not occur anywhere within the
 256 matched sequence.  Finally, a transformation can specify a disjunction
 257 of patterns, of the form \mtt{( \mth{\mita{pat}_1} | \mita{\ldots} |
 258   \mth{\mita{pat}_n} )} where each \texttt{(}, \texttt{|} or
 259 \texttt{)} is in column 0 or preceded by \texttt{\textbackslash}.
 260
 261 The grammar that we present for the transformation is not actually the
 262 grammar of the SmPL code that can be written by the programmer, but is
 263 instead the grammar of the slice of this consisting of the {\tt -}
 264 annotated and the unannotated code (the context of the transformed lines),
 265 or the {\tt +} annotated code and the unannotated code.  For example, for
 266 parsing purposes, the following transformation
 267 %presented in Section \ref{sec:seq2}
 268 is split into the two variants shown below and each is parsed
 269 separately.
 270
 271 \begin{center}
 272 \begin{tabular}{c}
 273 \begin{lstlisting}[language=Cocci]
 274   proc_info_func(...) {
 275     <...
 276 @--    hostno
 277 @++    hostptr->host_no
 278     ...>
 279  }
 280 \end{lstlisting}\\
 281 \end{tabular}
 282 \end{center}
 283
 284 {%\sizecodebis
 285 \begin{center}
 286 \begin{tabular}{p{5cm}p{3cm}p{5cm}}
 287 \begin{lstlisting}[language=Cocci]
 288   proc_info_func(...) {
 289     <...
 290 @--    hostno
 291     ...>
 292  }
 293 \end{lstlisting}
 294 &&
 295 \begin{lstlisting}[language=Cocci]
 296   proc_info_func(...) {
 297     <...
 298 @++    hostptr->host_no
 299     ...>
 300  }
 301 \end{lstlisting}
 302 \end{tabular}
 303 \end{center}
 304 }
 305
 306 \noindent
 307 Requiring that both slices parse correctly ensures that the rule matches
 308 syntactically valid C code and that it produces syntactically valid C code.
 309 The generated parse trees are then merged for use in the subsequent
 310 matching and transformation process.
 311
 312 The grammar for the minus or plus slice of a transformation is as follows:
 313
 314 \begin{grammar}
 315
 316   \RULE{\rt{transformation}}
 317   \CASE{\some{\NT{include}}}
 318   \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
 319   \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
 320   \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{fundecl}, \NT{when}\mth{)}}
 321
 322   \RULE{\rt{include}}
 323   \CASE{\#include \T{include\_string}}
 324
 325 %  \RULE{\rt{fun\_decl\_stmt}}
 326 %  \CASE{\NT{decl\_stmt}}
 327 %  \CASE{\NT{fundecl}}
 328
 329 %  \CASE{\NT{ctype}}
 330 %  \CASE{\ttlb \NT{initialize\_list} \ttrb}
 331 %  \CASE{\NT{toplevel\_seq\_start\_after\_dots\_init}}
 332 %
 333 %  \RULE{\rt{toplevel\_seq\_start\_after\_dots\_init}}
 334 %  \CASE{\NT{stmt\_dots} \NT{toplevel\_after\_dots}}
 335 %  \CASE{\NT{expr} \opt{\NT{toplevel\_after\_exp}}}
 336 %  \CASE{\NT{decl\_stmt\_expr} \opt{\NT{toplevel\_after\_stmt}}}
 337 %
 338 %  \RULE{\rt{stmt\_dots}}
 339 %  \CASE{... \any{\NT{when}}}
 340 %  \CASE{<... \any{\NT{when}} \NT{nest\_after\_dots} ...>}
 341 %  \CASE{<+... \any{\NT{when}} \NT{nest\_after\_dots} ...+>}
 342
 343   \RULE{\rt{when}}
 344   \CASE{when != \NT{when\_code}}
 345   \CASE{when = \NT{rule\_elem\_stmt}}
 346   \CASE{when \NT{COMMA\_LIST}\mth{(}\NT{any\_strict}\mth{)}}
 347   \CASE{when true != \NT{expr}}
 348   \CASE{when false != \NT{expr}}
 349
 350   \RULE{\rt{when\_code}}
 351   \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
 352   \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
 353
 354   \RULE{\rt{rule\_elem\_stmt}}
 355   \CASE{\NT{one\_decl}}
 356   \CASE{\NT{expr};}
 357   \CASE{return \opt{\NT{expr}};}
 358   \CASE{break;}
 359   \CASE{continue;}
 360   \CASE{\bs(\NT{rule\_elem\_stmt} \SOME{\bs| \NT{rule\_elem\_stmt}}\bs)}
 361
 362   \RULE{\rt{any\_strict}}
 363   \CASE{any}
 364   \CASE{strict}
 365   \CASE{forall}
 366   \CASE{exists}
 367
 368 %  \RULE{\rt{nest\_after\_dots}}
 369 %  \CASE{\NT{decl\_stmt\_exp} \opt{\NT{nest\_after\_stmt}}}
 370 %  \CASE{\opt{\NT{exp}} \opt{\NT{nest\_after\_exp}}}
 371 %
 372 %  \RULE{\rt{nest\_after\_stmt}}
 373 %  \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
 374 %  \CASE{\NT{decl\_stmt} \opt{\NT{nest\_after\_stmt}}}
 375 %
 376 %  \RULE{\rt{nest\_after\_exp}}
 377 %  \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
 378 %
 379 %  \RULE{\rt{toplevel\_after\_dots}}
 380 %  \CASE{\opt{\NT{toplevel\_after\_exp}}}
 381 %  \CASE{\NT{exp} \opt{\NT{toplevel\_after\_exp}}}
 382 %  \CASE{\NT{decl\_stmt\_expr} \NT{toplevel\_after\_stmt}}
 383 %
 384 %  \RULE{\rt{toplevel\_after\_exp}}
 385 %  \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
 386 %
 387 %  \RULE{\rt{decl\_stmt\_expr}}
 388 %  \CASE{TMetaStmList$^\ddag$}
 389 %  \CASE{\NT{decl\_var}}
 390 %  \CASE{\NT{stmt}}
 391 %  \CASE{(\NT{stmt\_seq} \ANY{| \NT{stmt\_seq}})}
 392 %
 393 %  \RULE{\rt{toplevel\_after\_stmt}}
 394 %  \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
 395 %  \CASE{\NT{decl\_stmt} \NT{toplevel\_after\_stmt}}
 396
 397 \end{grammar}
 398
 399 \begin{grammar}
 400   \RULE{\rt{OPTDOTSEQ}\mth{(}\rt{grammar\_ds}, \rt{when\_ds}\mth{)}}
 401   \CASE{}\multicolumn{3}{r}{\hspace{1cm}
 402   \KW{\opt{... \opt{\NT{when\_ds}}} \NT{grammar\_ds}
 403     \ANY{... \opt{\NT{when\_ds}} \NT{grammar\_ds}}
 404     \opt{... \opt{\NT{when\_ds}}}}
 405   }
 406
 407 %  \CASE{\opt{... \opt{\NT{when\_ds}}} \NT{grammar}
 408 %    \ANY{... \opt{\NT{when\_ds}} \NT{grammar}}
 409 %    \opt{... \opt{\NT{when\_ds}}}}
 410 %  \CASE{<... \any{\NT{when\_ds}} \NT{grammar} ...>}
 411 %  \CASE{<+... \any{\NT{when\_ds}} \NT{grammar} ...+>}
 412
 413 \end{grammar}
 414
 415 \noindent
 416 Lines may be annotated with an element of the set $\{\mtt{-}, \mtt{+},
 417 \mtt{*}\}$ or the singleton $\mtt{?}$, or one of each set. \mtt{?}
 418 represents at most one match of the given pattern. \mtt{*} is used for
 419 semantic match, \emph{i.e.}, a pattern that highlights the fragments
 420 annotated with \mtt{*}, but does not perform any modification of the
 421 matched code. \mtt{*} cannot be mixed with \mtt{-} and \mtt{+}.  There are
 422 some constraints on the use of these annotations:
 423 \begin{itemize}
 424 \item Dots, {\em i.e.}, \texttt{...}, cannot occur on a line marked
 425   \texttt{+}.
 426 \item Nested dots, {\em i.e.}, dots enclosed in {\tt <} and {\tt >}, cannot
 427   occur on a line with any marking.
 428 \end{itemize}
 429
 430 Each element of a disjunction must be a proper term like an
 431 expression, a statement, an identifier or a declaration. Thus, the
 432 rule on the left below is not a syntactically correct SmPL rule. One may
 433 use the rule on the right instead.
 434
 435 \begin{center}
 436   \begin{tabular}{l@{\hspace{5cm}}r}
 437 \begin{lstlisting}[language=Cocci]
 438 @@
 439 type T;
 440 T b;
 441 @@
 442
 443 (
 444  writeb(...,
 445 |
 446  readb(
 447 )
 448 @--(T)
 449  b)
 450 \end{lstlisting}
 451     &
 452 \begin{lstlisting}[language=Cocci]
 453 @@
 454 type T;
 455 T b;
 456 @@
 457
 458 (
 459 read
 460 |
 461 write
 462 )
 463  (...,
 464 @-- (T)
 465   b)
 466 \end{lstlisting}
 467     \\
 468   \end{tabular}
 469 \end{center}
 470
 471 \section{Types}
 472 \label{types}
 473
 474 \begin{grammar}
 475
 476   \RULE{\rt{ctypes}}
 477   \CASE{\NT{COMMA\_LIST}\mth{(}\NT{ctype}\mth{)}}
 478
 479   \RULE{\rt{ctype}}
 480   \CASE{\opt{\NT{const\_vol}} \NT{generic\_ctype} \any{*}}
 481   \CASE{\opt{\NT{const\_vol}} void \some{*}}
 482   \CASE{(\NT{ctype} \ANY{| \NT{ctype}})}
 483
 484   \RULE{\rt{const\_vol}}
 485   \CASE{const}
 486   \CASE{volatile}
 487
 488   \RULE{\rt{generic\_ctype}}
 489   \CASE{\NT{ctype\_qualif}}
 490   \CASE{\opt{\NT{ctype\_qualif}} char}
 491   \CASE{\opt{\NT{ctype\_qualif}} short}
 492   \CASE{\opt{\NT{ctype\_qualif}} int}
 493   \CASE{\opt{\NT{ctype\_qualif}} long}
 494   \CASE{\opt{\NT{ctype\_qualif}} long long}
 495   \CASE{double}
 496   \CASE{float}
 497   \CASE{\OPT{struct\OR union} \T{id} \OPT{\{ \any{\NT{struct\_decl\_list}} \}}}
 498
 499   \RULE{\rt{ctype\_qualif}}
 500   \CASE{unsigned}
 501   \CASE{signed}
 502
 503   \RULE{\rt{struct\_decl\_list}}
 504   \CASE{\NT{struct\_decl\_list\_start}}
 505
 506   \RULE{\rt{struct\_decl\_list\_start}}
 507   \CASE{\NT{struct\_decl}}
 508   \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
 509   \CASE{... \opt{when != \NT{struct\_decl}}$^\dag$ \opt{\NT{continue\_struct\_decl\_list}}}
 510
 511   \RULE{\rt{continue\_struct\_decl\_list}}
 512   \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
 513   \CASE{\NT{struct\_decl}}
 514
 515   \RULE{\rt{struct\_decl}}
 516   \CASE{\NT{ctype} \NT{d\_ident};}
 517   \CASE{\NT{fn\_ctype} (* \NT{d\_ident}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)});}
 518   \CASE{\opt{\NT{const\_vol}} \T{id} \NT{d\_ident};}
 519
 520   \RULE{\rt{d\_ident}}
 521   \CASE{\NT{id} \any{[\opt{\NT{expr}}]}}
 522
 523   \RULE{\rt{fn\_ctype}}
 524   \CASE{\NT{generic\_ctype} \any{*}}
 525   \CASE{void \any{*}}
 526
 527   \RULE{\rt{name\_opt\_decl}}
 528   \CASE{\NT{decl}}
 529   \CASE{\NT{ctype}}
 530   \CASE{\NT{fn\_ctype}}
 531 \end{grammar}
 532
 533 $^\dag$ The optional \texttt{when} construct ends at the end of the line.
 534
 535 \section{Function declarations}
 536
 537 \begin{grammar}
 538
 539   \RULE{\rt{fundecl}}
 540   \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
 541     (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}})
 542     \ttlb~\opt{\NT{stmt\_seq}} \ttrb}
 543
 544   \RULE{\rt{funproto}}
 545   \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
 546     (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}});}
 547
 548   \RULE{\rt{funinfo}}
 549   \CASE{inline}
 550   \CASE{\NT{storage}}
 551 %   \CASE{\NT{attr}}
 552
 553   \RULE{\rt{storage}}
 554   \CASE{static}
 555   \CASE{auto}
 556   \CASE{register}
 557   \CASE{extern}
 558
 559   \RULE{\rt{funid}}
 560   \CASE{\T{id}}
 561   \CASE{\mth{\T{metaid}^{\ssf{Id}}}}
 562 %   \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
 563 %   \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
 564
 565   \RULE{\rt{param}}
 566   \CASE{\NT{type} \T{id}}
 567   \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
 568   \CASE{\mth{\T{metaid}^{\ssf{ParamList}}}}
 569
 570   \RULE{\rt{decl}}
 571   \CASE{\NT{ctype} \NT{id}}
 572   \CASE{\NT{fn\_ctype} (* \NT{id}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)})}
 573   \CASE{void}
 574   \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
 575 \end{grammar}
 576
 577 \begin{grammar}
 578   \RULE{\rt{PARAMSEQ}\mth{(}\rt{gram\_p}, \rt{when\_p}\mth{)}}
 579   \CASE{\NT{COMMA\_LIST}\mth{(}\NT{gram\_p} \OR \ldots \opt{\NT{when\_p}}\mth{)}}
 580 \end{grammar}
 581
 582 %\newpage
 583
 584 \section{Declarations}
 585
 586 \begin{grammar}
 587   \RULE{\rt{decl\_var}}
 588 %  \CASE{\NT{type} \opt{\NT{id} \opt{[\opt{\NT{dot\_expr}}]}
 589 %      \ANY{, \NT{id} \opt{[ \opt{\NT{dot\_expr}}]}}};}
 590   \CASE{\NT{common\_decl}}
 591   \CASE{\opt{\NT{storage}} \NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
 592   \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
 593   \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) = \NT{initialize} ;}
 594   \CASE{typedef \NT{ctype} \NT{typedef\_ident} ;}
 595
 596   \RULE{\rt{one\_decl}}
 597   \CASE{\NT{common\_decl}}
 598   \CASE{\opt{\NT{storage}} \NT{ctype} \NT{id};}
 599 %  \CASE{\NT{storage} \NT{ctype} \NT{id} \opt{[\opt{\NT{dot\\_expr}}]} = \NT{nest\\_expr};}
 600   \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} ;}
 601
 602   \RULE{\rt{common\_decl}}
 603   \CASE{\NT{ctype};}
 604   \CASE{\NT{funproto}}
 605   \CASE{\opt{\NT{storage}} \NT{ctype} \NT{d\_ident} = \NT{initialize} ;}
 606   \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} = \NT{initialize} ;}
 607   \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) ;}
 608   \CASE{\NT{decl\_ident} ( \OPT{\NT{COMMA\_LIST}\mth{(}\NT{expr}\mth{)}} ) ;}
 609
 610   \RULE{\rt{initialize}}
 611   \CASE{\NT{dot\_expr}}
 612   \CASE{\ttlb~\opt{\NT{COMMA\_LIST}\mth{(}\NT{dot\_expr}\mth{)}}~\ttrb}
 613
 614   \RULE{\rt{decl\_ident}}
 615   \CASE{\T{DeclarerId}}
 616   \CASE{\mth{\T{metaid}^{\ssf{Declarer}}}}
 617 \end{grammar}
 618
 619 \section{Statements}
 620
 621 The first rule {\em stmt} describes the various forms of a statement.
 622 The remaining rules implement the constraints that are sensitive to the
 623 context in which the statement occurs: {\em single\_stmt} for a
 624 context in which only one statement is allowed, and {\em decl\_stmt}
 625 for a context in which a declaration, statement, or sequence thereof is
 626 allowed.
 627
 628 \begin{grammar}
 629   \RULE{\rt{stmt}}
 630   \CASE{\NT{include}}
 631   \CASE{\mth{\T{metaid}^{\ssf{Stmt}}}}
 632   \CASE{\NT{expr};}
 633   \CASE{if (\NT{dot\_expr}) \NT{single\_stmt} \opt{else \NT{single\_stmt}}}
 634   \CASE{for (\opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}})
 635     \NT{single\_stmt}}
 636   \CASE{while (\NT{dot\_expr}) \NT{single\_stmt}}
 637   \CASE{do \NT{single\_stmt} while (\NT{dot\_expr});}
 638   \CASE{\NT{iter\_ident} (\any{\NT{dot\_expr}}) \NT{single\_stmt}}
 639   \CASE{switch (\opt{\NT{dot\_expr}}) \ttlb \any{\NT{case\_line}} \ttrb}
 640   \CASE{return \opt{\NT{dot\_expr}};}
 641   \CASE{\ttlb~\opt{\NT{stmt\_seq}} \ttrb}
 642   \CASE{\NT{NEST}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
 643   \CASE{\NT{NEST}\mth{(}\NT{expr}, \NT{when}\mth{)}}
 644   \CASE{break;}
 645   \CASE{continue;}
 646   \CASE{\NT{id}:}
 647   \CASE{goto \NT{id};}
 648   \CASE{\ttlb \NT{stmt\_seq} \ttrb}
 649
 650   \RULE{\rt{single\_stmt}}
 651   \CASE{\NT{stmt}}
 652   \CASE{\NT{OR}\mth{(}\NT{stmt}\mth{)}}
 653
 654   \RULE{\rt{decl\_stmt}}
 655   \CASE{\mth{\T{metaid}^{\ssf{StmtList}}}}
 656   \CASE{\NT{decl\_var}}
 657   \CASE{\NT{stmt}}
 658   \CASE{\NT{OR}\mth{(}\NT{stmt\_seq}\mth{)}}
 659
 660   \RULE{\rt{stmt\_seq}}
 661   \CASE{\any{\NT{decl\_stmt}}
 662     \opt{\NT{DOTSEQ}\mth{(}\some{\NT{decl\_stmt}},
 663       \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
 664   \CASE{\any{\NT{decl\_stmt}}
 665     \opt{\NT{DOTSEQ}\mth{(}\NT{expr},
 666       \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
 667
 668   \RULE{\rt{case\_line}}
 669   \CASE{default :~\NT{stmt\_seq}}
 670   \CASE{case \NT{dot\_expr} :~\NT{stmt\_seq}}
 671
 672   \RULE{\rt{iter\_ident}}
 673   \CASE{\T{IteratorId}}
 674   \CASE{\mth{\T{metaid}^{\ssf{Iterator}}}}
 675 \end{grammar}
 676
 677 \begin{grammar}
 678   \RULE{\rt{OR}\mth{(}\rt{gram\_o}\mth{)}}
 679   \CASE{( \NT{gram\_o} \ANY{\ttmid \NT{gram\_o}})}
 680
 681   \RULE{\rt{DOTSEQ}\mth{(}\rt{gram\_d}, \rt{when\_d}\mth{)}}
 682   \CASE{\ldots \opt{\NT{when\_d}} \ANY{\NT{gram\_d} \ldots \opt{\NT{when\_d}}}}
 683
 684   \RULE{\rt{NEST}\mth{(}\rt{gram\_n}, \rt{when\_n}\mth{)}}
 685   \CASE{<\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots>}
 686   \CASE{<+\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots+>}
 687 \end{grammar}
 688
 689 \noindent
 690 OR is a macro that generates a disjunction of patterns.  The three
 691 tokens \T{(}, \T{\ttmid}, and \T{)} must appear in the leftmost
 692 column, to differentiate them from the parentheses and bit-or tokens
 693 that can appear within expressions (and cannot appear in the leftmost
 694 column). These tokens may also be preceded by \texttt{\bs}
 695 when they are used in another column.  These tokens are furthermore
 696 different from (, \(\mid\), and ), which are part of the grammar
 697 metalanguage.
 698
 699 \section{Expressions}
 700
 701 A nest or a single ellipsis is allowed in some expression contexts, and
 702 causes ambiguity in others.  For example, in a sequence \mtt{\ldots
 703 \mita{expr} \ldots}, the nonterminal \mita{expr} must be instantiated as an
 704 explicit C-language expression, while in an array reference,
 705 \mtt{\mth{\mita{expr}_1} \mtt{[} \mth{\mita{expr}_2} \mtt{]}}, the
 706 nonterminal \mth{\mita{expr}_2}, because it is delimited by brackets, can
 707 be also instantiated as \mtt{\ldots}, representing an arbitrary expression.  To
 708 distinguish between the various possibilities, we define three nonterminals
 709 for expressions: {\em expr} does not allow either top-level nests or
 710 ellipses, {\em nest\_expr} allows a nest but not an ellipsis, and {\em
 711 dot\_expr} allows both.  The EXPR macro is used to express these variants
 712 in a concise way.
 713
 714 \begin{grammar}
 715   \RULE{\rt{expr}}
 716   \CASE{\NT{EXPR}\mth{(}\NT{expr}\mth{)}}
 717
 718   \RULE{\rt{nest\_expr}}
 719   \CASE{\NT{EXPR}\mth{(}\NT{nest\_expr}\mth{)}}
 720   \CASE{\NT{NEST}\mth{(}\NT{nest\_expr}, \NT{exp\_whencode}\mth{)}}
 721
 722   \RULE{\rt{dot\_expr}}
 723   \CASE{\NT{EXPR}\mth{(}\NT{dot\_expr}\mth{)}}
 724   \CASE{\NT{NEST}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)}}
 725   \CASE{...~\opt{\NT{exp\_whencode}}}
 726
 727   \RULE{\rt{EXPR}\mth{(}\rt{exp}\mth{)}}
 728   \CASE{\NT{exp} \NT{assign\_op} \NT{exp}}
 729   \CASE{\NT{exp}++}
 730   \CASE{\NT{exp}--}
 731   \CASE{\NT{unary\_op} \NT{exp}}
 732   \CASE{\NT{exp} \NT{bin\_op} \NT{exp}}
 733   \CASE{\NT{exp} ?~\NT{dot\_expr} :~\NT{exp}}
 734   \CASE{(\NT{type}) \NT{exp}}
 735   \CASE{\NT{exp} [\NT{dot\_expr}]}
 736   \CASE{\NT{exp} .~\NT{id}}
 737   \CASE{\NT{exp} -> \NT{id}}
 738   \CASE{\NT{exp}(\opt{\NT{PARAMSEQ}\mth{(}\NT{arg}, \NT{exp\_whencode}\mth{)}})}
 739   \CASE{\NT{id}}
 740 %   \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
 741 %   \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
 742   \CASE{\mth{\T{metaid}^{\ssf{Exp}}}}
 743 %   \CASE{\mth{\T{metaid}^{\ssf{Err}}}}
 744   \CASE{\mth{\T{metaid}^{\ssf{Const}}}}
 745   \CASE{\NT{const}}
 746   \CASE{(\NT{dot\_expr})}
 747   \CASE{\NT{OR}\mth{(}\NT{exp}\mth{)}}
 748
 749   \RULE{\rt{arg}}
 750   \CASE{\NT{nest\_expr}}
 751   \CASE{\mth{\T{metaid}^{\ssf{ExpList}}}}
 752
 753   \RULE{\rt{exp\_whencode}}
 754   \CASE{when != \NT{expr}}
 755
 756   \RULE{\rt{assign\_op}}
 757   \CASE{= \OR -= \OR += \OR *= \OR /= \OR \%=}
 758   \CASE{\&= \OR |= \OR \caret= \OR \lt\lt= \OR \gt\gt=}
 759
 760   \RULE{\rt{bin\_op}}
 761   \CASE{* \OR / \OR \% \OR + \OR -}
 762   \CASE{\lt\lt \OR \gt\gt \OR \caret\xspace \OR \& \OR \ttmid}
 763   \CASE{< \OR > \OR <= \OR >= \OR == \OR != \OR \&\& \OR \ttmid\ttmid}
 764
 765   \RULE{\rt{unary\_op}}
 766   \CASE{++ \OR -- \OR \& \OR * \OR + \OR - \OR !}
 767
 768 \end{grammar}
 769
 770 \section{Constants, Identifiers and Types for Transformations}
 771
 772 \begin{grammar}
 773   \RULE{\rt{const}}
 774   \CASE{\NT{string}}
 775   \CASE{[0-9]+}
 776   \CASE{\mth{\cdots}}
 777
 778   \RULE{\rt{string}}
 779   \CASE{"\any{[\^{}"]}"}
 780
 781   \RULE{\rt{id}}
 782   \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Id}}}}
 783
 784   \RULE{\rt{typedef\_ident}}
 785   \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Type}}}}
 786
 787   \RULE{\rt{type}}
 788   \CASE{\NT{ctype} \OR \mth{\T{metaid}^{\ssf{Type}}}}
 789
 790   \RULE{\rt{pathToIsoFile}}
 791   \CASE{<.*>}
 792 \end{grammar}
 793
 794
 795 %%% Local Variables:
 796 %%% mode: LaTeX
 797 %%% TeX-master: "main_grammar"
 798 %%% coding: latin-9
 799 %%% TeX-PDF-mode: t
 800 %%% ispell-local-dictionary: "american"
 801 %%% End: