docs/manual/cocci_syntax.tex

   1
   2 %\section{The SmPL Grammar}
   3
   4 % This section presents the SmPL grammar.  This definition follows closely
   5 % our implementation using the Menhir parser generator \cite{menhir}.
   6
   7 This document presents the grammar of the SmPL language used by the
   8 \href{http://coccinelle.lip6.fr/}{Coccinelle tool}.  For the most
   9 part, the grammar is written using standard notation.  In some rules,
  10 however, the left-hand side is in all uppercase letters.  These are
  11 macros, which take one or more grammar rule right-hand-sides as
  12 arguments.  The grammar also uses some unspecified nonterminals, such
  13 as \T{id}, \T{const}, etc.  These refer to the sets suggested by
  14 the name, {\em i.e.}, \T{id} refers to the set of possible
  15 C-language identifiers, while \T{const} refers to the set of
  16 possible C-language constants.
  17 %
  18 \ifhevea
  19 A PDF version of this documentation is available at
  20 \url{http://coccinelle.lip6.fr/docs/main_grammar.pdf}.
  21 \else
   22 An HTML version of this documentation is available online at
  23 \url{http://coccinelle.lip6.fr/docs/main_grammar.html}.
  24 \fi
  25
  26 \section{Program}
  27
  28 \begin{grammar}
  29   \RULE{\rt{program}}
  30   \CASE{\any{\NT{include\_cocci}} \some{\NT{changeset}}}
  31
  32   \RULE{\rt{include\_cocci}}
  33   \CASE{using \NT{string}}
  34   \CASE{using \NT{pathToIsoFile}}
  35
  36   \RULE{\rt{changeset}}
  37   \CASE{\NT{metavariables} \NT{transformation}}
  38   \CASE{\NT{script\_metavariables} \T{script\_code}}
  39 %  \CASE{\NT{metavariables} \ANY{--- filename +++ filename} \NT{transformation}}
  40 \end{grammar}
  41
  42 \noindent
  43 \T{script\_code} is any code in the chosen scripting language.  Parsing of
  44 the semantic patch does not check the validity of this code; any errors are
  45 first detected when the code is executed.  Furthermore, \texttt{@} should
   46 not be used in this code.  Spatch scans the script code for the next
   47 \texttt{@} and considers that to be the beginning of the next rule, even if
   48 \texttt{@} occurs within, e.g., a string or a comment.
  49
  50 % Between the metavariables and the transformation rule, there can be a
  51 % specification of constraints on the names of the old and new files,
  52 % analogous to the filename specifications in the standard patch syntax.
  53 % (see Figure \ref{scsiglue_patch}).
  54
  55 \section{Metavariables for transformations}
  56
  57 The \NT{rulename} portion of the metavariable declaration can specify
  58 properties of a rule such as its name, the names of the rules that it
  59 depends on, the isomorphisms to be used in processing the rule, and whether
  60 quantification over paths should be universal or existential.  The optional
  61 annotation {\tt expression} indicates that the pattern is to be considered
  62 as matching an expression, and thus can be used to avoid some parsing
  63 problems.
  64
  65 The \NT{metadecl} portion of the metavariable declaration defines various
  66 types of metavariables that will be used for matching in the transformation
  67 section.
  68
  69 \begin{grammar}
  70   \RULE{\rt{metavariables}}
  71   \CASE{@@ \any{\NT{metadecl}} @@}
  72   \CASE{@ \NT{rulename} @ \any{\NT{metadecl}} @@}
  73
  74   \RULE{\rt{rulename}}
  75   \CASE{\T{id} \OPT{extends \T{id}} \OPT{depends on \NT{dep}} \opt{\NT{iso}}
  76     \opt{\NT{disable-iso}} \opt{\NT{exists}} \opt{expression}}
  77   \CASE{script:\T{language} \OPT{depends on \NT{dep}}}
  78
  79   \RULE{\rt{script\_init\_final}}
  80   \CASE{initialize:\T{language}}
  81   \CASE{finalize:\T{language}}
  82
  83   \RULE{\rt{dep}}
  84   \CASE{\NT{pnrule}}
  85   \CASE{\NT{dep} \&\& \NT{dep}}
  86   \CASE{\NT{dep} || \NT{dep}}
  87
  88   \RULE{\rt{pnrule}}
  89   \CASE{\T{id}}
  90   \CASE{!\T{id}}
  91   \CASE{ever \T{id}}
  92   \CASE{never \T{id}}
  93   \CASE{(\NT{dep})}
  94
  95   \RULE{\rt{iso}}
  96   \CASE{using \NT{string} \ANY{, \NT{string}}}
  97
  98   \RULE{\rt{disable-iso}}
  99   \CASE{disable \NT{COMMA\_LIST}\mth{(}\T{id}\mth{)}}
 100
 101   \RULE{\rt{exists}}
 102   \CASE{exists}
 103   \CASE{forall}
 104 %  \CASE{\opt{reverse} forall}
 105
 106   \RULE{\rt{COMMA\_LIST}\mth{(}\rt{elem}\mth{)}}
 107   \CASE{\NT{elem} \ANY{, \NT{elem}}}
 108 \end{grammar}
 109
 110 The keyword \KW{disable} is normally used with the names of
 111 isomorphisms defined in standard.iso or whatever isomorphism file has been
 112 included.  There are, however, some other isomorphisms that are built into
 113 the implementation of Coccinelle and that can be disabled as well.  Their
  114 names are given below.  In each case, the text describes the standard
 115 behavior.  Using \NT{disable-iso} with the given name disables this behavior.
 116
 117 \begin{itemize}
 118 \item \KW{optional\_storage}: A SmPL function definition that does not
 119   specify any visibility (i.e., static or extern), or a SmPL variable
 120   declaration that does not specify any storage (i.e., auto, static,
 121   register, or extern), matches a function declaration or variable
 122   declaration with any visibility or storage, respectively.
 123 \item \KW{optional\_qualifier}: This is similar to \KW{optional\_storage},
  124   except that here it is the qualifier (i.e., const or volatile) that does
 125   not have to be specified in the SmPL code, but may be present in the C code.
 126 \item \KW{value\_format}: Integers in various formats, e.g., 1 and 0x1, are
 127   considered to be equivalent in the matching process.
 128 \item \KW{comm\_assoc}: An expression of the form \NT{exp} \NT{bin\_op}
 129   \KW{...}, where \NT{bin\_op} is commutative and associative, is
 130   considered to match any top-level sequence of \NT{bin\_op} operators
 131   containing \NT{exp} as the top-level argument.
 132 \end{itemize}
 133
 134 The possible types of metavariable declarations are defined by the grammar
 135 rule below.  Metavariables should occur at least once in the transformation
 136 immediately following their declaration.  Fresh metavariables must only be
 137 used in {\tt +} code.  These properties are not expressed in the grammar,
 138 but are checked by a subsequent analysis.  The metavariables are designated
 139 according to the kind of terms they can match, such as a statement, an
 140 identifier, or an expression.  An expression metavariable can be further
 141 constrained by its type.
 142
 143 \begin{grammar}
 144   \RULE{\rt{metadecl}}
 145   \CASE{fresh identifier \NT{ids} ;}
 146   \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
 147   \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 148   \CASE{parameter \opt{list} \NT{ids} ;}
 149   \CASE{parameter list [ \NT{id} ] \NT{ids} ;}
 150   \CASE{type \NT{ids} ;}
 151   \CASE{statement \opt{list} \NT{ids} ;}
 152   \CASE{typedef \NT{ids} ;}
 153   \CASE{declarer name \NT{ids} ;}
 154 %  \CASE{\opt{local} function \NT{pmid\_with\_not\_eq\_list} ;}
 155   \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
 156   \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 157   \CASE{iterator name \NT{ids} ;}
 158   \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
 159   \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 160 %  \CASE{error \NT{pmid\_with\_not\_eq\_list} ; }
 161   \CASE{\opt{local} idexpression \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 162   \CASE{\opt{local} idexpression \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 163   \CASE{\opt{local} idexpression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 164   \CASE{expression list \NT{ids} ;}
 165   \CASE{expression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 166   \CASE{expression \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
 167   \CASE{expression list [ ident ] \NT{ids} ;}
 168   \CASE{\NT{ctype} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 169   \CASE{\NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
 170   \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
 171   \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 172   \CASE{constant \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 173   \CASE{constant \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
 174   \CASE{position \opt{any} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq\_mid}\mth{)} ;}
 175 \end{grammar}
 176
 177 \begin{grammar}
 178   \RULE{\rt{ids}}
 179   \CASE{\NT{COMMA\_LIST}\mth{(}\NT{pmid}\mth{)}}
 180
 181   \RULE{\rt{pmid}}
 182   \CASE{\T{id}}
 183   \CASE{\NT{mid}}
 184 %   \CASE{list}
 185 %   \CASE{error}
 186 %   \CASE{type}
 187
 188   \RULE{\rt{mid}}  \CASE{\T{rulename\_id}.\T{id}}
 189
 190   \RULE{\rt{pmid\_with\_regexp}}
 191   \CASE{\NT{pmid} \~{}= \NT{regexp}}
 192
 193   \RULE{\rt{pmid\_with\_not\_eq}}
 194   \CASE{\NT{pmid} \OPT{!= \T{id}}}
 195   \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\T{id}\mth{)} \ttrb}}
 196
 197   \RULE{\rt{pmid\_with\_not\_ceq}}
 198   \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_cst}}}
 199   \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_cst}\mth{)} \ttrb}}
 200
 201   \RULE{\rt{id\_or\_cst}}
 202   \CASE{\T{id}}
 203   \CASE{\T{integer}}
 204
 205   \RULE{\rt{pmid\_with\_not\_eq\_mid}}
 206   \CASE{\NT{pmid} \OPT{!= \NT{mid}}}
 207   \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{mid}\mth{)} \ttrb}}
 208 \end{grammar}
 209
 210 Subsequently, we refer to arbitrary metavariables as
 211 \mth{\msf{metaid}^{\mbox{\scriptsize{\it{ty}}}}}, where {\it{ty}}
 212 indicates the {\it metakind} used in the declaration of the variable.
 213 For example, \mth{\msf{metaid}^{\ssf{Type}}} refers to a metavariable
 214 that was declared using \texttt{type} and stands for any type.
 215
 216 The \NT{ctype} and \NT{ctypes} nonterminals are used by both the grammar of
 217 metavariable declarations and the grammar of transformations, and are
 218 defined on page~\pageref{types}.
 219
 220 \section{Metavariables for scripts}
 221
 222 Metavariables for scripts can only be inherited from transformation rules.
 223 In the spirit of scripting languages such as Python that use dynamic
 224 typing, metavariables for scripts do not include type declarations.
 225
 226 \begin{grammar}
 227   \RULE{\rt{script\_metavariables}}
 228   \CASE{@ script:\NT{language} \OPT{depends on \NT{dep}} @
 229         \any{\NT{script\_metadecl}} @@}
 230   \CASE{@ initialize:\NT{language} @}
 231   \CASE{@ finalize:\NT{language} @}
 232
 233   \RULE{\rt{language}} \CASE{python}
 234
 235   \RULE{\rt{script\_metadecl}} \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;}
 236 \end{grammar}
 237
 238 Currently, the only scripting language that is supported is Python.  The
 239 set of available scripting languages may be extended at some point.
 240
 241 Script rules declared with \KW{initialize} are run before the treatment of
 242 any file.  Script rules declared with \KW{finalize} are run when the
 243 treatment of all of the files has completed.  There can be at most one of
 244 each per scripting language (thus currently at most one of each).
 245 Initialize and finalize script rules do not have access to SmPL
 246 metavariables.  Nevertheless, a finalize script rule can access any
 247 variables initialized by the other script rules, allowing information to be
 248 transmitted from the matching process to the finalize rule.
 249
 250 \section{Transformation}
 251
 252 The transformation specification essentially has the form of C code,
 253 except that lines to remove are annotated with \verb+-+ in the first
 254 column, and lines to add are annotated with \verb-+-.  A
 255 transformation specification can also use {\em dots}, ``\verb-...-'',
 256 describing an arbitrary sequence of function arguments or instructions
 257 within a control-flow path.  Dots may be modified with a {\tt when}
 258 clause, indicating a pattern that should not occur anywhere within the
 259 matched sequence.  Finally, a transformation can specify a disjunction
 260 of patterns, of the form \mtt{( \mth{\mita{pat}_1} | \mita{\ldots} |
 261   \mth{\mita{pat}_n} )} where each \texttt{(}, \texttt{|} or
 262 \texttt{)} is in column 0 or preceded by \texttt{\textbackslash}.
 263
 264 The grammar that we present for the transformation is not actually the
 265 grammar of the SmPL code that can be written by the programmer, but is
 266 instead the grammar of the slice of this consisting of the {\tt -}
 267 annotated and the unannotated code (the context of the transformed lines),
 268 or the {\tt +} annotated code and the unannotated code.  For example, for
 269 parsing purposes, the following transformation
 270 %presented in Section \ref{sec:seq2}
 271 is split into the two variants shown below and each is parsed
 272 separately.
 273
 274 \begin{center}
 275 \begin{tabular}{c}
 276 \begin{lstlisting}[language=Cocci]
 277   proc_info_func(...) {
 278     <...
 279 @--    hostno
 280 @++    hostptr->host_no
 281     ...>
 282  }
 283 \end{lstlisting}\\
 284 \end{tabular}
 285 \end{center}
 286
 287 {%\sizecodebis
 288 \begin{center}
 289 \begin{tabular}{p{5cm}p{3cm}p{5cm}}
 290 \begin{lstlisting}[language=Cocci]
 291   proc_info_func(...) {
 292     <...
 293 @--    hostno
 294     ...>
 295  }
 296 \end{lstlisting}
 297 &&
 298 \begin{lstlisting}[language=Cocci]
 299   proc_info_func(...) {
 300     <...
 301 @++    hostptr->host_no
 302     ...>
 303  }
 304 \end{lstlisting}
 305 \end{tabular}
 306 \end{center}
 307 }
 308
 309 \noindent
 310 Requiring that both slices parse correctly ensures that the rule matches
 311 syntactically valid C code and that it produces syntactically valid C code.
 312 The generated parse trees are then merged for use in the subsequent
 313 matching and transformation process.
 314
 315 The grammar for the minus or plus slice of a transformation is as follows:
 316
 317 \begin{grammar}
 318
 319   \RULE{\rt{transformation}}
 320   \CASE{\some{\NT{include}}}
 321   \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
 322   \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
 323   \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{fundecl}, \NT{when}\mth{)}}
 324
 325   \RULE{\rt{include}}
 326   \CASE{\#include \T{include\_string}}
 327
 328 %  \RULE{\rt{fun\_decl\_stmt}}
 329 %  \CASE{\NT{decl\_stmt}}
 330 %  \CASE{\NT{fundecl}}
 331
 332 %  \CASE{\NT{ctype}}
 333 %  \CASE{\ttlb \NT{initialize\_list} \ttrb}
 334 %  \CASE{\NT{toplevel\_seq\_start\_after\_dots\_init}}
 335 %
 336 %  \RULE{\rt{toplevel\_seq\_start\_after\_dots\_init}}
 337 %  \CASE{\NT{stmt\_dots} \NT{toplevel\_after\_dots}}
 338 %  \CASE{\NT{expr} \opt{\NT{toplevel\_after\_exp}}}
 339 %  \CASE{\NT{decl\_stmt\_expr} \opt{\NT{toplevel\_after\_stmt}}}
 340 %
 341 %  \RULE{\rt{stmt\_dots}}
 342 %  \CASE{... \any{\NT{when}}}
 343 %  \CASE{<... \any{\NT{when}} \NT{nest\_after\_dots} ...>}
 344 %  \CASE{<+... \any{\NT{when}} \NT{nest\_after\_dots} ...+>}
 345
 346   \RULE{\rt{when}}
 347   \CASE{when != \NT{when\_code}}
 348   \CASE{when = \NT{rule\_elem\_stmt}}
 349   \CASE{when \NT{COMMA\_LIST}\mth{(}\NT{any\_strict}\mth{)}}
 350   \CASE{when true != \NT{expr}}
 351   \CASE{when false != \NT{expr}}
 352
 353   \RULE{\rt{when\_code}}
 354   \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
 355   \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
 356
 357   \RULE{\rt{rule\_elem\_stmt}}
 358   \CASE{\NT{one\_decl}}
 359   \CASE{\NT{expr};}
 360   \CASE{return \opt{\NT{expr}};}
 361   \CASE{break;}
 362   \CASE{continue;}
 363   \CASE{\bs(\NT{rule\_elem\_stmt} \SOME{\bs| \NT{rule\_elem\_stmt}}\bs)}
 364
 365   \RULE{\rt{any\_strict}}
 366   \CASE{any}
 367   \CASE{strict}
 368   \CASE{forall}
 369   \CASE{exists}
 370
 371 %  \RULE{\rt{nest\_after\_dots}}
 372 %  \CASE{\NT{decl\_stmt\_exp} \opt{\NT{nest\_after\_stmt}}}
 373 %  \CASE{\opt{\NT{exp}} \opt{\NT{nest\_after\_exp}}}
 374 %
 375 %  \RULE{\rt{nest\_after\_stmt}}
 376 %  \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
 377 %  \CASE{\NT{decl\_stmt} \opt{\NT{nest\_after\_stmt}}}
 378 %
 379 %  \RULE{\rt{nest\_after\_exp}}
 380 %  \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
 381 %
 382 %  \RULE{\rt{toplevel\_after\_dots}}
 383 %  \CASE{\opt{\NT{toplevel\_after\_exp}}}
 384 %  \CASE{\NT{exp} \opt{\NT{toplevel\_after\_exp}}}
 385 %  \CASE{\NT{decl\_stmt\_expr} \NT{toplevel\_after\_stmt}}
 386 %
 387 %  \RULE{\rt{toplevel\_after\_exp}}
 388 %  \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
 389 %
 390 %  \RULE{\rt{decl\_stmt\_expr}}
 391 %  \CASE{TMetaStmList$^\ddag$}
 392 %  \CASE{\NT{decl\_var}}
 393 %  \CASE{\NT{stmt}}
 394 %  \CASE{(\NT{stmt\_seq} \ANY{| \NT{stmt\_seq}})}
 395 %
 396 %  \RULE{\rt{toplevel\_after\_stmt}}
 397 %  \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
 398 %  \CASE{\NT{decl\_stmt} \NT{toplevel\_after\_stmt}}
 399
 400 \end{grammar}
 401
 402 \begin{grammar}
 403   \RULE{\rt{OPTDOTSEQ}\mth{(}\rt{grammar\_ds}, \rt{when\_ds}\mth{)}}
 404   \CASE{}\multicolumn{3}{r}{\hspace{1cm}
 405   \KW{\opt{... \opt{\NT{when\_ds}}} \NT{grammar\_ds}
 406     \ANY{... \opt{\NT{when\_ds}} \NT{grammar\_ds}}
 407     \opt{... \opt{\NT{when\_ds}}}}
 408   }
 409
 410 %  \CASE{\opt{... \opt{\NT{when\_ds}}} \NT{grammar}
 411 %    \ANY{... \opt{\NT{when\_ds}} \NT{grammar}}
 412 %    \opt{... \opt{\NT{when\_ds}}}}
 413 %  \CASE{<... \any{\NT{when\_ds}} \NT{grammar} ...>}
 414 %  \CASE{<+... \any{\NT{when\_ds}} \NT{grammar} ...+>}
 415
 416 \end{grammar}
 417
 418 \noindent
 419 Lines may be annotated with an element of the set $\{\mtt{-}, \mtt{+},
 420 \mtt{*}\}$ or the singleton $\mtt{?}$, or one of each set. \mtt{?}
 421 represents at most one match of the given pattern. \mtt{*} is used for
 422 semantic match, \emph{i.e.}, a pattern that highlights the fragments
 423 annotated with \mtt{*}, but does not perform any modification of the
 424 matched code. \mtt{*} cannot be mixed with \mtt{-} and \mtt{+}.  There are
 425 some constraints on the use of these annotations:
 426 \begin{itemize}
  427 \item Dots, {\em i.e.}, \texttt{...}, cannot occur on a line marked
 428   \texttt{+}.
 429 \item Nested dots, {\em i.e.}, dots enclosed in {\tt <} and {\tt >}, cannot
 430   occur on a line with any marking.
 431 \end{itemize}
 432
 433 Each element of a disjunction must be a proper term like an
 434 expression, a statement, an identifier or a declaration. Thus, the
  435 rule on the left below is not a syntactically correct SmPL rule. One may
 436 use the rule on the right instead.
 437
 438 \begin{center}
 439   \begin{tabular}{l@{\hspace{5cm}}r}
 440 \begin{lstlisting}[language=Cocci]
 441 @@
 442 type T;
 443 T b;
 444 @@
 445
 446 (
 447  writeb(...,
 448 |
 449  readb(
 450 )
 451 @--(T)
 452  b)
 453 \end{lstlisting}
 454     &
 455 \begin{lstlisting}[language=Cocci]
 456 @@
 457 type T;
 458 T b;
 459 @@
 460
 461 (
 462 read
 463 |
 464 write
 465 )
 466  (...,
 467 @-- (T)
 468   b)
 469 \end{lstlisting}
 470     \\
 471   \end{tabular}
 472 \end{center}
 473
 474 \section{Types}
 475 \label{types}
 476
 477 \begin{grammar}
 478
 479   \RULE{\rt{ctypes}}
 480   \CASE{\NT{COMMA\_LIST}\mth{(}\NT{ctype}\mth{)}}
 481
 482   \RULE{\rt{ctype}}
 483   \CASE{\opt{\NT{const\_vol}} \NT{generic\_ctype} \any{*}}
 484   \CASE{\opt{\NT{const\_vol}} void \some{*}}
 485   \CASE{(\NT{ctype} \ANY{| \NT{ctype}})}
 486
 487   \RULE{\rt{const\_vol}}
 488   \CASE{const}
 489   \CASE{volatile}
 490
 491   \RULE{\rt{generic\_ctype}}
 492   \CASE{\NT{ctype\_qualif}}
 493   \CASE{\opt{\NT{ctype\_qualif}} char}
 494   \CASE{\opt{\NT{ctype\_qualif}} short}
 495   \CASE{\opt{\NT{ctype\_qualif}} int}
 496   \CASE{\opt{\NT{ctype\_qualif}} long}
 497   \CASE{\opt{\NT{ctype\_qualif}} long long}
 498   \CASE{double}
 499   \CASE{float}
 500   \CASE{\OPT{struct\OR union} \T{id} \OPT{\{ \any{\NT{struct\_decl\_list}} \}}}
 501
 502   \RULE{\rt{ctype\_qualif}}
 503   \CASE{unsigned}
 504   \CASE{signed}
 505
 506   \RULE{\rt{struct\_decl\_list}}
 507   \CASE{\NT{struct\_decl\_list\_start}}
 508
 509   \RULE{\rt{struct\_decl\_list\_start}}
 510   \CASE{\NT{struct\_decl}}
 511   \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
 512   \CASE{... \opt{when != \NT{struct\_decl}}$^\dag$ \opt{\NT{continue\_struct\_decl\_list}}}
 513
 514   \RULE{\rt{continue\_struct\_decl\_list}}
 515   \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
 516   \CASE{\NT{struct\_decl}}
 517
 518   \RULE{\rt{struct\_decl}}
 519   \CASE{\NT{ctype} \NT{d\_ident};}
  520   \CASE{\NT{fn\_ctype} (* \NT{d\_ident}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)});}
 521   \CASE{\opt{\NT{const\_vol}} \T{id} \NT{d\_ident};}
 522
 523   \RULE{\rt{d\_ident}}
 524   \CASE{\NT{id} \any{[\opt{\NT{expr}}]}}
 525
 526   \RULE{\rt{fn\_ctype}}
 527   \CASE{\NT{generic\_ctype} \any{*}}
 528   \CASE{void \any{*}}
 529
 530   \RULE{\rt{name\_opt\_decl}}
 531   \CASE{\NT{decl}}
 532   \CASE{\NT{ctype}}
 533   \CASE{\NT{fn\_ctype}}
 534 \end{grammar}
 535
 536 $^\dag$ The optional \texttt{when} construct ends at the end of the line.
 537
 538 \section{Function declarations}
 539
 540 \begin{grammar}
 541
 542   \RULE{\rt{fundecl}}
 543   \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
 544     (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}})
 545     \ttlb~\opt{\NT{stmt\_seq}} \ttrb}
 546
 547   \RULE{\rt{funproto}}
 548   \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
 549     (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}});}
 550
 551   \RULE{\rt{funinfo}}
 552   \CASE{inline}
 553   \CASE{\NT{storage}}
 554 %   \CASE{\NT{attr}}
 555
 556   \RULE{\rt{storage}}
 557   \CASE{static}
 558   \CASE{auto}
 559   \CASE{register}
 560   \CASE{extern}
 561
 562   \RULE{\rt{funid}}
 563   \CASE{\T{id}}
 564   \CASE{\mth{\T{metaid}^{\ssf{Id}}}}
 565 %   \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
 566 %   \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
 567
 568   \RULE{\rt{param}}
 569   \CASE{\NT{type} \T{id}}
 570   \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
 571   \CASE{\mth{\T{metaid}^{\ssf{ParamList}}}}
 572
 573   \RULE{\rt{decl}}
 574   \CASE{\NT{ctype} \NT{id}}
 575   \CASE{\NT{fn\_ctype} (* \NT{id}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)})}
 576   \CASE{void}
 577   \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
 578 \end{grammar}
 579
 580 \begin{grammar}
 581   \RULE{\rt{PARAMSEQ}\mth{(}\rt{gram\_p}, \rt{when\_p}\mth{)}}
 582   \CASE{\NT{COMMA\_LIST}\mth{(}\NT{gram\_p} \OR \ldots \opt{\NT{when\_p}}\mth{)}}
 583 \end{grammar}
 584
 585 %\newpage
 586
 587 \section{Declarations}
 588
 589 \begin{grammar}
 590   \RULE{\rt{decl\_var}}
 591 %  \CASE{\NT{type} \opt{\NT{id} \opt{[\opt{\NT{dot\_expr}}]}
 592 %      \ANY{, \NT{id} \opt{[ \opt{\NT{dot\_expr}}]}}};}
 593   \CASE{\NT{common\_decl}}
 594   \CASE{\opt{\NT{storage}} \NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
 595   \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
 596   \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) = \NT{initialize} ;}
 597   \CASE{typedef \NT{ctype} \NT{typedef\_ident} ;}
 598
 599   \RULE{\rt{one\_decl}}
 600   \CASE{\NT{common\_decl}}
 601   \CASE{\opt{\NT{storage}} \NT{ctype} \NT{id};}
 602 %  \CASE{\NT{storage} \NT{ctype} \NT{id} \opt{[\opt{\NT{dot\\_expr}}]} = \NT{nest\\_expr};}
 603   \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} ;}
 604
 605   \RULE{\rt{common\_decl}}
 606   \CASE{\NT{ctype};}
 607   \CASE{\NT{funproto}}
 608   \CASE{\opt{\NT{storage}} \NT{ctype} \NT{d\_ident} = \NT{initialize} ;}
 609   \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} = \NT{initialize} ;}
 610   \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) ;}
 611   \CASE{\NT{decl\_ident} ( \OPT{\NT{COMMA\_LIST}\mth{(}\NT{expr}\mth{)}} ) ;}
 612
 613   \RULE{\rt{initialize}}
 614   \CASE{\NT{dot\_expr}}
 615   \CASE{\ttlb~\opt{\NT{COMMA\_LIST}\mth{(}\NT{dot\_expr}\mth{)}}~\ttrb}
 616
 617   \RULE{\rt{decl\_ident}}
 618   \CASE{\T{DeclarerId}}
 619   \CASE{\mth{\T{metaid}^{\ssf{Declarer}}}}
 620 \end{grammar}
 621
 622 \section{Statements}
 623
  624 The first rule {\em stmt} describes the various forms of a statement.
  625 The remaining rules implement the constraints that are sensitive to the
  626 context in which the statement occurs: {\em single\_stmt} for a
  627 context in which only one statement is allowed, and {\em decl\_stmt}
 628 for a context in which a declaration, statement, or sequence thereof is
 629 allowed.
 630
 631 \begin{grammar}
 632   \RULE{\rt{stmt}}
 633   \CASE{\NT{include}}
 634   \CASE{\mth{\T{metaid}^{\ssf{Stmt}}}}
 635   \CASE{\NT{expr};}
 636   \CASE{if (\NT{dot\_expr}) \NT{single\_stmt} \opt{else \NT{single\_stmt}}}
 637   \CASE{for (\opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}})
 638     \NT{single\_stmt}}
 639   \CASE{while (\NT{dot\_expr}) \NT{single\_stmt}}
 640   \CASE{do \NT{single\_stmt} while (\NT{dot\_expr});}
 641   \CASE{\NT{iter\_ident} (\any{\NT{dot\_expr}}) \NT{single\_stmt}}
 642   \CASE{switch (\opt{\NT{dot\_expr}}) \ttlb \any{\NT{case\_line}} \ttrb}
 643   \CASE{return \opt{\NT{dot\_expr}};}
 644   \CASE{\ttlb~\opt{\NT{stmt\_seq}} \ttrb}
 645   \CASE{\NT{NEST}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
 646   \CASE{\NT{NEST}\mth{(}\NT{expr}, \NT{when}\mth{)}}
 647   \CASE{break;}
 648   \CASE{continue;}
 649   \CASE{\NT{id}:}
 650   \CASE{goto \NT{id};}
 651   \CASE{\ttlb \NT{stmt\_seq} \ttrb}
 652
 653   \RULE{\rt{single\_stmt}}
 654   \CASE{\NT{stmt}}
 655   \CASE{\NT{OR}\mth{(}\NT{stmt}\mth{)}}
 656
 657   \RULE{\rt{decl\_stmt}}
 658   \CASE{\mth{\T{metaid}^{\ssf{StmtList}}}}
 659   \CASE{\NT{decl\_var}}
 660   \CASE{\NT{stmt}}
 661   \CASE{\NT{OR}\mth{(}\NT{stmt\_seq}\mth{)}}
 662
 663   \RULE{\rt{stmt\_seq}}
 664   \CASE{\any{\NT{decl\_stmt}}
 665     \opt{\NT{DOTSEQ}\mth{(}\some{\NT{decl\_stmt}},
 666       \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
 667   \CASE{\any{\NT{decl\_stmt}}
 668     \opt{\NT{DOTSEQ}\mth{(}\NT{expr},
 669       \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
 670
 671   \RULE{\rt{case\_line}}
 672   \CASE{default :~\NT{stmt\_seq}}
 673   \CASE{case \NT{dot\_expr} :~\NT{stmt\_seq}}
 674
 675   \RULE{\rt{iter\_ident}}
 676   \CASE{\T{IteratorId}}
 677   \CASE{\mth{\T{metaid}^{\ssf{Iterator}}}}
 678 \end{grammar}
 679
 680 \begin{grammar}
 681   \RULE{\rt{OR}\mth{(}\rt{gram\_o}\mth{)}}
 682   \CASE{( \NT{gram\_o} \ANY{\ttmid \NT{gram\_o}})}
 683
 684   \RULE{\rt{DOTSEQ}\mth{(}\rt{gram\_d}, \rt{when\_d}\mth{)}}
 685   \CASE{\ldots \opt{\NT{when\_d}} \ANY{\NT{gram\_d} \ldots \opt{\NT{when\_d}}}}
 686
 687   \RULE{\rt{NEST}\mth{(}\rt{gram\_n}, \rt{when\_n}\mth{)}}
 688   \CASE{<\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots>}
 689   \CASE{<+\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots+>}
 690 \end{grammar}
 691
 692 \noindent
 693 OR is a macro that generates a disjunction of patterns.  The three
 694 tokens \T{(}, \T{\ttmid}, and \T{)} must appear in the leftmost
 695 column, to differentiate them from the parentheses and bit-or tokens
 696 that can appear within expressions (and cannot appear in the leftmost
  697 column). These tokens may also be preceded by \texttt{\bs}
  698 when they are used in another column.  These tokens are furthermore
 699 different from (, \(\mid\), and ), which are part of the grammar
 700 metalanguage.
 701
 702 \section{Expressions}
 703
 704 A nest or a single ellipsis is allowed in some expression contexts, and
 705 causes ambiguity in others.  For example, in a sequence \mtt{\ldots
 706 \mita{expr} \ldots}, the nonterminal \mita{expr} must be instantiated as an
 707 explicit C-language expression, while in an array reference,
 708 \mtt{\mth{\mita{expr}_1} \mtt{[} \mth{\mita{expr}_2} \mtt{]}}, the
 709 nonterminal \mth{\mita{expr}_2}, because it is delimited by brackets, can
  710 also be instantiated as \mtt{\ldots}, representing an arbitrary expression.  To
 711 distinguish between the various possibilities, we define three nonterminals
 712 for expressions: {\em expr} does not allow either top-level nests or
 713 ellipses, {\em nest\_expr} allows a nest but not an ellipsis, and {\em
 714 dot\_expr} allows both.  The EXPR macro is used to express these variants
 715 in a concise way.
 716
 717 \begin{grammar}
 718   \RULE{\rt{expr}}
 719   \CASE{\NT{EXPR}\mth{(}\NT{expr}\mth{)}}
 720
 721   \RULE{\rt{nest\_expr}}
 722   \CASE{\NT{EXPR}\mth{(}\NT{nest\_expr}\mth{)}}
 723   \CASE{\NT{NEST}\mth{(}\NT{nest\_expr}, \NT{exp\_whencode}\mth{)}}
 724
 725   \RULE{\rt{dot\_expr}}
 726   \CASE{\NT{EXPR}\mth{(}\NT{dot\_expr}\mth{)}}
 727   \CASE{\NT{NEST}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)}}
 728   \CASE{...~\opt{\NT{exp\_whencode}}}
 729
 730   \RULE{\rt{EXPR}\mth{(}\rt{exp}\mth{)}}
 731   \CASE{\NT{exp} \NT{assign\_op} \NT{exp}}
 732   \CASE{\NT{exp}++}
 733   \CASE{\NT{exp}--}
 734   \CASE{\NT{unary\_op} \NT{exp}}
 735   \CASE{\NT{exp} \NT{bin\_op} \NT{exp}}
 736   \CASE{\NT{exp} ?~\NT{dot\_expr} :~\NT{exp}}
 737   \CASE{(\NT{type}) \NT{exp}}
 738   \CASE{\NT{exp} [\NT{dot\_expr}]}
 739   \CASE{\NT{exp} .~\NT{id}}
 740   \CASE{\NT{exp} -> \NT{id}}
 741   \CASE{\NT{exp}(\opt{\NT{PARAMSEQ}\mth{(}\NT{arg}, \NT{exp\_whencode}\mth{)}})}
 742   \CASE{\NT{id}}
 743 %   \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
 744 %   \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
 745   \CASE{\mth{\T{metaid}^{\ssf{Exp}}}}
 746 %   \CASE{\mth{\T{metaid}^{\ssf{Err}}}}
 747   \CASE{\mth{\T{metaid}^{\ssf{Const}}}}
 748   \CASE{\NT{const}}
 749   \CASE{(\NT{dot\_expr})}
 750   \CASE{\NT{OR}\mth{(}\NT{exp}\mth{)}}
 751
 752   \RULE{\rt{arg}}
 753   \CASE{\NT{nest\_expr}}
 754   \CASE{\mth{\T{metaid}^{\ssf{ExpList}}}}
 755
 756   \RULE{\rt{exp\_whencode}}
 757   \CASE{when != \NT{expr}}
 758
 759   \RULE{\rt{assign\_op}}
 760   \CASE{= \OR -= \OR += \OR *= \OR /= \OR \%=}
 761   \CASE{\&= \OR |= \OR \caret= \OR \lt\lt= \OR \gt\gt=}
 762
 763   \RULE{\rt{bin\_op}}
 764   \CASE{* \OR / \OR \% \OR + \OR -}
 765   \CASE{\lt\lt \OR \gt\gt \OR \caret\xspace \OR \& \OR \ttmid}
 766   \CASE{< \OR > \OR <= \OR >= \OR == \OR != \OR \&\& \OR \ttmid\ttmid}
 767
 768   \RULE{\rt{unary\_op}}
 769   \CASE{++ \OR -- \OR \& \OR * \OR + \OR - \OR !}
 770
 771 \end{grammar}
 772
 773 \section{Constant, Identifiers and Types for Transformations}
 774
 775 \begin{grammar}
 776   \RULE{\rt{const}}
 777   \CASE{\NT{string}}
 778   \CASE{[0-9]+}
 779   \CASE{\mth{\cdots}}
 780
 781   \RULE{\rt{string}}
 782   \CASE{"\any{[\^{}"]}"}
 783
 784   \RULE{\rt{id}}
 785   \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Id}}}}
 786
 787   \RULE{\rt{typedef\_ident}}
 788   \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Type}}}}
 789
 790   \RULE{\rt{type}}
 791   \CASE{\NT{ctype} \OR \mth{\T{metaid}^{\ssf{Type}}}}
 792
 793   \RULE{\rt{pathToIsoFile}}
 794   \CASE{<.*>}
 795
 796   \RULE{\rt{regexp}}
 797   \CASE{"\any{[\^{}"]}"}
 798 \end{grammar}
 799
 800
 801 %%% Local Variables:
 802 %%% mode: LaTeX
 803 %%% TeX-master: "main_grammar"
 804 %%% coding: latin-9
 805 %%% TeX-PDF-mode: t
 806 %%% ispell-local-dictionary: "american"
 807 %%% End: