Release coccinelle-0.2.0rc1
[bpt/coccinelle.git] / docs / manual / cocci_syntax.tex
1
2 %\section{The SmPL Grammar}
3
4 % This section presents the SmPL grammar. This definition follows closely
5 % our implementation using the Menhir parser generator \cite{menhir}.
6
7 This document presents the grammar of the SmPL language used by the
8 \href{http://coccinelle.lip6.fr/}{Coccinelle tool}. For the most
9 part, the grammar is written using standard notation. In some rules,
10 however, the left-hand side is in all uppercase letters. These are
11 macros, which take one or more grammar rule right-hand-sides as
12 arguments. The grammar also uses some unspecified nonterminals, such
13 as \T{id}, \T{const}, etc. These refer to the sets suggested by
14 the name, {\em i.e.}, \T{id} refers to the set of possible
15 C-language identifiers, while \T{const} refers to the set of
16 possible C-language constants.
17 %
18 \ifhevea
19 A PDF version of this documentation is available at
20 \url{http://coccinelle.lip6.fr/docs/main_grammar.pdf}.
21 \else
22 An HTML version of this documentation is available online at
23 \url{http://coccinelle.lip6.fr/docs/main_grammar.html}.
24 \fi
25
26 \section{Program}
27
28 \begin{grammar}
29 \RULE{\rt{program}}
30 \CASE{\any{\NT{include\_cocci}} \some{\NT{changeset}}}
31
32 \RULE{\rt{include\_cocci}}
33 \CASE{using \NT{string}}
34 \CASE{using \NT{pathToIsoFile}}
35
36 \RULE{\rt{changeset}}
37 \CASE{\NT{metavariables} \NT{transformation}}
38 \CASE{\NT{script\_metavariables} \T{script\_code}}
39 % \CASE{\NT{metavariables} \ANY{--- filename +++ filename} \NT{transformation}}
40 \end{grammar}
41
42 \noindent
43 \T{script\_code} is any code in the chosen scripting language. Parsing of
44 the semantic patch does not check the validity of this code; any errors are
45 first detected when the code is executed. Furthermore, \texttt{@} should
46 not be used in this code. Spatch scans the script code for the next
47 \texttt{@} and considers that to be the beginning of the next rule, even if
48 \texttt{@} occurs within, e.g., a string or a comment.
49
50 % Between the metavariables and the transformation rule, there can be a
51 % specification of constraints on the names of the old and new files,
52 % analogous to the filename specifications in the standard patch syntax.
53 % (see Figure \ref{scsiglue_patch}).
54
55 \section{Metavariables for transformations}
56
57 The \NT{rulename} portion of the metavariable declaration can specify
58 properties of a rule such as its name, the names of the rules that it
59 depends on, the isomorphisms to be used in processing the rule, and whether
60 quantification over paths should be universal or existential. The optional
61 annotation {\tt expression} indicates that the pattern is to be considered
62 as matching an expression, and thus can be used to avoid some parsing
63 problems.
64
65 The \NT{metadecl} portion of the metavariable declaration defines various
66 types of metavariables that will be used for matching in the transformation
67 section.
68
69 \begin{grammar}
70 \RULE{\rt{metavariables}}
71 \CASE{@@ \any{\NT{metadecl}} @@}
72 \CASE{@ \NT{rulename} @ \any{\NT{metadecl}} @@}
73
74 \RULE{\rt{rulename}}
75 \CASE{\T{id} \OPT{extends \T{id}} \OPT{depends on \NT{dep}} \opt{\NT{iso}}
76 \opt{\NT{disable-iso}} \opt{\NT{exists}} \opt{expression}}
77 \CASE{script:\T{language} \OPT{depends on \NT{dep}}}
78
79 \RULE{\rt{script\_init\_final}}
80 \CASE{initialize:\T{language}}
81 \CASE{finalize:\T{language}}
82
83 \RULE{\rt{dep}}
84 \CASE{\NT{pnrule}}
85 \CASE{\NT{dep} \&\& \NT{dep}}
86 \CASE{\NT{dep} || \NT{dep}}
87
88 \RULE{\rt{pnrule}}
89 \CASE{\T{id}}
90 \CASE{!\T{id}}
91 \CASE{ever \T{id}}
92 \CASE{never \T{id}}
93 \CASE{(\NT{dep})}
94
95 \RULE{\rt{iso}}
96 \CASE{using \NT{string} \ANY{, \NT{string}}}
97
98 \RULE{\rt{disable-iso}}
99 \CASE{disable \NT{COMMA\_LIST}\mth{(}\T{id}\mth{)}}
100
101 \RULE{\rt{exists}}
102 \CASE{exists}
103 \CASE{forall}
104 % \CASE{\opt{reverse} forall}
105
106 \RULE{\rt{COMMA\_LIST}\mth{(}\rt{elem}\mth{)}}
107 \CASE{\NT{elem} \ANY{, \NT{elem}}}
108 \end{grammar}
109
110 The keyword \KW{disable} is normally used with the names of
111 isomorphisms defined in standard.iso or whatever isomorphism file has been
112 included. There are, however, some other isomorphisms that are built into
113 the implementation of Coccinelle and that can be disabled as well. Their
114 names are given below. In each case, the text describes the standard
115 behavior. Using \NT{disable-iso} with the given name disables this behavior.
116
117 \begin{itemize}
118 \item \KW{optional\_storage}: A SmPL function definition that does not
119 specify any visibility (i.e., static or extern), or a SmPL variable
120 declaration that does not specify any storage (i.e., auto, static,
121 register, or extern), matches a function declaration or variable
122 declaration with any visibility or storage, respectively.
123 \item \KW{optional\_qualifier}: This is similar to \KW{optional\_storage},
124 except that here it is the qualifier (i.e., const or volatile) that does
125 not have to be specified in the SmPL code, but may be present in the C code.
126 \item \KW{value\_format}: Integers in various formats, e.g., 1 and 0x1, are
127 considered to be equivalent in the matching process.
128 \item \KW{comm\_assoc}: An expression of the form \NT{exp} \NT{bin\_op}
129 \KW{...}, where \NT{bin\_op} is commutative and associative, is
130 considered to match any top-level sequence of \NT{bin\_op} operators
131 containing \NT{exp} as the top-level argument.
132 \end{itemize}
133
134 The possible types of metavariable declarations are defined by the grammar
135 rule below. Metavariables should occur at least once in the transformation
136 immediately following their declaration. Fresh metavariables must only be
137 used in {\tt +} code. These properties are not expressed in the grammar,
138 but are checked by a subsequent analysis. The metavariables are designated
139 according to the kind of terms they can match, such as a statement, an
140 identifier, or an expression. An expression metavariable can be further
141 constrained by its type.
142
143 \begin{grammar}
144 \RULE{\rt{metadecl}}
145 \CASE{fresh identifier \NT{ids} ;}
146 \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
147 \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
148 \CASE{parameter \opt{list} \NT{ids} ;}
149 \CASE{parameter list [ \NT{id} ] \NT{ids} ;}
150 \CASE{type \NT{ids} ;}
151 \CASE{statement \opt{list} \NT{ids} ;}
152 \CASE{typedef \NT{ids} ;}
153 \CASE{declarer name \NT{ids} ;}
154 % \CASE{\opt{local} function \NT{pmid\_with\_not\_eq\_list} ;}
155 \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
156 \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
157 \CASE{iterator name \NT{ids} ;}
158 \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;}
159 \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
160 % \CASE{error \NT{pmid\_with\_not\_eq\_list} ; }
161 \CASE{\opt{local} idexpression \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
162 \CASE{\opt{local} idexpression \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
163 \CASE{\opt{local} idexpression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
164 \CASE{expression list \NT{ids} ;}
165 \CASE{expression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
166 \CASE{expression \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
167 \CASE{expression list [ \NT{id} ] \NT{ids} ;}
168 \CASE{\NT{ctype} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
169 \CASE{\NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
170 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
171 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
172 \CASE{constant \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
173 \CASE{constant \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
174 \CASE{position \opt{any} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq\_mid}\mth{)} ;}
175 \end{grammar}
176
177 \begin{grammar}
178 \RULE{\rt{ids}}
179 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{pmid}\mth{)}}
180
181 \RULE{\rt{pmid}}
182 \CASE{\T{id}}
183 \CASE{\NT{mid}}
184 % \CASE{list}
185 % \CASE{error}
186 % \CASE{type}
187
188 \RULE{\rt{mid}} \CASE{\T{rulename\_id}.\T{id}}
189
190 \RULE{\rt{pmid\_with\_regexp}}
191 \CASE{\NT{pmid} \~{}= \NT{regexp}}
192
193 \RULE{\rt{pmid\_with\_not\_eq}}
194 \CASE{\NT{pmid} \OPT{!= \T{id}}}
195 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\T{id}\mth{)} \ttrb}}
196
197 \RULE{\rt{pmid\_with\_not\_ceq}}
198 \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_cst}}}
199 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_cst}\mth{)} \ttrb}}
200
201 \RULE{\rt{id\_or\_cst}}
202 \CASE{\T{id}}
203 \CASE{\T{integer}}
204
205 \RULE{\rt{pmid\_with\_not\_eq\_mid}}
206 \CASE{\NT{pmid} \OPT{!= \NT{mid}}}
207 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{mid}\mth{)} \ttrb}}
208 \end{grammar}
209
210 Subsequently, we refer to arbitrary metavariables as
211 \mth{\msf{metaid}^{\mbox{\scriptsize{\it{ty}}}}}, where {\it{ty}}
212 indicates the {\it metakind} used in the declaration of the variable.
213 For example, \mth{\msf{metaid}^{\ssf{Type}}} refers to a metavariable
214 that was declared using \texttt{type} and stands for any type.
215
216 The \NT{ctype} and \NT{ctypes} nonterminals are used by both the grammar of
217 metavariable declarations and the grammar of transformations, and are
218 defined on page~\pageref{types}.
219
220 \section{Metavariables for scripts}
221
222 Metavariables for scripts can only be inherited from transformation rules.
223 In the spirit of scripting languages such as Python that use dynamic
224 typing, metavariables for scripts do not include type declarations.
225
226 \begin{grammar}
227 \RULE{\rt{script\_metavariables}}
228 \CASE{@ script:\NT{language} \OPT{depends on \NT{dep}} @
229 \any{\NT{script\_metadecl}} @@}
230 \CASE{@ initialize:\NT{language} @}
231 \CASE{@ finalize:\NT{language} @}
232
233 \RULE{\rt{language}} \CASE{python}
234
235 \RULE{\rt{script\_metadecl}} \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;}
236 \end{grammar}
237
238 Currently, the only scripting language that is supported is Python. The
239 set of available scripting languages may be extended at some point.
240
241 Script rules declared with \KW{initialize} are run before the treatment of
242 any file. Script rules declared with \KW{finalize} are run when the
243 treatment of all of the files has completed. There can be at most one of
244 each per scripting language (thus currently at most one of each).
245 Initialize and finalize script rules do not have access to SmPL
246 metavariables. Nevertheless, a finalize script rule can access any
247 variables initialized by the other script rules, allowing information to be
248 transmitted from the matching process to the finalize rule.
249
250 \section{Transformation}
251
252 The transformation specification essentially has the form of C code,
253 except that lines to remove are annotated with \verb+-+ in the first
254 column, and lines to add are annotated with \verb-+-. A
255 transformation specification can also use {\em dots}, ``\verb-...-'',
256 describing an arbitrary sequence of function arguments or instructions
257 within a control-flow path. Dots may be modified with a {\tt when}
258 clause, indicating a pattern that should not occur anywhere within the
259 matched sequence. Finally, a transformation can specify a disjunction
260 of patterns, of the form \mtt{( \mth{\mita{pat}_1} | \mita{\ldots} |
261 \mth{\mita{pat}_n} )} where each \texttt{(}, \texttt{|} or
262 \texttt{)} is in column 0 or preceded by \texttt{\textbackslash}.
263
264 The grammar that we present for the transformation is not actually the
265 grammar of the SmPL code that can be written by the programmer, but is
266 instead the grammar of the slice of this consisting of the {\tt -}
267 annotated and the unannotated code (the context of the transformed lines),
268 or the {\tt +} annotated code and the unannotated code. For example, for
269 parsing purposes, the following transformation
270 %presented in Section \ref{sec:seq2}
271 is split into the two variants shown below and each is parsed
272 separately.
273
274 \begin{center}
275 \begin{tabular}{c}
276 \begin{lstlisting}[language=Cocci]
277 proc_info_func(...) {
278 <...
279 @-- hostno
280 @++ hostptr->host_no
281 ...>
282 }
283 \end{lstlisting}\\
284 \end{tabular}
285 \end{center}
286
287 {%\sizecodebis
288 \begin{center}
289 \begin{tabular}{p{5cm}p{3cm}p{5cm}}
290 \begin{lstlisting}[language=Cocci]
291 proc_info_func(...) {
292 <...
293 @-- hostno
294 ...>
295 }
296 \end{lstlisting}
297 &&
298 \begin{lstlisting}[language=Cocci]
299 proc_info_func(...) {
300 <...
301 @++ hostptr->host_no
302 ...>
303 }
304 \end{lstlisting}
305 \end{tabular}
306 \end{center}
307 }
308
309 \noindent
310 Requiring that both slices parse correctly ensures that the rule matches
311 syntactically valid C code and that it produces syntactically valid C code.
312 The generated parse trees are then merged for use in the subsequent
313 matching and transformation process.
314
315 The grammar for the minus or plus slice of a transformation is as follows:
316
317 \begin{grammar}
318
319 \RULE{\rt{transformation}}
320 \CASE{\some{\NT{include}}}
321 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
322 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
323 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{fundecl}, \NT{when}\mth{)}}
324
325 \RULE{\rt{include}}
326 \CASE{\#include \T{include\_string}}
327
328 % \RULE{\rt{fun\_decl\_stmt}}
329 % \CASE{\NT{decl\_stmt}}
330 % \CASE{\NT{fundecl}}
331
332 % \CASE{\NT{ctype}}
333 % \CASE{\ttlb \NT{initialize\_list} \ttrb}
334 % \CASE{\NT{toplevel\_seq\_start\_after\_dots\_init}}
335 %
336 % \RULE{\rt{toplevel\_seq\_start\_after\_dots\_init}}
337 % \CASE{\NT{stmt\_dots} \NT{toplevel\_after\_dots}}
338 % \CASE{\NT{expr} \opt{\NT{toplevel\_after\_exp}}}
339 % \CASE{\NT{decl\_stmt\_expr} \opt{\NT{toplevel\_after\_stmt}}}
340 %
341 % \RULE{\rt{stmt\_dots}}
342 % \CASE{... \any{\NT{when}}}
343 % \CASE{<... \any{\NT{when}} \NT{nest\_after\_dots} ...>}
344 % \CASE{<+... \any{\NT{when}} \NT{nest\_after\_dots} ...+>}
345
346 \RULE{\rt{when}}
347 \CASE{when != \NT{when\_code}}
348 \CASE{when = \NT{rule\_elem\_stmt}}
349 \CASE{when \NT{COMMA\_LIST}\mth{(}\NT{any\_strict}\mth{)}}
350 \CASE{when true != \NT{expr}}
351 \CASE{when false != \NT{expr}}
352
353 \RULE{\rt{when\_code}}
354 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
355 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
356
357 \RULE{\rt{rule\_elem\_stmt}}
358 \CASE{\NT{one\_decl}}
359 \CASE{\NT{expr};}
360 \CASE{return \opt{\NT{expr}};}
361 \CASE{break;}
362 \CASE{continue;}
363 \CASE{\bs(\NT{rule\_elem\_stmt} \SOME{\bs| \NT{rule\_elem\_stmt}}\bs)}
364
365 \RULE{\rt{any\_strict}}
366 \CASE{any}
367 \CASE{strict}
368 \CASE{forall}
369 \CASE{exists}
370
371 % \RULE{\rt{nest\_after\_dots}}
372 % \CASE{\NT{decl\_stmt\_exp} \opt{\NT{nest\_after\_stmt}}}
373 % \CASE{\opt{\NT{exp}} \opt{\NT{nest\_after\_exp}}}
374 %
375 % \RULE{\rt{nest\_after\_stmt}}
376 % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
377 % \CASE{\NT{decl\_stmt} \opt{\NT{nest\_after\_stmt}}}
378 %
379 % \RULE{\rt{nest\_after\_exp}}
380 % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
381 %
382 % \RULE{\rt{toplevel\_after\_dots}}
383 % \CASE{\opt{\NT{toplevel\_after\_exp}}}
384 % \CASE{\NT{exp} \opt{\NT{toplevel\_after\_exp}}}
385 % \CASE{\NT{decl\_stmt\_expr} \NT{toplevel\_after\_stmt}}
386 %
387 % \RULE{\rt{toplevel\_after\_exp}}
388 % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
389 %
390 % \RULE{\rt{decl\_stmt\_expr}}
391 % \CASE{TMetaStmList$^\ddag$}
392 % \CASE{\NT{decl\_var}}
393 % \CASE{\NT{stmt}}
394 % \CASE{(\NT{stmt\_seq} \ANY{| \NT{stmt\_seq}})}
395 %
396 % \RULE{\rt{toplevel\_after\_stmt}}
397 % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
398 % \CASE{\NT{decl\_stmt} \NT{toplevel\_after\_stmt}}
399
400 \end{grammar}
401
402 \begin{grammar}
403 \RULE{\rt{OPTDOTSEQ}\mth{(}\rt{grammar\_ds}, \rt{when\_ds}\mth{)}}
404 \CASE{}\multicolumn{3}{r}{\hspace{1cm}
405 \KW{\opt{... \opt{\NT{when\_ds}}} \NT{grammar\_ds}
406 \ANY{... \opt{\NT{when\_ds}} \NT{grammar\_ds}}
407 \opt{... \opt{\NT{when\_ds}}}}
408 }
409
410 % \CASE{\opt{... \opt{\NT{when\_ds}}} \NT{grammar}
411 % \ANY{... \opt{\NT{when\_ds}} \NT{grammar}}
412 % \opt{... \opt{\NT{when\_ds}}}}
413 % \CASE{<... \any{\NT{when\_ds}} \NT{grammar} ...>}
414 % \CASE{<+... \any{\NT{when\_ds}} \NT{grammar} ...+>}
415
416 \end{grammar}
417
418 \noindent
419 Lines may be annotated with an element of the set $\{\mtt{-}, \mtt{+},
420 \mtt{*}\}$ or the singleton $\mtt{?}$, or one of each set. \mtt{?}
421 represents at most one match of the given pattern. \mtt{*} is used for
422 semantic match, \emph{i.e.}, a pattern that highlights the fragments
423 annotated with \mtt{*}, but does not perform any modification of the
424 matched code. \mtt{*} cannot be mixed with \mtt{-} and \mtt{+}. There are
425 some constraints on the use of these annotations:
426 \begin{itemize}
427 \item Dots, {\em i.e.}, \texttt{...}, cannot occur on a line marked
428 \texttt{+}.
429 \item Nested dots, {\em i.e.}, dots enclosed in {\tt <} and {\tt >}, cannot
430 occur on a line with any marking.
431 \end{itemize}
432
433 Each element of a disjunction must be a proper term like an
434 expression, a statement, an identifier or a declaration. Thus, the
435 rule on the left below is not a syntactically correct SmPL rule. One may
436 use the rule on the right instead.
437
438 \begin{center}
439 \begin{tabular}{l@{\hspace{5cm}}r}
440 \begin{lstlisting}[language=Cocci]
441 @@
442 type T;
443 T b;
444 @@
445
446 (
447 writeb(...,
448 |
449 readb(
450 )
451 @--(T)
452 b)
453 \end{lstlisting}
454 &
455 \begin{lstlisting}[language=Cocci]
456 @@
457 type T;
458 T b;
459 @@
460
461 (
462 read
463 |
464 write
465 )
466 (...,
467 @-- (T)
468 b)
469 \end{lstlisting}
470 \\
471 \end{tabular}
472 \end{center}
473
474 \section{Types}
475 \label{types}
476
477 \begin{grammar}
478
479 \RULE{\rt{ctypes}}
480 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{ctype}\mth{)}}
481
482 \RULE{\rt{ctype}}
483 \CASE{\opt{\NT{const\_vol}} \NT{generic\_ctype} \any{*}}
484 \CASE{\opt{\NT{const\_vol}} void \some{*}}
485 \CASE{(\NT{ctype} \ANY{| \NT{ctype}})}
486
487 \RULE{\rt{const\_vol}}
488 \CASE{const}
489 \CASE{volatile}
490
491 \RULE{\rt{generic\_ctype}}
492 \CASE{\NT{ctype\_qualif}}
493 \CASE{\opt{\NT{ctype\_qualif}} char}
494 \CASE{\opt{\NT{ctype\_qualif}} short}
495 \CASE{\opt{\NT{ctype\_qualif}} int}
496 \CASE{\opt{\NT{ctype\_qualif}} long}
497 \CASE{\opt{\NT{ctype\_qualif}} long long}
498 \CASE{double}
499 \CASE{float}
500 \CASE{\OPT{struct\OR union} \T{id} \OPT{\{ \any{\NT{struct\_decl\_list}} \}}}
501
502 \RULE{\rt{ctype\_qualif}}
503 \CASE{unsigned}
504 \CASE{signed}
505
506 \RULE{\rt{struct\_decl\_list}}
507 \CASE{\NT{struct\_decl\_list\_start}}
508
509 \RULE{\rt{struct\_decl\_list\_start}}
510 \CASE{\NT{struct\_decl}}
511 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
512 \CASE{... \opt{when != \NT{struct\_decl}}$^\dag$ \opt{\NT{continue\_struct\_decl\_list}}}
513
514 \RULE{\rt{continue\_struct\_decl\_list}}
515 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
516 \CASE{\NT{struct\_decl}}
517
518 \RULE{\rt{struct\_decl}}
519 \CASE{\NT{ctype} \NT{d\_ident};}
520 \CASE{\NT{fn\_ctype} (* \NT{d\_ident}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)});}
521 \CASE{\opt{\NT{const\_vol}} \T{id} \NT{d\_ident};}
522
523 \RULE{\rt{d\_ident}}
524 \CASE{\NT{id} \any{[\opt{\NT{expr}}]}}
525
526 \RULE{\rt{fn\_ctype}}
527 \CASE{\NT{generic\_ctype} \any{*}}
528 \CASE{void \any{*}}
529
530 \RULE{\rt{name\_opt\_decl}}
531 \CASE{\NT{decl}}
532 \CASE{\NT{ctype}}
533 \CASE{\NT{fn\_ctype}}
534 \end{grammar}
535
536 $^\dag$ The optional \texttt{when} construct ends at the end of the line.
537
538 \section{Function declarations}
539
540 \begin{grammar}
541
542 \RULE{\rt{fundecl}}
543 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
544 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}})
545 \ttlb~\opt{\NT{stmt\_seq}} \ttrb}
546
547 \RULE{\rt{funproto}}
548 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
549 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}});}
550
551 \RULE{\rt{funinfo}}
552 \CASE{inline}
553 \CASE{\NT{storage}}
554 % \CASE{\NT{attr}}
555
556 \RULE{\rt{storage}}
557 \CASE{static}
558 \CASE{auto}
559 \CASE{register}
560 \CASE{extern}
561
562 \RULE{\rt{funid}}
563 \CASE{\T{id}}
564 \CASE{\mth{\T{metaid}^{\ssf{Id}}}}
565 % \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
566 % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
567
568 \RULE{\rt{param}}
569 \CASE{\NT{type} \T{id}}
570 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
571 \CASE{\mth{\T{metaid}^{\ssf{ParamList}}}}
572
573 \RULE{\rt{decl}}
574 \CASE{\NT{ctype} \NT{id}}
575 \CASE{\NT{fn\_ctype} (* \NT{id}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)})}
576 \CASE{void}
577 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
578 \end{grammar}
579
580 \begin{grammar}
581 \RULE{\rt{PARAMSEQ}\mth{(}\rt{gram\_p}, \rt{when\_p}\mth{)}}
582 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{gram\_p} \OR \ldots \opt{\NT{when\_p}}\mth{)}}
583 \end{grammar}
584
585 %\newpage
586
587 \section{Declarations}
588
589 \begin{grammar}
590 \RULE{\rt{decl\_var}}
591 % \CASE{\NT{type} \opt{\NT{id} \opt{[\opt{\NT{dot\_expr}}]}
592 % \ANY{, \NT{id} \opt{[ \opt{\NT{dot\_expr}}]}}};}
593 \CASE{\NT{common\_decl}}
594 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
595 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
596 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) = \NT{initialize} ;}
597 \CASE{typedef \NT{ctype} \NT{typedef\_ident} ;}
598
599 \RULE{\rt{one\_decl}}
600 \CASE{\NT{common\_decl}}
601 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{id};}
602 % \CASE{\NT{storage} \NT{ctype} \NT{id} \opt{[\opt{\NT{dot\\_expr}}]} = \NT{nest\\_expr};}
603 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} ;}
604
605 \RULE{\rt{common\_decl}}
606 \CASE{\NT{ctype};}
607 \CASE{\NT{funproto}}
608 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{d\_ident} = \NT{initialize} ;}
609 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} = \NT{initialize} ;}
610 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) ;}
611 \CASE{\NT{decl\_ident} ( \OPT{\NT{COMMA\_LIST}\mth{(}\NT{expr}\mth{)}} ) ;}
612
613 \RULE{\rt{initialize}}
614 \CASE{\NT{dot\_expr}}
615 \CASE{\ttlb~\opt{\NT{COMMA\_LIST}\mth{(}\NT{dot\_expr}\mth{)}}~\ttrb}
616
617 \RULE{\rt{decl\_ident}}
618 \CASE{\T{DeclarerId}}
619 \CASE{\mth{\T{metaid}^{\ssf{Declarer}}}}
620 \end{grammar}
621
622 \section{Statements}
623
624 The first rule {\em stmt} describes the various forms of a statement.
625 The remaining rules implement the constraints that are sensitive to the
626 context in which the statement occurs: {\em single\_stmt} for a
627 context in which only one statement is allowed, and {\em decl\_stmt}
628 for a context in which a declaration, statement, or sequence thereof is
629 allowed.
630
631 \begin{grammar}
632 \RULE{\rt{stmt}}
633 \CASE{\NT{include}}
634 \CASE{\mth{\T{metaid}^{\ssf{Stmt}}}}
635 \CASE{\NT{expr};}
636 \CASE{if (\NT{dot\_expr}) \NT{single\_stmt} \opt{else \NT{single\_stmt}}}
637 \CASE{for (\opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}})
638 \NT{single\_stmt}}
639 \CASE{while (\NT{dot\_expr}) \NT{single\_stmt}}
640 \CASE{do \NT{single\_stmt} while (\NT{dot\_expr});}
641 \CASE{\NT{iter\_ident} (\any{\NT{dot\_expr}}) \NT{single\_stmt}}
642 \CASE{switch (\opt{\NT{dot\_expr}}) \ttlb \any{\NT{case\_line}} \ttrb}
643 \CASE{return \opt{\NT{dot\_expr}};}
644 \CASE{\ttlb~\opt{\NT{stmt\_seq}} \ttrb}
645 \CASE{\NT{NEST}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
646 \CASE{\NT{NEST}\mth{(}\NT{expr}, \NT{when}\mth{)}}
647 \CASE{break;}
648 \CASE{continue;}
649 \CASE{\NT{id}:}
650 \CASE{goto \NT{id};}
651 \CASE{\ttlb \NT{stmt\_seq} \ttrb}
652
653 \RULE{\rt{single\_stmt}}
654 \CASE{\NT{stmt}}
655 \CASE{\NT{OR}\mth{(}\NT{stmt}\mth{)}}
656
657 \RULE{\rt{decl\_stmt}}
658 \CASE{\mth{\T{metaid}^{\ssf{StmtList}}}}
659 \CASE{\NT{decl\_var}}
660 \CASE{\NT{stmt}}
661 \CASE{\NT{OR}\mth{(}\NT{stmt\_seq}\mth{)}}
662
663 \RULE{\rt{stmt\_seq}}
664 \CASE{\any{\NT{decl\_stmt}}
665 \opt{\NT{DOTSEQ}\mth{(}\some{\NT{decl\_stmt}},
666 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
667 \CASE{\any{\NT{decl\_stmt}}
668 \opt{\NT{DOTSEQ}\mth{(}\NT{expr},
669 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
670
671 \RULE{\rt{case\_line}}
672 \CASE{default :~\NT{stmt\_seq}}
673 \CASE{case \NT{dot\_expr} :~\NT{stmt\_seq}}
674
675 \RULE{\rt{iter\_ident}}
676 \CASE{\T{IteratorId}}
677 \CASE{\mth{\T{metaid}^{\ssf{Iterator}}}}
678 \end{grammar}
679
680 \begin{grammar}
681 \RULE{\rt{OR}\mth{(}\rt{gram\_o}\mth{)}}
682 \CASE{( \NT{gram\_o} \ANY{\ttmid \NT{gram\_o}})}
683
684 \RULE{\rt{DOTSEQ}\mth{(}\rt{gram\_d}, \rt{when\_d}\mth{)}}
685 \CASE{\ldots \opt{\NT{when\_d}} \ANY{\NT{gram\_d} \ldots \opt{\NT{when\_d}}}}
686
687 \RULE{\rt{NEST}\mth{(}\rt{gram\_n}, \rt{when\_n}\mth{)}}
688 \CASE{<\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots>}
689 \CASE{<+\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots+>}
690 \end{grammar}
691
692 \noindent
693 OR is a macro that generates a disjunction of patterns. The three
694 tokens \T{(}, \T{\ttmid}, and \T{)} must appear in the leftmost
695 column, to differentiate them from the parentheses and bit-or tokens
696 that can appear within expressions (and cannot appear in the leftmost
697 column). These tokens may also be preceded by \texttt{\bs}
698 when they are used in another column. These tokens are furthermore
699 different from (, \(\mid\), and ), which are part of the grammar
700 metalanguage.
701
702 \section{Expressions}
703
704 A nest or a single ellipsis is allowed in some expression contexts, and
705 causes ambiguity in others. For example, in a sequence \mtt{\ldots
706 \mita{expr} \ldots}, the nonterminal \mita{expr} must be instantiated as an
707 explicit C-language expression, while in an array reference,
708 \mtt{\mth{\mita{expr}_1} \mtt{[} \mth{\mita{expr}_2} \mtt{]}}, the
709 nonterminal \mth{\mita{expr}_2}, because it is delimited by brackets, can
710 be also instantiated as \mtt{\ldots}, representing an arbitrary expression. To
711 distinguish between the various possibilities, we define three nonterminals
712 for expressions: {\em expr} does not allow either top-level nests or
713 ellipses, {\em nest\_expr} allows a nest but not an ellipsis, and {\em
714 dot\_expr} allows both. The EXPR macro is used to express these variants
715 in a concise way.
716
717 \begin{grammar}
718 \RULE{\rt{expr}}
719 \CASE{\NT{EXPR}\mth{(}\NT{expr}\mth{)}}
720
721 \RULE{\rt{nest\_expr}}
722 \CASE{\NT{EXPR}\mth{(}\NT{nest\_expr}\mth{)}}
723 \CASE{\NT{NEST}\mth{(}\NT{nest\_expr}, \NT{exp\_whencode}\mth{)}}
724
725 \RULE{\rt{dot\_expr}}
726 \CASE{\NT{EXPR}\mth{(}\NT{dot\_expr}\mth{)}}
727 \CASE{\NT{NEST}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)}}
728 \CASE{...~\opt{\NT{exp\_whencode}}}
729
730 \RULE{\rt{EXPR}\mth{(}\rt{exp}\mth{)}}
731 \CASE{\NT{exp} \NT{assign\_op} \NT{exp}}
732 \CASE{\NT{exp}++}
733 \CASE{\NT{exp}--}
734 \CASE{\NT{unary\_op} \NT{exp}}
735 \CASE{\NT{exp} \NT{bin\_op} \NT{exp}}
736 \CASE{\NT{exp} ?~\NT{dot\_expr} :~\NT{exp}}
737 \CASE{(\NT{type}) \NT{exp}}
738 \CASE{\NT{exp} [\NT{dot\_expr}]}
739 \CASE{\NT{exp} .~\NT{id}}
740 \CASE{\NT{exp} -> \NT{id}}
741 \CASE{\NT{exp}(\opt{\NT{PARAMSEQ}\mth{(}\NT{arg}, \NT{exp\_whencode}\mth{)}})}
742 \CASE{\NT{id}}
743 % \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
744 % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
745 \CASE{\mth{\T{metaid}^{\ssf{Exp}}}}
746 % \CASE{\mth{\T{metaid}^{\ssf{Err}}}}
747 \CASE{\mth{\T{metaid}^{\ssf{Const}}}}
748 \CASE{\NT{const}}
749 \CASE{(\NT{dot\_expr})}
750 \CASE{\NT{OR}\mth{(}\NT{exp}\mth{)}}
751
752 \RULE{\rt{arg}}
753 \CASE{\NT{nest\_expr}}
754 \CASE{\mth{\T{metaid}^{\ssf{ExpList}}}}
755
756 \RULE{\rt{exp\_whencode}}
757 \CASE{when != \NT{expr}}
758
759 \RULE{\rt{assign\_op}}
760 \CASE{= \OR -= \OR += \OR *= \OR /= \OR \%=}
761 \CASE{\&= \OR |= \OR \caret= \OR \lt\lt= \OR \gt\gt=}
762
763 \RULE{\rt{bin\_op}}
764 \CASE{* \OR / \OR \% \OR + \OR -}
765 \CASE{\lt\lt \OR \gt\gt \OR \caret\xspace \OR \& \OR \ttmid}
766 \CASE{< \OR > \OR <= \OR >= \OR == \OR != \OR \&\& \OR \ttmid\ttmid}
767
768 \RULE{\rt{unary\_op}}
769 \CASE{++ \OR -- \OR \& \OR * \OR + \OR - \OR !}
770
771 \end{grammar}
772
773 \section{Constants, Identifiers and Types for Transformations}
774
775 \begin{grammar}
776 \RULE{\rt{const}}
777 \CASE{\NT{string}}
778 \CASE{[0-9]+}
779 \CASE{\mth{\cdots}}
780
781 \RULE{\rt{string}}
782 \CASE{"\any{[\^{}"]}"}
783
784 \RULE{\rt{id}}
785 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Id}}}}
786
787 \RULE{\rt{typedef\_ident}}
788 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Type}}}}
789
790 \RULE{\rt{type}}
791 \CASE{\NT{ctype} \OR \mth{\T{metaid}^{\ssf{Type}}}}
792
793 \RULE{\rt{pathToIsoFile}}
794 \CASE{<.*>}
795
796 \RULE{\rt{regexp}}
797 \CASE{"\any{[\^{}"]}"}
798 \end{grammar}
799
800
801 %%% Local Variables:
802 %%% mode: LaTeX
803 %%% TeX-master: "main_grammar"
804 %%% coding: latin-9
805 %%% TeX-PDF-mode: t
806 %%% ispell-local-dictionary: "american"
807 %%% End: