Release coccinelle-0.1.8
[bpt/coccinelle.git] / docs / manual / cocci_syntax.tex
1
2 %\section{The SmPL Grammar}
3
4 % This section presents the SmPL grammar. This definition follows closely
5 % our implementation using the Menhir parser generator \cite{menhir}.
6
7 This document presents the grammar of the SmPL language used by the
8 \href{http://www.emn.fr/x-info/coccinelle}{Coccinelle tool}. For the most
9 part, the grammar is written using standard notation. In some rules,
10 however, the left-hand side is in all uppercase letters. These are
11 macros, which take one or more grammar rule right-hand-sides as
12 arguments. The grammar also uses some unspecified nonterminals, such
13 as \T{id}, \T{const}, etc. These refer to the sets suggested by
14 the name, {\em i.e.}, \T{id} refers to the set of possible
15 C-language identifiers, while \T{const} refers to the set of
16 possible C-language constants.
17 %
18 \ifhevea
19 A PDF version of this documentation is available at
20 \url{http://www.emn.fr/x-info/coccinelle/docs/cocci_syntax.pdf}.
21 \else
22 An HTML version of this documentation is available online at
23 \url{http://www.emn.fr/x-info/coccinelle/docs/cocci_syntax.html}.
24 \fi
25
26 %% \ifhevea A PDF
27 %% version of this documentation is available at
28 %% \url{http://localhost:8080/coccinelle/cocci_syntax.pdf}.\else A HTML
29 %% version of this documentation is available online at
30 %% \url{http://localhost:8080/coccinelle/cocci_syntax.html}. \fi
31
32 \section{Program}
33
34 \begin{grammar}
35 \RULE{\rt{program}}
36 \CASE{\any{\NT{include\_cocci}} \some{\NT{changeset}}}
37
38 \RULE{\rt{include\_cocci}}
39 \CASE{using \NT{string}}
40 \CASE{using \NT{pathToIsoFile}}
41
42 \RULE{\rt{changeset}}
43 \CASE{\NT{metavariables} \NT{transformation}}
44 \CASE{\NT{script\_metavariables} \T{script\_code}}
45 % \CASE{\NT{metavariables} \ANY{--- filename +++ filename} \NT{transformation}}
46 \end{grammar}
47
48 \noindent
49 \T{script\_code} is any code in the chosen scripting language. Parsing of
50 the semantic patch does not check the validity of this code; any errors are
51 first detected when the code is executed.
52
53 % Between the metavariables and the transformation rule, there can be a
54 % specification of constraints on the names of the old and new files,
55 % analogous to the filename specifications in the standard patch syntax.
56 % (see Figure \ref{scsiglue_patch}).
57
58 \section{Metavariables for transformations}
59
60 The \NT{rulename} portion of the metavariable declaration can specify
61 properties of a rule such as its name, the names of the rules that it
62 depends on, the isomorphisms to be used in processing the rule, and whether
63 quantification over paths should be universal or existential. The optional
64 annotation {\tt expression} indicates that the pattern is to be considered
65 as matching an expression, and thus can be used to avoid some parsing
66 problems.
67
68 The \NT{metadecl} portion of the metavariable declaration defines various
69 types of metavariables that will be used for matching in the transformation
70 section.
71
72 \begin{grammar}
73 \RULE{\rt{metavariables}}
74 \CASE{@@ \any{\NT{metadecl}} @@}
75 \CASE{@ \NT{rulename} @ \any{\NT{metadecl}} @@}
76
77 \RULE{\rt{rulename}}
78 \CASE{\T{id} \OPT{extends \T{id}} \OPT{depends on \NT{dep}} \opt{\NT{iso}}
79 \opt{\NT{disable-iso}} \opt{\NT{exists}} \opt{expression}}
80 \CASE{script:\T{language} \OPT{depends on \NT{dep}}}
81
82 \RULE{\rt{script\_init\_final}}
83 \CASE{initialize:\T{language}}
84 \CASE{finalize:\T{language}}
85
86 \RULE{\rt{dep}}
87 \CASE{\NT{pnrule}}
88 \CASE{\NT{dep} \&\& \NT{dep}}
89 \CASE{\NT{dep} || \NT{dep}}
90
91 \RULE{\rt{pnrule}}
92 \CASE{\T{id}}
93 \CASE{!\T{id}}
94 \CASE{ever \T{id}}
95 \CASE{never \T{id}}
96 \CASE{(\NT{dep})}
97
98 \RULE{\rt{iso}}
99 \CASE{using \NT{string} \ANY{, \NT{string}}}
100
101 \RULE{\rt{disable-iso}}
102 \CASE{disable \NT{COMMA\_LIST}\mth{(}\T{id}\mth{)}}
103
104 \RULE{\rt{exists}}
105 \CASE{exists}
106 \CASE{forall}
107 % \CASE{\opt{reverse} forall}
108
109 \RULE{\rt{COMMA\_LIST}\mth{(}\rt{elem}\mth{)}}
110 \CASE{\NT{elem} \ANY{, \NT{elem}}}
111 \end{grammar}
112
113 The keyword \KW{disable} is normally used with the names of
114 isomorphisms defined in standard.iso or whatever isomorphism file has been
115 included. There are, however, some other isomorphisms that are built into
116 the implementation of Coccinelle and that can be disabled as well. Their
117 names are given below. In each case, the text describes the standard
118 behavior. Using \NT{disable-iso} with the given name disables this behavior.
119
120 \begin{itemize}
121 \item \KW{optional\_storage}: A SmPL function definition that does not
122 specify any visibility (i.e., static or extern), or a SmPL variable
123 declaration that does not specify any storage (i.e., auto, static,
124 register, or extern), matches a function declaration or variable
125 declaration with any visibility or storage, respectively.
126 \item \KW{optional\_qualifier}: This is similar to \KW{optional\_storage},
127 except that here it is the qualifier (i.e., const or volatile) that does
128 not have to be specified in the SmPL code, but may be present in the C code.
129 \item \KW{value\_format}: Integers in various formats, e.g., 1 and 0x1, are
130 considered to be equivalent in the matching process.
131 \item \KW{comm\_assoc}: An expression of the form \NT{exp} \NT{bin\_op}
132 \KW{...}, where \NT{bin\_op} is commutative and associative, is
133 considered to match any top-level sequence of \NT{bin\_op} operators
134 containing \NT{exp} as the top-level argument.
135 \end{itemize}
136
137 The possible types of metavariable declarations are defined by the grammar
138 rule below. Metavariables should occur at least once in the transformation
139 immediately following their declaration. Fresh metavariables must only be
140 used in {\tt +} code. These properties are not expressed in the grammar,
141 but are checked by a subsequent analysis. The metavariables are designated
142 according to the kind of terms they can match, such as a statement, an
143 identifier, or an expression. An expression metavariable can be further
144 constrained by its type.
145
146 \begin{grammar}
147 \RULE{\rt{metadecl}}
148 \CASE{fresh identifier \NT{ids} ;}
149 \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
150 \CASE{parameter \opt{list} \NT{ids} ;}
151 \CASE{parameter list [ \NT{id} ] \NT{ids} ;}
152 \CASE{type \NT{ids} ;}
153 \CASE{statement \opt{list} \NT{ids} ;}
154 \CASE{typedef \NT{ids} ;}
155 \CASE{declarer name \NT{ids} ;}
156 % \CASE{\opt{local} function \NT{pmid\_with\_not\_eq\_list} ;}
157 \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
158 \CASE{iterator name \NT{ids} ;}
159 \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
160 % \CASE{error \NT{pmid\_with\_not\_eq\_list} ; }
161 \CASE{\opt{local} idexpression \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
162 \CASE{\opt{local} idexpression \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
163 \CASE{\opt{local} idexpression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
164 \CASE{expression list \NT{ids} ;}
165 \CASE{expression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
166 \CASE{expression \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
167 \CASE{expression list [ ident ] \NT{ids} ;}
168 \CASE{\NT{ctype} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
169 \CASE{\NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
170 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;}
171 \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
172 \CASE{constant \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
173 \CASE{constant \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;}
174 \CASE{position \opt{any} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq\_mid}\mth{)} ;}
175 \end{grammar}
176
177 \begin{grammar}
178 \RULE{\rt{ids}}
179 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{pmid}\mth{)}}
180
181 \RULE{\rt{pmid}}
182 \CASE{\T{id}}
183 \CASE{\NT{mid}}
184 % \CASE{list}
185 % \CASE{error}
186 % \CASE{type}
187
188 \RULE{\rt{mid}} \CASE{\T{rulename\_id}.\T{id}}
189
190 \RULE{\rt{pmid\_with\_not\_eq}}
191 \CASE{\NT{pmid} \OPT{!= \T{id}}}
192 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\T{id}\mth{)} \ttrb}}
193
194 \RULE{\rt{pmid\_with\_not\_ceq}}
195 \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_cst}}}
196 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_cst}\mth{)} \ttrb}}
197
198 \RULE{\rt{id\_or\_cst}}
199 \CASE{\T{id}}
200 \CASE{\T{integer}}
201
202 \RULE{\rt{pmid\_with\_not\_eq\_mid}}
203 \CASE{\NT{pmid} \OPT{!= \NT{mid}}}
204 \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{mid}\mth{)} \ttrb}}
205 \end{grammar}
206
207 Subsequently, we refer to arbitrary metavariables as
208 \mth{\msf{metaid}^{\mbox{\scriptsize{\it{ty}}}}}, where {\it{ty}}
209 indicates the {\it metakind} used in the declaration of the variable.
210 For example, \mth{\msf{metaid}^{\ssf{Type}}} refers to a metavariable
211 that was declared using \texttt{type} and stands for any type.
212
213 The \NT{ctype} and \NT{ctypes} nonterminals are used by both the grammar of
214 metavariable declarations and the grammar of transformations, and are
215 defined on page~\pageref{types}.
216
217 \section{Metavariables for scripts}
218
219 Metavariables for scripts can only be inherited from transformation rules.
220 In the spirit of scripting languages such as Python that use dynamic
221 typing, metavariables for scripts do not include type declarations.
222
223 \begin{grammar}
224 \RULE{\rt{script\_metavariables}}
225 \CASE{@ script:\NT{language} \OPT{depends on \NT{dep}} @
226 \any{\NT{script\_metadecl}} @@}
227 \CASE{@ initialize:\NT{language} @}
228 \CASE{@ finalize:\NT{language} @}
229
230 \RULE{\rt{language}} \CASE{python}
231
232 \RULE{\rt{script\_metadecl}} \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;}
233 \end{grammar}
234
235 Currently, the only scripting language that is supported is Python. The
236 set of available scripting languages may be extended at some point.
237
238 Script rules declared with \KW{initialize} are run before the treatment of
239 any file. Script rules declared with \KW{finalize} are run when the
240 treatment of all of the files has completed. There can be at most one of
241 each per scripting language (thus currently at most one of each).
242 Initialize and finalize script rules do not have access to SmPL
243 metavariables. Nevertheless, a finalize script rule can access any
244 variables initialized by the other script rules, allowing information to be
245 transmitted from the matching process to the finalize rule.
246
247 \section{Transformation}
248
249 The transformation specification essentially has the form of C code,
250 except that lines to remove are annotated with \verb+-+ in the first
251 column, and lines to add are annotated with \verb-+-. A
252 transformation specification can also use {\em dots}, ``\verb-...-'',
253 describing an arbitrary sequence of function arguments or instructions
254 within a control-flow path. Dots may be modified with a {\tt when}
255 clause, indicating a pattern that should not occur anywhere within the
256 matched sequence. Finally, a transformation can specify a disjunction
257 of patterns, of the form \mtt{( \mth{\mita{pat}_1} | \mita{\ldots} |
258 \mth{\mita{pat}_n} )} where each \texttt{(}, \texttt{|} or
259 \texttt{)} is in column 0 or preceded by \texttt{\textbackslash}.
260
261 The grammar that we present for the transformation is not actually the
262 grammar of the SmPL code that can be written by the programmer, but is
263 instead the grammar of the slice of this consisting of the {\tt -}
264 annotated and the unannotated code (the context of the transformed lines),
265 or the {\tt +} annotated code and the unannotated code. For example, for
266 parsing purposes, the following transformation
267 %presented in Section \ref{sec:seq2}
268 is split into the two variants shown below and each is parsed
269 separately.
270
271 \begin{center}
272 \begin{tabular}{c}
273 \begin{lstlisting}[language=Cocci]
274 proc_info_func(...) {
275 <...
276 @-- hostno
277 @++ hostptr->host_no
278 ...>
279 }
280 \end{lstlisting}\\
281 \end{tabular}
282 \end{center}
283
284 {%\sizecodebis
285 \begin{center}
286 \begin{tabular}{p{5cm}p{3cm}p{5cm}}
287 \begin{lstlisting}[language=Cocci]
288 proc_info_func(...) {
289 <...
290 @-- hostno
291 ...>
292 }
293 \end{lstlisting}
294 &&
295 \begin{lstlisting}[language=Cocci]
296 proc_info_func(...) {
297 <...
298 @++ hostptr->host_no
299 ...>
300 }
301 \end{lstlisting}
302 \end{tabular}
303 \end{center}
304 }
305
306 \noindent
307 Requiring that both slices parse correctly ensures that the rule matches
308 syntactically valid C code and that it produces syntactically valid C code.
309 The generated parse trees are then merged for use in the subsequent
310 matching and transformation process.
311
312 The grammar for the minus or plus slice of a transformation is as follows:
313
314 \begin{grammar}
315
316 \RULE{\rt{transformation}}
317 \CASE{\some{\NT{include}}}
318 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
319 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
320 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{fundecl}, \NT{when}\mth{)}}
321
322 \RULE{\rt{include}}
323 \CASE{\#include \T{include\_string}}
324
325 % \RULE{\rt{fun\_decl\_stmt}}
326 % \CASE{\NT{decl\_stmt}}
327 % \CASE{\NT{fundecl}}
328
329 % \CASE{\NT{ctype}}
330 % \CASE{\ttlb \NT{initialize\_list} \ttrb}
331 % \CASE{\NT{toplevel\_seq\_start\_after\_dots\_init}}
332 %
333 % \RULE{\rt{toplevel\_seq\_start\_after\_dots\_init}}
334 % \CASE{\NT{stmt\_dots} \NT{toplevel\_after\_dots}}
335 % \CASE{\NT{expr} \opt{\NT{toplevel\_after\_exp}}}
336 % \CASE{\NT{decl\_stmt\_expr} \opt{\NT{toplevel\_after\_stmt}}}
337 %
338 % \RULE{\rt{stmt\_dots}}
339 % \CASE{... \any{\NT{when}}}
340 % \CASE{<... \any{\NT{when}} \NT{nest\_after\_dots} ...>}
341 % \CASE{<+... \any{\NT{when}} \NT{nest\_after\_dots} ...+>}
342
343 \RULE{\rt{when}}
344 \CASE{when != \NT{when\_code}}
345 \CASE{when = \NT{rule\_elem\_stmt}}
346 \CASE{when \NT{COMMA\_LIST}\mth{(}\NT{any\_strict}\mth{)}}
347 \CASE{when true != \NT{expr}}
348 \CASE{when false != \NT{expr}}
349
350 \RULE{\rt{when\_code}}
351 \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
352 \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}}
353
354 \RULE{\rt{rule\_elem\_stmt}}
355 \CASE{\NT{one\_decl}}
356 \CASE{\NT{expr};}
357 \CASE{return \opt{\NT{expr}};}
358 \CASE{break;}
359 \CASE{continue;}
360 \CASE{\bs(\NT{rule\_elem\_stmt} \SOME{\bs| \NT{rule\_elem\_stmt}}\bs)}
361
362 \RULE{\rt{any\_strict}}
363 \CASE{any}
364 \CASE{strict}
365 \CASE{forall}
366 \CASE{exists}
367
368 % \RULE{\rt{nest\_after\_dots}}
369 % \CASE{\NT{decl\_stmt\_exp} \opt{\NT{nest\_after\_stmt}}}
370 % \CASE{\opt{\NT{exp}} \opt{\NT{nest\_after\_exp}}}
371 %
372 % \RULE{\rt{nest\_after\_stmt}}
373 % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
374 % \CASE{\NT{decl\_stmt} \opt{\NT{nest\_after\_stmt}}}
375 %
376 % \RULE{\rt{nest\_after\_exp}}
377 % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}}
378 %
379 % \RULE{\rt{toplevel\_after\_dots}}
380 % \CASE{\opt{\NT{toplevel\_after\_exp}}}
381 % \CASE{\NT{exp} \opt{\NT{toplevel\_after\_exp}}}
382 % \CASE{\NT{decl\_stmt\_expr} \NT{toplevel\_after\_stmt}}
383 %
384 % \RULE{\rt{toplevel\_after\_exp}}
385 % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
386 %
387 % \RULE{\rt{decl\_stmt\_expr}}
388 % \CASE{TMetaStmList$^\ddag$}
389 % \CASE{\NT{decl\_var}}
390 % \CASE{\NT{stmt}}
391 % \CASE{(\NT{stmt\_seq} \ANY{| \NT{stmt\_seq}})}
392 %
393 % \RULE{\rt{toplevel\_after\_stmt}}
394 % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}}
395 % \CASE{\NT{decl\_stmt} \NT{toplevel\_after\_stmt}}
396
397 \end{grammar}
398
399 \begin{grammar}
400 \RULE{\rt{OPTDOTSEQ}\mth{(}\rt{grammar\_ds}, \rt{when\_ds}\mth{)}}
401 \CASE{}\multicolumn{3}{r}{\hspace{1cm}
402 \KW{\opt{... \opt{\NT{when\_ds}}} \NT{grammar\_ds}
403 \ANY{... \opt{\NT{when\_ds}} \NT{grammar\_ds}}
404 \opt{... \opt{\NT{when\_ds}}}}
405 }
406
407 % \CASE{\opt{... \opt{\NT{when\_ds}}} \NT{grammar}
408 % \ANY{... \opt{\NT{when\_ds}} \NT{grammar}}
409 % \opt{... \opt{\NT{when\_ds}}}}
410 % \CASE{<... \any{\NT{when\_ds}} \NT{grammar} ...>}
411 % \CASE{<+... \any{\NT{when\_ds}} \NT{grammar} ...+>}
412
413 \end{grammar}
414
415 \noindent
416 Lines may be annotated with an element of the set $\{\mtt{-}, \mtt{+},
417 \mtt{*}\}$ or the singleton $\mtt{?}$, or one of each set. \mtt{?}
418 represents at most one match of the given pattern. \mtt{*} is used for
419 semantic match, \emph{i.e.}, a pattern that highlights the fragments
420 annotated with \mtt{*}, but does not perform any modification of the
421 matched code. \mtt{*} cannot be mixed with \mtt{-} and \mtt{+}. There are
422 some constraints on the use of these annotations:
423 \begin{itemize}
424 \item Dots, {\em i.e.}, \texttt{...}, cannot occur on a line marked
425 \texttt{+}.
426 \item Nested dots, {\em i.e.}, dots enclosed in {\tt <} and {\tt >}, cannot
427 occur on a line with any marking.
428 \end{itemize}
429
430 Each element of a disjunction must be a proper term like an
431 expression, a statement, an identifier or a declaration. Thus, the
432 rule on the left below is not a syntactically correct SmPL rule. One may
433 use the rule on the right instead.
434
435 \begin{center}
436 \begin{tabular}{l@{\hspace{5cm}}r}
437 \begin{lstlisting}[language=Cocci]
438 @@
439 type T;
440 T b;
441 @@
442
443 (
444 writeb(...,
445 |
446 readb(
447 )
448 @--(T)
449 b)
450 \end{lstlisting}
451 &
452 \begin{lstlisting}[language=Cocci]
453 @@
454 type T;
455 T b;
456 @@
457
458 (
459 read
460 |
461 write
462 )
463 (...,
464 @-- (T)
465 b)
466 \end{lstlisting}
467 \\
468 \end{tabular}
469 \end{center}
470
471 \section{Types}
472 \label{types}
473
474 \begin{grammar}
475
476 \RULE{\rt{ctypes}}
477 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{ctype}\mth{)}}
478
479 \RULE{\rt{ctype}}
480 \CASE{\opt{\NT{const\_vol}} \NT{generic\_ctype} \any{*}}
481 \CASE{\opt{\NT{const\_vol}} void \some{*}}
482 \CASE{(\NT{ctype} \ANY{| \NT{ctype}})}
483
484 \RULE{\rt{const\_vol}}
485 \CASE{const}
486 \CASE{volatile}
487
488 \RULE{\rt{generic\_ctype}}
489 \CASE{\NT{ctype\_qualif}}
490 \CASE{\opt{\NT{ctype\_qualif}} char}
491 \CASE{\opt{\NT{ctype\_qualif}} short}
492 \CASE{\opt{\NT{ctype\_qualif}} int}
493 \CASE{\opt{\NT{ctype\_qualif}} long}
494 \CASE{\opt{\NT{ctype\_qualif}} long long}
495 \CASE{double}
496 \CASE{float}
497 \CASE{\OPT{struct\OR union} \T{id} \OPT{\{ \any{\NT{struct\_decl\_list}} \}}}
498
499 \RULE{\rt{ctype\_qualif}}
500 \CASE{unsigned}
501 \CASE{signed}
502
503 \RULE{\rt{struct\_decl\_list}}
504 \CASE{\NT{struct\_decl\_list\_start}}
505
506 \RULE{\rt{struct\_decl\_list\_start}}
507 \CASE{\NT{struct\_decl}}
508 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
509 \CASE{... \opt{when != \NT{struct\_decl}}$^\dag$ \opt{\NT{continue\_struct\_decl\_list}}}
510
511 \RULE{\rt{continue\_struct\_decl\_list}}
512 \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}}
513 \CASE{\NT{struct\_decl}}
514
515 \RULE{\rt{struct\_decl}}
516 \CASE{\NT{ctype} \NT{d\_ident};}
517 \CASE{\NT{fn\_ctype} (* \NT{d\_ident}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)});}
518 \CASE{\opt{\NT{const\_vol}} \T{id} \NT{d\_ident};}
519
520 \RULE{\rt{d\_ident}}
521 \CASE{\NT{id} \any{[\opt{\NT{expr}}]}}
522
523 \RULE{\rt{fn\_ctype}}
524 \CASE{\NT{generic\_ctype} \any{*}}
525 \CASE{void \any{*}}
526
527 \RULE{\rt{name\_opt\_decl}}
528 \CASE{\NT{decl}}
529 \CASE{\NT{ctype}}
530 \CASE{\NT{fn\_ctype}}
531 \end{grammar}
532
533 $^\dag$ The optional \texttt{when} construct ends at the end of the line.
534
535 \section{Function declarations}
536
537 \begin{grammar}
538
539 \RULE{\rt{fundecl}}
540 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
541 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}})
542 \ttlb~\opt{\NT{stmt\_seq}} \ttrb}
543
544 \RULE{\rt{funproto}}
545 \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid}
546 (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}});}
547
548 \RULE{\rt{funinfo}}
549 \CASE{inline}
550 \CASE{\NT{storage}}
551 % \CASE{\NT{attr}}
552
553 \RULE{\rt{storage}}
554 \CASE{static}
555 \CASE{auto}
556 \CASE{register}
557 \CASE{extern}
558
559 \RULE{\rt{funid}}
560 \CASE{\T{id}}
561 \CASE{\mth{\T{metaid}^{\ssf{Id}}}}
562 % \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
563 % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
564
565 \RULE{\rt{param}}
566 \CASE{\NT{type} \T{id}}
567 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
568 \CASE{\mth{\T{metaid}^{\ssf{ParamList}}}}
569
570 \RULE{\rt{decl}}
571 \CASE{\NT{ctype} \NT{id}}
572 \CASE{\NT{fn\_ctype} (* \NT{id}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)})}
573 \CASE{void}
574 \CASE{\mth{\T{metaid}^{\ssf{Param}}}}
575 \end{grammar}
576
577 \begin{grammar}
578 \RULE{\rt{PARAMSEQ}\mth{(}\rt{gram\_p}, \rt{when\_p}\mth{)}}
579 \CASE{\NT{COMMA\_LIST}\mth{(}\NT{gram\_p} \OR \ldots \opt{\NT{when\_p}}\mth{)}}
580 \end{grammar}
581
582 %\newpage
583
584 \section{Declarations}
585
586 \begin{grammar}
587 \RULE{\rt{decl\_var}}
588 % \CASE{\NT{type} \opt{\NT{id} \opt{[\opt{\NT{dot\_expr}}]}
589 % \ANY{, \NT{id} \opt{[ \opt{\NT{dot\_expr}}]}}};}
590 \CASE{\NT{common\_decl}}
591 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
592 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;}
593 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) = \NT{initialize} ;}
594 \CASE{typedef \NT{ctype} \NT{typedef\_ident} ;}
595
596 \RULE{\rt{one\_decl}}
597 \CASE{\NT{common\_decl}}
598 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{id};}
599 % \CASE{\NT{storage} \NT{ctype} \NT{id} \opt{[\opt{\NT{dot\\_expr}}]} = \NT{nest\\_expr};}
600 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} ;}
601
602 \RULE{\rt{common\_decl}}
603 \CASE{\NT{ctype};}
604 \CASE{\NT{funproto}}
605 \CASE{\opt{\NT{storage}} \NT{ctype} \NT{d\_ident} = \NT{initialize} ;}
606 \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} = \NT{initialize} ;}
607 \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) ;}
608 \CASE{\NT{decl\_ident} ( \OPT{\NT{COMMA\_LIST}\mth{(}\NT{expr}\mth{)}} ) ;}
609
610 \RULE{\rt{initialize}}
611 \CASE{\NT{dot\_expr}}
612 \CASE{\ttlb~\opt{\NT{COMMA\_LIST}\mth{(}\NT{dot\_expr}\mth{)}}~\ttrb}
613
614 \RULE{\rt{decl\_ident}}
615 \CASE{\T{DeclarerId}}
616 \CASE{\mth{\T{metaid}^{\ssf{Declarer}}}}
617 \end{grammar}
618
619 \section{Statements}
620
621 The first rule {\em stmt} describes the various forms of a statement.
622 The remaining rules implement the constraints that are sensitive to the
623 context in which the statement occurs: {\em single\_stmt} for a
624 context in which only one statement is allowed, and {\em decl\_stmt}
625 for a context in which a declaration, statement, or sequence thereof is
626 allowed.
627
628 \begin{grammar}
629 \RULE{\rt{stmt}}
630 \CASE{\NT{include}}
631 \CASE{\mth{\T{metaid}^{\ssf{Stmt}}}}
632 \CASE{\NT{expr};}
633 \CASE{if (\NT{dot\_expr}) \NT{single\_stmt} \opt{else \NT{single\_stmt}}}
634 \CASE{for (\opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}})
635 \NT{single\_stmt}}
636 \CASE{while (\NT{dot\_expr}) \NT{single\_stmt}}
637 \CASE{do \NT{single\_stmt} while (\NT{dot\_expr});}
638 \CASE{\NT{iter\_ident} (\any{\NT{dot\_expr}}) \NT{single\_stmt}}
639 \CASE{switch (\opt{\NT{dot\_expr}}) \ttlb \any{\NT{case\_line}} \ttrb}
640 \CASE{return \opt{\NT{dot\_expr}};}
641 \CASE{\ttlb~\opt{\NT{stmt\_seq}} \ttrb}
642 \CASE{\NT{NEST}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}}
643 \CASE{\NT{NEST}\mth{(}\NT{expr}, \NT{when}\mth{)}}
644 \CASE{break;}
645 \CASE{continue;}
646 \CASE{\NT{id}:}
647 \CASE{goto \NT{id};}
648 \CASE{\ttlb \NT{stmt\_seq} \ttrb}
649
650 \RULE{\rt{single\_stmt}}
651 \CASE{\NT{stmt}}
652 \CASE{\NT{OR}\mth{(}\NT{stmt}\mth{)}}
653
654 \RULE{\rt{decl\_stmt}}
655 \CASE{\mth{\T{metaid}^{\ssf{StmtList}}}}
656 \CASE{\NT{decl\_var}}
657 \CASE{\NT{stmt}}
658 \CASE{\NT{OR}\mth{(}\NT{stmt\_seq}\mth{)}}
659
660 \RULE{\rt{stmt\_seq}}
661 \CASE{\any{\NT{decl\_stmt}}
662 \opt{\NT{DOTSEQ}\mth{(}\some{\NT{decl\_stmt}},
663 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
664 \CASE{\any{\NT{decl\_stmt}}
665 \opt{\NT{DOTSEQ}\mth{(}\NT{expr},
666 \NT{when}\mth{)} \any{\NT{decl\_stmt}}}}
667
668 \RULE{\rt{case\_line}}
669 \CASE{default :~\NT{stmt\_seq}}
670 \CASE{case \NT{dot\_expr} :~\NT{stmt\_seq}}
671
672 \RULE{\rt{iter\_ident}}
673 \CASE{\T{IteratorId}}
674 \CASE{\mth{\T{metaid}^{\ssf{Iterator}}}}
675 \end{grammar}
676
677 \begin{grammar}
678 \RULE{\rt{OR}\mth{(}\rt{gram\_o}\mth{)}}
679 \CASE{( \NT{gram\_o} \ANY{\ttmid \NT{gram\_o}})}
680
681 \RULE{\rt{DOTSEQ}\mth{(}\rt{gram\_d}, \rt{when\_d}\mth{)}}
682 \CASE{\ldots \opt{\NT{when\_d}} \ANY{\NT{gram\_d} \ldots \opt{\NT{when\_d}}}}
683
684 \RULE{\rt{NEST}\mth{(}\rt{gram\_n}, \rt{when\_n}\mth{)}}
685 \CASE{<\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots>}
686 \CASE{<+\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots+>}
687 \end{grammar}
688
689 \noindent
690 OR is a macro that generates a disjunction of patterns. The three
691 tokens \T{(}, \T{\ttmid}, and \T{)} must appear in the leftmost
692 column, to differentiate them from the parentheses and bit-or tokens
693 that can appear within expressions (and cannot appear in the leftmost
694 column). These tokens may also be preceded by \texttt{\bs}
695 when they are used in another column. These tokens are furthermore
696 different from (, \(\mid\), and ), which are part of the grammar
697 metalanguage.
698
699 \section{Expressions}
700
701 A nest or a single ellipsis is allowed in some expression contexts, and
702 causes ambiguity in others. For example, in a sequence \mtt{\ldots
703 \mita{expr} \ldots}, the nonterminal \mita{expr} must be instantiated as an
704 explicit C-language expression, while in an array reference,
705 \mtt{\mth{\mita{expr}_1} \mtt{[} \mth{\mita{expr}_2} \mtt{]}}, the
706 nonterminal \mth{\mita{expr}_2}, because it is delimited by brackets, can
707 be also instantiated as \mtt{\ldots}, representing an arbitrary expression. To
708 distinguish between the various possibilities, we define three nonterminals
709 for expressions: {\em expr} does not allow either top-level nests or
710 ellipses, {\em nest\_expr} allows a nest but not an ellipsis, and {\em
711 dot\_expr} allows both. The EXPR macro is used to express these variants
712 in a concise way.
713
714 \begin{grammar}
715 \RULE{\rt{expr}}
716 \CASE{\NT{EXPR}\mth{(}\NT{expr}\mth{)}}
717
718 \RULE{\rt{nest\_expr}}
719 \CASE{\NT{EXPR}\mth{(}\NT{nest\_expr}\mth{)}}
720 \CASE{\NT{NEST}\mth{(}\NT{nest\_expr}, \NT{exp\_whencode}\mth{)}}
721
722 \RULE{\rt{dot\_expr}}
723 \CASE{\NT{EXPR}\mth{(}\NT{dot\_expr}\mth{)}}
724 \CASE{\NT{NEST}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)}}
725 \CASE{...~\opt{\NT{exp\_whencode}}}
726
727 \RULE{\rt{EXPR}\mth{(}\rt{exp}\mth{)}}
728 \CASE{\NT{exp} \NT{assign\_op} \NT{exp}}
729 \CASE{\NT{exp}++}
730 \CASE{\NT{exp}--}
731 \CASE{\NT{unary\_op} \NT{exp}}
732 \CASE{\NT{exp} \NT{bin\_op} \NT{exp}}
733 \CASE{\NT{exp} ?~\NT{dot\_expr} :~\NT{exp}}
734 \CASE{(\NT{type}) \NT{exp}}
735 \CASE{\NT{exp} [\NT{dot\_expr}]}
736 \CASE{\NT{exp} .~\NT{id}}
737 \CASE{\NT{exp} -> \NT{id}}
738 \CASE{\NT{exp}(\opt{\NT{PARAMSEQ}\mth{(}\NT{arg}, \NT{exp\_whencode}\mth{)}})}
739 \CASE{\NT{id}}
740 % \CASE{\mth{\T{metaid}^{\ssf{Func}}}}
741 % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}}
742 \CASE{\mth{\T{metaid}^{\ssf{Exp}}}}
743 % \CASE{\mth{\T{metaid}^{\ssf{Err}}}}
744 \CASE{\mth{\T{metaid}^{\ssf{Const}}}}
745 \CASE{\NT{const}}
746 \CASE{(\NT{dot\_expr})}
747 \CASE{\NT{OR}\mth{(}\NT{exp}\mth{)}}
748
749 \RULE{\rt{arg}}
750 \CASE{\NT{nest\_expr}}
751 \CASE{\mth{\T{metaid}^{\ssf{ExpList}}}}
752
753 \RULE{\rt{exp\_whencode}}
754 \CASE{when != \NT{expr}}
755
756 \RULE{\rt{assign\_op}}
757 \CASE{= \OR -= \OR += \OR *= \OR /= \OR \%=}
758 \CASE{\&= \OR |= \OR \caret= \OR \lt\lt= \OR \gt\gt=}
759
760 \RULE{\rt{bin\_op}}
761 \CASE{* \OR / \OR \% \OR + \OR -}
762 \CASE{\lt\lt \OR \gt\gt \OR \caret\xspace \OR \& \OR \ttmid}
763 \CASE{< \OR > \OR <= \OR >= \OR == \OR != \OR \&\& \OR \ttmid\ttmid}
764
765 \RULE{\rt{unary\_op}}
766 \CASE{++ \OR -- \OR \& \OR * \OR + \OR - \OR !}
767
768 \end{grammar}
769
770 \section{Constants, Identifiers and Types for Transformations}
771
772 \begin{grammar}
773 \RULE{\rt{const}}
774 \CASE{\NT{string}}
775 \CASE{[0-9]+}
776 \CASE{\mth{\cdots}}
777
778 \RULE{\rt{string}}
779 \CASE{"\any{[\^{}"]}"}
780
781 \RULE{\rt{id}}
782 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Id}}}}
783
784 \RULE{\rt{typedef\_ident}}
785 \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Type}}}}
786
787 \RULE{\rt{type}}
788 \CASE{\NT{ctype} \OR \mth{\T{metaid}^{\ssf{Type}}}}
789
790 \RULE{\rt{pathToIsoFile}}
791 \CASE{<.*>}
792 \end{grammar}
793
794
795 %%% Local Variables:
796 %%% mode: LaTeX
797 %%% TeX-master: "main_grammar"
798 %%% coding: latin-9
799 %%% TeX-PDF-mode: t
800 %%% ispell-local-dictionary: "american"
801 %%% End: