Commit | Line | Data |
---|---|---|
faf9a90c C |
1 | |
2 | %\section{The SmPL Grammar} | |
3 | ||
4 | % This section presents the SmPL grammar. This definition follows closely | |
5 | % our implementation using the Menhir parser generator \cite{menhir}. | |
6 | ||
7 | This document presents the grammar of the SmPL language used by the | |
8 | \href{http://www.emn.fr/x-info/coccinelle}{Coccinelle tool}. For the most | |
9 | part, the grammar is written using standard notation. In some rules, | |
10 | however, the left-hand side is in all uppercase letters. These are | |
11 | macros, which take one or more grammar rule right-hand-sides as | |
12 | arguments. The grammar also uses some unspecified nonterminals, such | |
b1b2de81 C |
13 | as \T{id}, \T{const}, etc. These refer to the sets suggested by |
14 | the name, {\em i.e.}, \T{id} refers to the set of possible | |
15 | C-language identifiers, while \T{const} refers to the set of | |
708f4980 C |
16 | possible C-language constants. |
17 | % | |
18 | \ifhevea | |
19 | A PDF version of this documentation is available at | |
20 | \url{http://www.emn.fr/x-info/coccinelle/docs/cocci_syntax.pdf}. | |
21 | \else | |
faf9a90c | 22 | An HTML version of this documentation is available online at |
708f4980 C |
23 | \url{http://www.emn.fr/x-info/coccinelle/docs/cocci_syntax.html}. |
24 | \fi | |
faf9a90c C |
25 | |
26 | %% \ifhevea A PDF | |
27 | %% version of this documentation is available at | |
28 | %% \url{http://localhost:8080/coccinelle/cocci_syntax.pdf}.\else A HTML | |
29 | %% version of this documentation is available online at | |
30 | %% \url{http://localhost:8080/coccinelle/cocci_syntax.html}. \fi | |
31 | ||
32 | \section{Program} | |
33 | ||
34 | \begin{grammar} | |
35 | \RULE{\rt{program}} | |
36 | \CASE{\any{\NT{include\_cocci}} \some{\NT{changeset}}} | |
37 | ||
38 | \RULE{\rt{include\_cocci}} | |
39 | \CASE{using \NT{string}} | |
40 | \CASE{using \NT{pathToIsoFile}} | |
41 | ||
42 | \RULE{\rt{changeset}} | |
43 | \CASE{\NT{metavariables} \NT{transformation}} | |
b1b2de81 | 44 | \CASE{\NT{script\_metavariables} \T{script\_code}} |
faf9a90c | 45 | % \CASE{\NT{metavariables} \ANY{--- filename +++ filename} \NT{transformation}} |
faf9a90c C |
46 | \end{grammar} |
47 | ||
b1b2de81 C |
48 | \noindent |
49 | \T{script\_code} is any code in the chosen scripting language. Parsing of | |
50 | the semantic patch does not check the validity of this code; any errors are | |
51 | first detected when the code is executed. | |
52 | ||
faf9a90c C |
53 | % Between the metavariables and the transformation rule, there can be a |
54 | % specification of constraints on the names of the old and new files, | |
55 | % analogous to the filename specifications in the standard patch syntax. | |
56 | % (see Figure \ref{scsiglue_patch}). | |
57 | ||
b1b2de81 | 58 | \section{Metavariables for transformations} |
faf9a90c C |
59 | |
60 | The \NT{rulename} portion of the metavariable declaration can specify | |
61 | properties of a rule such as its name, the names of the rules that it | |
62 | depends on, the isomorphisms to be used in processing the rule, and whether | |
63 | quantification over paths should be universal or existential. The optional | |
64 | annotation {\tt expression} indicates that the pattern is to be considered | |
65 | as matching an expression, and thus can be used to avoid some parsing | |
66 | problems. | |
67 | ||
68 | The \NT{metadecl} portion of the metavariable declaration defines various | |
69 | types of metavariables that will be used for matching in the transformation | |
70 | section. | |
71 | ||
72 | \begin{grammar} | |
73 | \RULE{\rt{metavariables}} | |
74 | \CASE{@@ \any{\NT{metadecl}} @@} | |
75 | \CASE{@ \NT{rulename} @ \any{\NT{metadecl}} @@} | |
76 | ||
77 | \RULE{\rt{rulename}} | |
78 | \CASE{\T{id} \OPT{extends \T{id}} \OPT{depends on \NT{dep}} \opt{\NT{iso}} | |
79 | \opt{\NT{disable-iso}} \opt{\NT{exists}} \opt{expression}} | |
80 | \CASE{script:\T{language} \OPT{depends on \NT{dep}}} | |
81 | ||
b1b2de81 C |
82 | \RULE{\rt{script\_init\_final}} |
83 | \CASE{initialize:\T{language}} | |
84 | \CASE{finalize:\T{language}} | |
85 | ||
faf9a90c C |
86 | \RULE{\rt{dep}} |
87 | \CASE{\NT{pnrule}} | |
88 | \CASE{\NT{dep} \&\& \NT{dep}} | |
89 | \CASE{\NT{dep} || \NT{dep}} | |
90 | ||
91 | \RULE{\rt{pnrule}} | |
92 | \CASE{\T{id}} | |
93 | \CASE{!\T{id}} | |
94 | \CASE{ever \T{id}} | |
95 | \CASE{never \T{id}} | |
96 | \CASE{(\NT{dep})} | |
97 | ||
98 | \RULE{\rt{iso}} | |
99 | \CASE{using \NT{string} \ANY{, \NT{string}}} | |
100 | ||
101 | \RULE{\rt{disable-iso}} | |
102 | \CASE{disable \NT{COMMA\_LIST}\mth{(}\T{id}\mth{)}} | |
103 | ||
104 | \RULE{\rt{exists}} | |
105 | \CASE{exists} | |
106 | \CASE{forall} | |
107 | % \CASE{\opt{reverse} forall} | |
108 | ||
109 | \RULE{\rt{COMMA\_LIST}\mth{(}\rt{elem}\mth{)}} | |
110 | \CASE{\NT{elem} \ANY{, \NT{elem}}} | |
111 | \end{grammar} | |
112 | ||
b1b2de81 | 113 | The keyword \KW{disable} is normally used with the names of |
faf9a90c C |
114 | isomorphisms defined in standard.iso or whatever isomorphism file has been |
115 | included. There are, however, some other isomorphisms that are built into | |
116 | the implementation of Coccinelle and that can be disabled as well. Their | |
117 | names are given below. In each case, the text describes the standard | |
118 | behavior. Using \NT{disable-iso} with the given name disables this behavior. | |
119 | ||
120 | \begin{itemize} | |
121 | \item \KW{optional\_storage}: A SmPL function definition that does not | |
122 | specify any visibility (i.e., static or extern), or a SmPL variable | |
123 | declaration that does not specify any storage (i.e., auto, static, | |
124 | register, or extern), matches a function declaration or variable | |
125 | declaration with any visibility or storage, respectively. | |
126 | \item \KW{optional\_qualifier}: This is similar to \KW{optional\_storage}, | |
127 | except that here it is the qualifier (i.e., const or volatile) that does | |
128 | not have to be specified in the SmPL code, but may be present in the C code. | |
129 | \item \KW{value\_format}: Integers in various formats, e.g., 1 and 0x1, are | |
130 | considered to be equivalent in the matching process. | |
131 | \item \KW{comm\_assoc}: An expression of the form \NT{exp} \NT{bin\_op} | |
132 | \KW{...}, where \NT{bin\_op} is commutative and associative, is | |
133 | considered to match any top-level sequence of \NT{bin\_op} operators | |
134 | containing \NT{exp} as the top-level argument. | |
135 | \end{itemize} | |
136 | ||
137 | The possible types of metavariable declarations are defined by the grammar | |
138 | rule below. Metavariables should occur at least once in the transformation | |
139 | immediately following their declaration. Fresh metavariables must only be | |
140 | used in {\tt +} code. These properties are not expressed in the grammar, | |
141 | but are checked by a subsequent analysis. The metavariables are designated | |
142 | according to the kind of terms they can match, such as a statement, an | |
143 | identifier, or an expression. An expression metavariable can be further | |
144 | constrained by its type. | |
145 | ||
146 | \begin{grammar} | |
147 | \RULE{\rt{metadecl}} | |
148 | \CASE{fresh identifier \NT{ids} ;} | |
149 | \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
150 | \CASE{parameter \opt{list} \NT{ids} ;} | |
151 | \CASE{parameter list [ \NT{id} ] \NT{ids} ;} | |
152 | \CASE{type \NT{ids} ;} | |
153 | \CASE{statement \opt{list} \NT{ids} ;} | |
154 | \CASE{typedef \NT{ids} ;} | |
155 | \CASE{declarer name \NT{ids} ;} | |
156 | % \CASE{\opt{local} function \NT{pmid\_with\_not\_eq\_list} ;} | |
157 | \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
158 | \CASE{iterator name \NT{ids} ;} | |
159 | \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
160 | % \CASE{error \NT{pmid\_with\_not\_eq\_list} ; } | |
161 | \CASE{\opt{local} idexpression \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
162 | \CASE{\opt{local} idexpression \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
163 | \CASE{\opt{local} idexpression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
164 | \CASE{expression list \NT{ids} ;} | |
165 | \CASE{expression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
166 | \CASE{expression \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;} | |
167 | \CASE{expression list [ ident ] \NT{ids} ;} | |
168 | \CASE{\NT{ctype} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
169 | \CASE{\NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;} | |
170 | \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;} | |
171 | \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
172 | \CASE{constant \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
173 | \CASE{constant \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
174 | \CASE{position \opt{any} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq\_mid}\mth{)} ;} | |
175 | \end{grammar} | |
176 | ||
177 | \begin{grammar} | |
178 | \RULE{\rt{ids}} | |
179 | \CASE{\NT{COMMA\_LIST}\mth{(}\NT{pmid}\mth{)}} | |
180 | ||
181 | \RULE{\rt{pmid}} | |
182 | \CASE{\T{id}} | |
183 | \CASE{\NT{mid}} | |
184 | % \CASE{list} | |
185 | % \CASE{error} | |
186 | % \CASE{type} | |
187 | ||
188 | \RULE{\rt{mid}} \CASE{\T{rulename\_id}.\T{id}} | |
189 | ||
190 | \RULE{\rt{pmid\_with\_not\_eq}} | |
191 | \CASE{\NT{pmid} \OPT{!= \T{id}}} | |
192 | \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\T{id}\mth{)} \ttrb}} | |
193 | ||
194 | \RULE{\rt{pmid\_with\_not\_ceq}} | |
195 | \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_cst}}} | |
196 | \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_cst}\mth{)} \ttrb}} | |
197 | ||
198 | \RULE{\rt{id\_or\_cst}} | |
199 | \CASE{\T{id}} | |
200 | \CASE{\T{integer}} | |
201 | ||
202 | \RULE{\rt{pmid\_with\_not\_eq\_mid}} | |
203 | \CASE{\NT{pmid} \OPT{!= \NT{mid}}} | |
204 | \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{mid}\mth{)} \ttrb}} | |
205 | \end{grammar} | |
206 | ||
207 | Subsequently, we refer to arbitrary metavariables as | |
208 | \mth{\msf{metaid}^{\mbox{\scriptsize{\it{ty}}}}}, where {\it{ty}} | |
209 | indicates the {\it metakind} used in the declaration of the variable. | |
210 | For example, \mth{\msf{metaid}^{\ssf{Type}}} refers to a metavariable | |
211 | that was declared using \texttt{type} and stands for any type. | |
212 | ||
213 | The \NT{ctype} and \NT{ctypes} nonterminals are used by both the grammar of | |
214 | metavariable declarations and the grammar of transformations, and are | |
215 | defined on page~\pageref{types}. | |
216 | ||
b1b2de81 C |
217 | \section{Metavariables for scripts} |
218 | ||
219 | Metavariables for scripts can only be inherited from transformation rules. | |
220 | In the spirit of scripting languages such as Python that use dynamic | |
221 | typing, metavariables for scripts do not include type declarations. | |
222 | ||
223 | \begin{grammar} | |
224 | \RULE{\rt{script\_metavariables}} | |
225 | \CASE{@ script:\NT{language} \OPT{depends on \NT{dep}} @ | |
226 | \any{\NT{script\_metadecl}} @@} | |
227 | \CASE{@ initialize:\NT{language} @} | |
228 | \CASE{@ finalize:\NT{language} @} | |
229 | ||
230 | \RULE{\rt{language}} \CASE{python} | |
231 | ||
232 | \RULE{\rt{script\_metadecl}} \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;} | |
233 | \end{grammar} | |
234 | ||
235 | Currently, the only scripting language that is supported is Python. The | |
236 | set of available scripting languages may be extended at some point. | |
237 | ||
238 | Script rules declared with \KW{initialize} are run before the treatment of | |
239 | any file. Script rules declared with \KW{finalize} are run when the | |
240 | treatment of all of the files has completed. There can be at most one of | |
241 | each per scripting language (thus currently at most one of each). | |
242 | Initialize and finalize script rules do not have access to SmPL | |
243 | metavariables. Nevertheless, a finalize script rule can access any | |
244 | variables initialized by the other script rules, allowing information to be | |
245 | transmitted from the matching process to the finalize rule. | |
246 | ||
faf9a90c C |
247 | \section{Transformation} |
248 | ||
249 | The transformation specification essentially has the form of C code, | |
250 | except that lines to remove are annotated with \verb+-+ in the first | |
251 | column, and lines to add are annotated with \verb-+-. A | |
252 | transformation specification can also use {\em dots}, ``\verb-...-'', | |
253 | describing an arbitrary sequence of function arguments or instructions | |
254 | within a control-flow path. Dots may be modified with a {\tt when} | |
255 | clause, indicating a pattern that should not occur anywhere within the | |
256 | matched sequence. Finally, a transformation can specify a disjunction | |
257 | of patterns, of the form \mtt{( \mth{\mita{pat}_1} | \mita{\ldots} | | |
258 | \mth{\mita{pat}_n} )} where each \texttt{(}, \texttt{|} or | |
259 | \texttt{)} is in column 0 or preceded by \texttt{\textbackslash}. | |
260 | ||
261 | The grammar that we present for the transformation is not actually the | |
262 | grammar of the SmPL code that can be written by the programmer, but is | |
263 | instead the grammar of the slice of this consisting of the {\tt -} | |
264 | annotated and the unannotated code (the context of the transformed lines), | |
265 | or the {\tt +} annotated code and the unannotated code. For example, for | |
266 | parsing purposes, the following transformation | |
267 | %presented in Section \ref{sec:seq2} | |
268 | is split into the two variants shown below and each is parsed | |
269 | separately. | |
270 | ||
271 | \begin{center} | |
272 | \begin{tabular}{c} | |
273 | \begin{lstlisting}[language=Cocci] | |
274 | proc_info_func(...) { | |
275 | <... | |
276 | @-- hostno | |
277 | @++ hostptr->host_no | |
278 | ...> | |
279 | } | |
280 | \end{lstlisting}\\ | |
281 | \end{tabular} | |
282 | \end{center} | |
283 | ||
284 | {%\sizecodebis | |
285 | \begin{center} | |
286 | \begin{tabular}{p{5cm}p{3cm}p{5cm}} | |
287 | \begin{lstlisting}[language=Cocci] | |
288 | proc_info_func(...) { | |
289 | <... | |
290 | @-- hostno | |
291 | ...> | |
292 | } | |
293 | \end{lstlisting} | |
294 | && | |
295 | \begin{lstlisting}[language=Cocci] | |
296 | proc_info_func(...) { | |
297 | <... | |
298 | @++ hostptr->host_no | |
299 | ...> | |
300 | } | |
301 | \end{lstlisting} | |
302 | \end{tabular} | |
303 | \end{center} | |
304 | } | |
305 | ||
306 | \noindent | |
307 | Requiring that both slices parse correctly ensures that the rule matches | |
308 | syntactically valid C code and that it produces syntactically valid C code. | |
309 | The generated parse trees are then merged for use in the subsequent | |
310 | matching and transformation process. | |
311 | ||
312 | The grammar for the minus or plus slice of a transformation is as follows: | |
313 | ||
314 | \begin{grammar} | |
315 | ||
316 | \RULE{\rt{transformation}} | |
317 | \CASE{\some{\NT{include}}} | |
318 | \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}} | |
319 | \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}} | |
320 | \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{fundecl}, \NT{when}\mth{)}} | |
321 | ||
322 | \RULE{\rt{include}} | |
323 | \CASE{\#include \T{include\_string}} | |
324 | ||
325 | % \RULE{\rt{fun\_decl\_stmt}} | |
326 | % \CASE{\NT{decl\_stmt}} | |
327 | % \CASE{\NT{fundecl}} | |
328 | ||
329 | % \CASE{\NT{ctype}} | |
330 | % \CASE{\ttlb \NT{initialize\_list} \ttrb} | |
331 | % \CASE{\NT{toplevel\_seq\_start\_after\_dots\_init}} | |
332 | % | |
333 | % \RULE{\rt{toplevel\_seq\_start\_after\_dots\_init}} | |
334 | % \CASE{\NT{stmt\_dots} \NT{toplevel\_after\_dots}} | |
335 | % \CASE{\NT{expr} \opt{\NT{toplevel\_after\_exp}}} | |
336 | % \CASE{\NT{decl\_stmt\_expr} \opt{\NT{toplevel\_after\_stmt}}} | |
337 | % | |
338 | % \RULE{\rt{stmt\_dots}} | |
339 | % \CASE{... \any{\NT{when}}} | |
340 | % \CASE{<... \any{\NT{when}} \NT{nest\_after\_dots} ...>} | |
341 | % \CASE{<+... \any{\NT{when}} \NT{nest\_after\_dots} ...+>} | |
342 | ||
343 | \RULE{\rt{when}} | |
344 | \CASE{when != \NT{when\_code}} | |
345 | \CASE{when = \NT{rule\_elem\_stmt}} | |
346 | \CASE{when \NT{COMMA\_LIST}\mth{(}\NT{any\_strict}\mth{)}} | |
347 | \CASE{when true != \NT{expr}} | |
348 | \CASE{when false != \NT{expr}} | |
349 | ||
350 | \RULE{\rt{when\_code}} | |
351 | \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}} | |
352 | \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}} | |
353 | ||
354 | \RULE{\rt{rule\_elem\_stmt}} | |
355 | \CASE{\NT{one\_decl}} | |
356 | \CASE{\NT{expr};} | |
357 | \CASE{return \opt{\NT{expr}};} | |
358 | \CASE{break;} | |
359 | \CASE{continue;} | |
360 | \CASE{\bs(\NT{rule\_elem\_stmt} \SOME{\bs| \NT{rule\_elem\_stmt}}\bs)} | |
361 | ||
362 | \RULE{\rt{any\_strict}} | |
363 | \CASE{any} | |
364 | \CASE{strict} | |
365 | \CASE{forall} | |
366 | \CASE{exists} | |
367 | ||
368 | % \RULE{\rt{nest\_after\_dots}} | |
369 | % \CASE{\NT{decl\_stmt\_exp} \opt{\NT{nest\_after\_stmt}}} | |
370 | % \CASE{\opt{\NT{exp}} \opt{\NT{nest\_after\_exp}}} | |
371 | % | |
372 | % \RULE{\rt{nest\_after\_stmt}} | |
373 | % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}} | |
374 | % \CASE{\NT{decl\_stmt} \opt{\NT{nest\_after\_stmt}}} | |
375 | % | |
376 | % \RULE{\rt{nest\_after\_exp}} | |
377 | % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}} | |
378 | % | |
379 | % \RULE{\rt{toplevel\_after\_dots}} | |
380 | % \CASE{\opt{\NT{toplevel\_after\_exp}}} | |
381 | % \CASE{\NT{exp} \opt{\NT{toplevel\_after\_exp}}} | |
382 | % \CASE{\NT{decl\_stmt\_expr} \NT{toplevel\_after\_stmt}} | |
383 | % | |
384 | % \RULE{\rt{toplevel\_after\_exp}} | |
385 | % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}} | |
386 | % | |
387 | % \RULE{\rt{decl\_stmt\_expr}} | |
388 | % \CASE{TMetaStmList$^\ddag$} | |
389 | % \CASE{\NT{decl\_var}} | |
390 | % \CASE{\NT{stmt}} | |
391 | % \CASE{(\NT{stmt\_seq} \ANY{| \NT{stmt\_seq}})} | |
392 | % | |
393 | % \RULE{\rt{toplevel\_after\_stmt}} | |
394 | % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}} | |
395 | % \CASE{\NT{decl\_stmt} \NT{toplevel\_after\_stmt}} | |
396 | ||
397 | \end{grammar} | |
398 | ||
399 | \begin{grammar} | |
400 | \RULE{\rt{OPTDOTSEQ}\mth{(}\rt{grammar\_ds}, \rt{when\_ds}\mth{)}} | |
401 | \CASE{}\multicolumn{3}{r}{\hspace{1cm} | |
402 | \KW{\opt{... \opt{\NT{when\_ds}}} \NT{grammar\_ds} | |
403 | \ANY{... \opt{\NT{when\_ds}} \NT{grammar\_ds}} | |
404 | \opt{... \opt{\NT{when\_ds}}}} | |
405 | } | |
406 | ||
407 | % \CASE{\opt{... \opt{\NT{when\_ds}}} \NT{grammar} | |
408 | % \ANY{... \opt{\NT{when\_ds}} \NT{grammar}} | |
409 | % \opt{... \opt{\NT{when\_ds}}}} | |
410 | % \CASE{<... \any{\NT{when\_ds}} \NT{grammar} ...>} | |
411 | % \CASE{<+... \any{\NT{when\_ds}} \NT{grammar} ...+>} | |
412 | ||
413 | \end{grammar} | |
414 | ||
415 | \noindent | |
416 | Lines may be annotated with an element of the set $\{\mtt{-}, \mtt{+}, | |
417 | \mtt{*}\}$ or the singleton $\mtt{?}$, or one of each set. \mtt{?} | |
418 | represents at most one match of the given pattern. \mtt{*} is used for | |
419 | semantic match, \emph{i.e.}, a pattern that highlights the fragments | |
420 | annotated with \mtt{*}, but does not perform any modification of the | |
421 | matched code. \mtt{*} cannot be mixed with \mtt{-} and \mtt{+}. There are | |
422 | some constraints on the use of these annotations: | |
423 | \begin{itemize} | |
424 | \item Dots, {\em i.e.}, \texttt{...}, cannot occur on a line marked | |
425 | \texttt{+}. | |
426 | \item Nested dots, {\em i.e.}, dots enclosed in {\tt <} and {\tt >}, cannot | |
427 | occur on a line with any marking. | |
428 | \end{itemize} | |
429 | ||
0708f913 C |
430 | Each element of a disjunction must be a proper term like an |
431 | expression, a statement, an identifier or a declaration. Thus, the | |
432 | rule on the left below is not a syntactically correct SmPL rule. One may | |
433 | use the rule on the right instead. | |
434 | ||
435 | \begin{center} | |
436 | \begin{tabular}{l@{\hspace{5cm}}r} | |
437 | \begin{lstlisting}[language=Cocci] | |
438 | @@ | |
439 | type T; | |
440 | T b; | |
441 | @@ | |
442 | ||
443 | ( | |
444 | writeb(..., | |
445 | | | |
446 | readb( | |
447 | ) | |
448 | @--(T) | |
449 | b) | |
450 | \end{lstlisting} | |
451 | & | |
452 | \begin{lstlisting}[language=Cocci] | |
453 | @@ | |
454 | type T; | |
455 | T b; | |
456 | @@ | |
457 | ||
458 | ( | |
459 | read | |
460 | | | |
461 | write | |
462 | ) | |
463 | (..., | |
464 | @-- (T) | |
465 | b) | |
466 | \end{lstlisting} | |
467 | \\ | |
468 | \end{tabular} | |
469 | \end{center} | |
470 | ||
faf9a90c C |
471 | \section{Types} |
472 | \label{types} | |
473 | ||
474 | \begin{grammar} | |
475 | ||
476 | \RULE{\rt{ctypes}} | |
477 | \CASE{\NT{COMMA\_LIST}\mth{(}\NT{ctype}\mth{)}} | |
478 | ||
479 | \RULE{\rt{ctype}} | |
480 | \CASE{\opt{\NT{const\_vol}} \NT{generic\_ctype} \any{*}} | |
481 | \CASE{\opt{\NT{const\_vol}} void \some{*}} | |
482 | \CASE{(\NT{ctype} \ANY{| \NT{ctype}})} | |
483 | ||
484 | \RULE{\rt{const\_vol}} | |
485 | \CASE{const} | |
486 | \CASE{volatile} | |
487 | ||
488 | \RULE{\rt{generic\_ctype}} | |
489 | \CASE{\NT{ctype\_qualif}} | |
490 | \CASE{\opt{\NT{ctype\_qualif}} char} | |
491 | \CASE{\opt{\NT{ctype\_qualif}} short} | |
492 | \CASE{\opt{\NT{ctype\_qualif}} int} | |
493 | \CASE{\opt{\NT{ctype\_qualif}} long} | |
494 | \CASE{\opt{\NT{ctype\_qualif}} long long} | |
495 | \CASE{double} | |
496 | \CASE{float} | |
497 | \CASE{\OPT{struct\OR union} \T{id} \OPT{\{ \any{\NT{struct\_decl\_list}} \}}} | |
498 | ||
499 | \RULE{\rt{ctype\_qualif}} | |
500 | \CASE{unsigned} | |
501 | \CASE{signed} | |
502 | ||
503 | \RULE{\rt{struct\_decl\_list}} | |
504 | \CASE{\NT{struct\_decl\_list\_start}} | |
505 | ||
506 | \RULE{\rt{struct\_decl\_list\_start}} | |
507 | \CASE{\NT{struct\_decl}} | |
508 | \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}} | |
509 | \CASE{... \opt{when != \NT{struct\_decl}}$^\dag$ \opt{\NT{continue\_struct\_decl\_list}}} | |
510 | ||
511 | \RULE{\rt{continue\_struct\_decl\_list}} | |
512 | \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}} | |
513 | \CASE{\NT{struct\_decl}} | |
514 | ||
515 | \RULE{\rt{struct\_decl}} | |
516 | \CASE{\NT{ctype} \NT{d\_ident};} | |
517 | \CASE{\NT{fn\_ctype} (* \NT{d\_ident}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)});} | |
518 | \CASE{\opt{\NT{const\_vol}} \T{id} \NT{d\_ident};} | |
519 | ||
520 | \RULE{\rt{d\_ident}} | |
521 | \CASE{\NT{id} \any{[\opt{\NT{expr}}]}} | |
522 | ||
523 | \RULE{\rt{fn\_ctype}} | |
524 | \CASE{\NT{generic\_ctype} \any{*}} | |
525 | \CASE{void \any{*}} | |
526 | ||
527 | \RULE{\rt{name\_opt\_decl}} | |
528 | \CASE{\NT{decl}} | |
529 | \CASE{\NT{ctype}} | |
530 | \CASE{\NT{fn\_ctype}} | |
531 | \end{grammar} | |
532 | ||
533 | $^\dag$ The optional \texttt{when} construct ends at the end of the line. | |
534 | ||
535 | \section{Function declarations} | |
536 | ||
537 | \begin{grammar} | |
538 | ||
539 | \RULE{\rt{fundecl}} | |
540 | \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid} | |
541 | (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}}) | |
542 | \ttlb~\opt{\NT{stmt\_seq}} \ttrb} | |
543 | ||
544 | \RULE{\rt{funproto}} | |
545 | \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid} | |
546 | (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}});} | |
547 | ||
548 | \RULE{\rt{funinfo}} | |
549 | \CASE{inline} | |
550 | \CASE{\NT{storage}} | |
551 | % \CASE{\NT{attr}} | |
552 | ||
553 | \RULE{\rt{storage}} | |
554 | \CASE{static} | |
555 | \CASE{auto} | |
556 | \CASE{register} | |
557 | \CASE{extern} | |
558 | ||
559 | \RULE{\rt{funid}} | |
560 | \CASE{\T{id}} | |
561 | \CASE{\mth{\T{metaid}^{\ssf{Id}}}} | |
562 | % \CASE{\mth{\T{metaid}^{\ssf{Func}}}} | |
563 | % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}} | |
564 | ||
565 | \RULE{\rt{param}} | |
566 | \CASE{\NT{type} \T{id}} | |
567 | \CASE{\mth{\T{metaid}^{\ssf{Param}}}} | |
568 | \CASE{\mth{\T{metaid}^{\ssf{ParamList}}}} | |
569 | ||
570 | \RULE{\rt{decl}} | |
571 | \CASE{\NT{ctype} \NT{id}} | |
572 | \CASE{\NT{fn\_ctype} (* \NT{id}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)})} | |
573 | \CASE{void} | |
574 | \CASE{\mth{\T{metaid}^{\ssf{Param}}}} | |
575 | \end{grammar} | |
576 | ||
577 | \begin{grammar} | |
578 | \RULE{\rt{PARAMSEQ}\mth{(}\rt{gram\_p}, \rt{when\_p}\mth{)}} | |
579 | \CASE{\NT{COMMA\_LIST}\mth{(}\NT{gram\_p} \OR \ldots \opt{\NT{when\_p}}\mth{)}} | |
580 | \end{grammar} | |
581 | ||
582 | %\newpage | |
583 | ||
584 | \section{Declarations} | |
585 | ||
586 | \begin{grammar} | |
587 | \RULE{\rt{decl\_var}} | |
588 | % \CASE{\NT{type} \opt{\NT{id} \opt{[\opt{\NT{dot\_expr}}]} | |
589 | % \ANY{, \NT{id} \opt{[ \opt{\NT{dot\_expr}}]}}};} | |
590 | \CASE{\NT{common\_decl}} | |
591 | \CASE{\opt{\NT{storage}} \NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;} | |
592 | \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;} | |
593 | \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) = \NT{initialize} ;} | |
594 | \CASE{typedef \NT{ctype} \NT{typedef\_ident} ;} | |
595 | ||
596 | \RULE{\rt{one\_decl}} | |
597 | \CASE{\NT{common\_decl}} | |
598 | \CASE{\opt{\NT{storage}} \NT{ctype} \NT{id};} | |
599 | % \CASE{\NT{storage} \NT{ctype} \NT{id} \opt{[\opt{\NT{dot\\_expr}}]} = \NT{nest\\_expr};} | |
600 | \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} ;} | |
601 | ||
602 | \RULE{\rt{common\_decl}} | |
603 | \CASE{\NT{ctype};} | |
604 | \CASE{\NT{funproto}} | |
605 | \CASE{\opt{\NT{storage}} \NT{ctype} \NT{d\_ident} = \NT{initialize} ;} | |
606 | \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} = \NT{initialize} ;} | |
607 | \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) ;} | |
608 | \CASE{\NT{decl\_ident} ( \OPT{\NT{COMMA\_LIST}\mth{(}\NT{expr}\mth{)}} ) ;} | |
609 | ||
610 | \RULE{\rt{initialize}} | |
611 | \CASE{\NT{dot\_expr}} | |
612 | \CASE{\ttlb~\opt{\NT{COMMA\_LIST}\mth{(}\NT{dot\_expr}\mth{)}}~\ttrb} | |
613 | ||
614 | \RULE{\rt{decl\_ident}} | |
615 | \CASE{\T{DeclarerId}} | |
616 | \CASE{\mth{\T{metaid}^{\ssf{Declarer}}}} | |
617 | \end{grammar} | |
618 | ||
619 | \section{Statements} | |
620 | ||
621 | The first rule {\em stmt} describes the various forms of a statement. | |
622 | The remaining rules implement the constraints that are sensitive to the | |
623 | context in which the statement occurs: {\em single\_stmt} for a | |
624 | context in which only one statement is allowed, and {\em decl\_stmt} | |
625 | for a context in which a declaration, statement, or sequence thereof is | |
626 | allowed. | |
627 | ||
628 | \begin{grammar} | |
629 | \RULE{\rt{stmt}} | |
630 | \CASE{\NT{include}} | |
631 | \CASE{\mth{\T{metaid}^{\ssf{Stmt}}}} | |
632 | \CASE{\NT{expr};} | |
633 | \CASE{if (\NT{dot\_expr}) \NT{single\_stmt} \opt{else \NT{single\_stmt}}} | |
634 | \CASE{for (\opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}) | |
635 | \NT{single\_stmt}} | |
636 | \CASE{while (\NT{dot\_expr}) \NT{single\_stmt}} | |
637 | \CASE{do \NT{single\_stmt} while (\NT{dot\_expr});} | |
638 | \CASE{\NT{iter\_ident} (\any{\NT{dot\_expr}}) \NT{single\_stmt}} | |
639 | \CASE{switch (\opt{\NT{dot\_expr}}) \ttlb \any{\NT{case\_line}} \ttrb} | |
640 | \CASE{return \opt{\NT{dot\_expr}};} | |
641 | \CASE{\ttlb~\opt{\NT{stmt\_seq}} \ttrb} | |
642 | \CASE{\NT{NEST}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}} | |
643 | \CASE{\NT{NEST}\mth{(}\NT{expr}, \NT{when}\mth{)}} | |
644 | \CASE{break;} | |
645 | \CASE{continue;} | |
646 | \CASE{\NT{id}:} | |
647 | \CASE{goto \NT{id};} | |
648 | \CASE{\ttlb \NT{stmt\_seq} \ttrb} | |
649 | ||
650 | \RULE{\rt{single\_stmt}} | |
651 | \CASE{\NT{stmt}} | |
652 | \CASE{\NT{OR}\mth{(}\NT{stmt}\mth{)}} | |
653 | ||
654 | \RULE{\rt{decl\_stmt}} | |
655 | \CASE{\mth{\T{metaid}^{\ssf{StmtList}}}} | |
656 | \CASE{\NT{decl\_var}} | |
657 | \CASE{\NT{stmt}} | |
658 | \CASE{\NT{OR}\mth{(}\NT{stmt\_seq}\mth{)}} | |
659 | ||
660 | \RULE{\rt{stmt\_seq}} | |
661 | \CASE{\any{\NT{decl\_stmt}} | |
662 | \opt{\NT{DOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, | |
663 | \NT{when}\mth{)} \any{\NT{decl\_stmt}}}} | |
664 | \CASE{\any{\NT{decl\_stmt}} | |
665 | \opt{\NT{DOTSEQ}\mth{(}\NT{expr}, | |
666 | \NT{when}\mth{)} \any{\NT{decl\_stmt}}}} | |
667 | ||
668 | \RULE{\rt{case\_line}} | |
669 | \CASE{default :~\NT{stmt\_seq}} | |
670 | \CASE{case \NT{dot\_expr} :~\NT{stmt\_seq}} | |
671 | ||
672 | \RULE{\rt{iter\_ident}} | |
673 | \CASE{\T{IteratorId}} | |
674 | \CASE{\mth{\T{metaid}^{\ssf{Iterator}}}} | |
675 | \end{grammar} | |
676 | ||
677 | \begin{grammar} | |
678 | \RULE{\rt{OR}\mth{(}\rt{gram\_o}\mth{)}} | |
679 | \CASE{( \NT{gram\_o} \ANY{\ttmid \NT{gram\_o}})} | |
680 | ||
681 | \RULE{\rt{DOTSEQ}\mth{(}\rt{gram\_d}, \rt{when\_d}\mth{)}} | |
682 | \CASE{\ldots \opt{\NT{when\_d}} \ANY{\NT{gram\_d} \ldots \opt{\NT{when\_d}}}} | |
683 | ||
684 | \RULE{\rt{NEST}\mth{(}\rt{gram\_n}, \rt{when\_n}\mth{)}} | |
685 | \CASE{<\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots>} | |
686 | \CASE{<+\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots+>} | |
687 | \end{grammar} | |
688 | ||
689 | \noindent | |
690 | OR is a macro that generates a disjunction of patterns. The three | |
691 | tokens \T{(}, \T{\ttmid}, and \T{)} must appear in the leftmost | |
692 | column, to differentiate them from the parentheses and bit-or tokens | |
693 | that can appear within expressions (and cannot appear in the leftmost | |
694 | column). These tokens may also be preceded by \texttt{\bs} | |
695 | when they are used in another column. These tokens are furthermore | |
696 | different from (, \(\mid\), and ), which are part of the grammar | |
697 | metalanguage. | |
698 | ||
699 | \section{Expressions} | |
700 | ||
701 | A nest or a single ellipsis is allowed in some expression contexts, and | |
702 | causes ambiguity in others. For example, in a sequence \mtt{\ldots | |
703 | \mita{expr} \ldots}, the nonterminal \mita{expr} must be instantiated as an | |
704 | explicit C-language expression, while in an array reference, | |
705 | \mtt{\mth{\mita{expr}_1} \mtt{[} \mth{\mita{expr}_2} \mtt{]}}, the | |
706 | nonterminal \mth{\mita{expr}_2}, because it is delimited by brackets, can | |
707 | be also instantiated as \mtt{\ldots}, representing an arbitrary expression. To | |
708 | distinguish between the various possibilities, we define three nonterminals | |
709 | for expressions: {\em expr} does not allow either top-level nests or | |
710 | ellipses, {\em nest\_expr} allows a nest but not an ellipsis, and {\em | |
711 | dot\_expr} allows both. The EXPR macro is used to express these variants | |
712 | in a concise way. | |
713 | ||
714 | \begin{grammar} | |
715 | \RULE{\rt{expr}} | |
716 | \CASE{\NT{EXPR}\mth{(}\NT{expr}\mth{)}} | |
717 | ||
718 | \RULE{\rt{nest\_expr}} | |
719 | \CASE{\NT{EXPR}\mth{(}\NT{nest\_expr}\mth{)}} | |
720 | \CASE{\NT{NEST}\mth{(}\NT{nest\_expr}, \NT{exp\_whencode}\mth{)}} | |
721 | ||
722 | \RULE{\rt{dot\_expr}} | |
723 | \CASE{\NT{EXPR}\mth{(}\NT{dot\_expr}\mth{)}} | |
724 | \CASE{\NT{NEST}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)}} | |
725 | \CASE{...~\opt{\NT{exp\_whencode}}} | |
726 | ||
727 | \RULE{\rt{EXPR}\mth{(}\rt{exp}\mth{)}} | |
728 | \CASE{\NT{exp} \NT{assign\_op} \NT{exp}} | |
729 | \CASE{\NT{exp}++} | |
730 | \CASE{\NT{exp}--} | |
731 | \CASE{\NT{unary\_op} \NT{exp}} | |
732 | \CASE{\NT{exp} \NT{bin\_op} \NT{exp}} | |
733 | \CASE{\NT{exp} ?~\NT{dot\_expr} :~\NT{exp}} | |
734 | \CASE{(\NT{type}) \NT{exp}} | |
735 | \CASE{\NT{exp} [\NT{dot\_expr}]} | |
736 | \CASE{\NT{exp} .~\NT{id}} | |
737 | \CASE{\NT{exp} -> \NT{id}} | |
738 | \CASE{\NT{exp}(\opt{\NT{PARAMSEQ}\mth{(}\NT{arg}, \NT{exp\_whencode}\mth{)}})} | |
739 | \CASE{\NT{id}} | |
740 | % \CASE{\mth{\T{metaid}^{\ssf{Func}}}} | |
741 | % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}} | |
742 | \CASE{\mth{\T{metaid}^{\ssf{Exp}}}} | |
743 | % \CASE{\mth{\T{metaid}^{\ssf{Err}}}} | |
744 | \CASE{\mth{\T{metaid}^{\ssf{Const}}}} | |
745 | \CASE{\NT{const}} | |
746 | \CASE{(\NT{dot\_expr})} | |
747 | \CASE{\NT{OR}\mth{(}\NT{exp}\mth{)}} | |
748 | ||
749 | \RULE{\rt{arg}} | |
750 | \CASE{\NT{nest\_expr}} | |
751 | \CASE{\mth{\T{metaid}^{\ssf{ExpList}}}} | |
752 | ||
753 | \RULE{\rt{exp\_whencode}} | |
754 | \CASE{when != \NT{expr}} | |
755 | ||
756 | \RULE{\rt{assign\_op}} | |
757 | \CASE{= \OR -= \OR += \OR *= \OR /= \OR \%=} | |
758 | \CASE{\&= \OR |= \OR \caret= \OR \lt\lt= \OR \gt\gt=} | |
759 | ||
760 | \RULE{\rt{bin\_op}} | |
761 | \CASE{* \OR / \OR \% \OR + \OR -} | |
762 | \CASE{\lt\lt \OR \gt\gt \OR \caret\xspace \OR \& \OR \ttmid} | |
763 | \CASE{< \OR > \OR <= \OR >= \OR == \OR != \OR \&\& \OR \ttmid\ttmid} | |
764 | ||
765 | \RULE{\rt{unary\_op}} | |
766 | \CASE{++ \OR -- \OR \& \OR * \OR + \OR - \OR !} | |
767 | ||
768 | \end{grammar} | |
769 | ||
770 | \section{Constants, Identifiers and Types for Transformations} | |
771 | ||
772 | \begin{grammar} | |
773 | \RULE{\rt{const}} | |
774 | \CASE{\NT{string}} | |
775 | \CASE{[0-9]+} | |
776 | \CASE{\mth{\cdots}} | |
777 | ||
778 | \RULE{\rt{string}} | |
779 | \CASE{"\any{[\^{}"]}"} | |
780 | ||
781 | \RULE{\rt{id}} | |
782 | \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Id}}}} | |
783 | ||
784 | \RULE{\rt{typedef\_ident}} | |
785 | \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Type}}}} | |
786 | ||
787 | \RULE{\rt{type}} | |
788 | \CASE{\NT{ctype} \OR \mth{\T{metaid}^{\ssf{Type}}}} | |
789 | ||
790 | \RULE{\rt{pathToIsoFile}} | |
791 | \CASE{<.*>} | |
792 | \end{grammar} | |
793 | ||
faf9a90c C |
794 | |
795 | %%% Local Variables: | |
796 | %%% mode: LaTeX | |
708f4980 | 797 | %%% TeX-master: "main_grammar" |
faf9a90c C |
798 | %%% coding: latin-9 |
799 | %%% TeX-PDF-mode: t | |
800 | %%% ispell-local-dictionary: "american" | |
801 | %%% End: |