Commit | Line | Data |
---|---|---|
faf9a90c C |
1 | |
2 | %\section{The SmPL Grammar} | |
3 | ||
4 | % This section presents the SmPL grammar. This definition follows closely | |
5 | % our implementation using the Menhir parser generator \cite{menhir}. | |
6 | ||
7 | This document presents the grammar of the SmPL language used by the | |
7f004419 | 8 | \href{http://coccinelle.lip6.fr/}{Coccinelle tool}. For the most |
faf9a90c C |
9 | part, the grammar is written using standard notation. In some rules, |
10 | however, the left-hand side is in all uppercase letters. These are | |
11 | macros, which take one or more grammar rule right-hand-sides as | |
12 | arguments. The grammar also uses some unspecified nonterminals, such | |
b1b2de81 C |
13 | as \T{id}, \T{const}, etc. These refer to the sets suggested by |
14 | the name, {\em i.e.}, \T{id} refers to the set of possible | |
15 | C-language identifiers, while \T{const} refers to the set of | |
978fd7e5 | 16 | possible C-language constants. |
708f4980 | 17 | % |
978fd7e5 | 18 | \ifhevea |
708f4980 | 19 | A PDF version of this documentation is available at |
951c7801 | 20 | \url{http://coccinelle.lip6.fr/docs/main_grammar.pdf}. |
708f4980 | 21 | \else |
faf9a90c | 22 | An HTML version of this documentation is available online at |
951c7801 | 23 | \url{http://coccinelle.lip6.fr/docs/main_grammar.html}. |
708f4980 | 24 | \fi |
faf9a90c | 25 | |
faf9a90c C |
26 | \section{Program} |
27 | ||
28 | \begin{grammar} | |
29 | \RULE{\rt{program}} | |
30 | \CASE{\any{\NT{include\_cocci}} \some{\NT{changeset}}} | |
31 | ||
32 | \RULE{\rt{include\_cocci}} | |
33 | \CASE{using \NT{string}} | |
34 | \CASE{using \NT{pathToIsoFile}} | |
35 | ||
36 | \RULE{\rt{changeset}} | |
37 | \CASE{\NT{metavariables} \NT{transformation}} | |
b1b2de81 | 38 | \CASE{\NT{script\_metavariables} \T{script\_code}} |
faf9a90c | 39 | % \CASE{\NT{metavariables} \ANY{--- filename +++ filename} \NT{transformation}} |
faf9a90c C |
40 | \end{grammar} |
41 | ||
b1b2de81 C |
42 | \noindent |
43 | \T{script\_code} is any code in the chosen scripting language. Parsing of | |
44 | the semantic patch does not check the validity of this code; any errors are | |
978fd7e5 C |
45 | first detected when the code is executed. Furthermore, \texttt{@} should |
46 | not be used in this code. Spatch scans the script code for the next | |
47 | \texttt{@} and considers that to be the beginning of the next rule, even if | |
48 | \texttt{@} occurs within, e.g., a string or a comment. | |
b1b2de81 | 49 | |
faf9a90c C |
50 | % Between the metavariables and the transformation rule, there can be a |
51 | % specification of constraints on the names of the old and new files, | |
52 | % analogous to the filename specifications in the standard patch syntax. | |
53 | % (see Figure \ref{scsiglue_patch}). | |
54 | ||
b1b2de81 | 55 | \section{Metavariables for transformations} |
faf9a90c C |
56 | |
57 | The \NT{rulename} portion of the metavariable declaration can specify | |
58 | properties of a rule such as its name, the names of the rules that it | |
59 | depends on, the isomorphisms to be used in processing the rule, and whether | |
60 | quantification over paths should be universal or existential. The optional | |
61 | annotation {\tt expression} indicates that the pattern is to be considered | |
62 | as matching an expression, and thus can be used to avoid some parsing | |
63 | problems. | |
64 | ||
65 | The \NT{metadecl} portion of the metavariable declaration defines various | |
66 | types of metavariables that will be used for matching in the transformation | |
67 | section. | |
68 | ||
69 | \begin{grammar} | |
70 | \RULE{\rt{metavariables}} | |
71 | \CASE{@@ \any{\NT{metadecl}} @@} | |
72 | \CASE{@ \NT{rulename} @ \any{\NT{metadecl}} @@} | |
73 | ||
74 | \RULE{\rt{rulename}} | |
75 | \CASE{\T{id} \OPT{extends \T{id}} \OPT{depends on \NT{dep}} \opt{\NT{iso}} | |
76 | \opt{\NT{disable-iso}} \opt{\NT{exists}} \opt{expression}} | |
77 | \CASE{script:\T{language} \OPT{depends on \NT{dep}}} | |
78 | ||
b1b2de81 C |
79 | \RULE{\rt{script\_init\_final}} |
80 | \CASE{initialize:\T{language}} | |
81 | \CASE{finalize:\T{language}} | |
82 | ||
faf9a90c C |
83 | \RULE{\rt{dep}} |
84 | \CASE{\NT{pnrule}} | |
85 | \CASE{\NT{dep} \&\& \NT{dep}} | |
86 | \CASE{\NT{dep} || \NT{dep}} | |
87 | ||
88 | \RULE{\rt{pnrule}} | |
89 | \CASE{\T{id}} | |
90 | \CASE{!\T{id}} | |
91 | \CASE{ever \T{id}} | |
92 | \CASE{never \T{id}} | |
93 | \CASE{(\NT{dep})} | |
94 | ||
95 | \RULE{\rt{iso}} | |
96 | \CASE{using \NT{string} \ANY{, \NT{string}}} | |
97 | ||
98 | \RULE{\rt{disable-iso}} | |
99 | \CASE{disable \NT{COMMA\_LIST}\mth{(}\T{id}\mth{)}} | |
100 | ||
101 | \RULE{\rt{exists}} | |
102 | \CASE{exists} | |
103 | \CASE{forall} | |
104 | % \CASE{\opt{reverse} forall} | |
105 | ||
106 | \RULE{\rt{COMMA\_LIST}\mth{(}\rt{elem}\mth{)}} | |
107 | \CASE{\NT{elem} \ANY{, \NT{elem}}} | |
108 | \end{grammar} | |
109 | ||
b1b2de81 | 110 | The keyword \KW{disable} is normally used with the names of |
faf9a90c C |
111 | isomorphisms defined in standard.iso or whatever isomorphism file has been |
112 | included. There are, however, some other isomorphisms that are built into | |
113 | the implementation of Coccinelle and that can be disabled as well. Their | |
114 | names are given below. In each case, the text describes the standard | |
115 | behavior. Using \NT{disable-iso} with the given name disables this behavior. | |
116 | ||
117 | \begin{itemize} | |
118 | \item \KW{optional\_storage}: A SmPL function definition that does not | |
119 | specify any visibility (i.e., static or extern), or a SmPL variable | |
120 | declaration that does not specify any storage (i.e., auto, static, | |
121 | register, or extern), matches a function declaration or variable | |
122 | declaration with any visibility or storage, respectively. | |
123 | \item \KW{optional\_qualifier}: This is similar to \KW{optional\_storage}, | |
124 | except that here it is the qualifier (i.e., const or volatile) that does | |
125 | not have to be specified in the SmPL code, but may be present in the C code. | |
126 | \item \KW{value\_format}: Integers in various formats, e.g., 1 and 0x1, are | |
127 | considered to be equivalent in the matching process. | |
128 | \item \KW{comm\_assoc}: An expression of the form \NT{exp} \NT{bin\_op} | |
129 | \KW{...}, where \NT{bin\_op} is commutative and associative, is | |
130 | considered to match any top-level sequence of \NT{bin\_op} operators | |
131 | containing \NT{exp} as the top-level argument. | |
132 | \end{itemize} | |
133 | ||
134 | The possible types of metavariable declarations are defined by the grammar | |
135 | rule below. Metavariables should occur at least once in the transformation | |
136 | immediately following their declaration. Fresh metavariables must only be | |
137 | used in {\tt +} code. These properties are not expressed in the grammar, | |
138 | but are checked by a subsequent analysis. The metavariables are designated | |
139 | according to the kind of terms they can match, such as a statement, an | |
140 | identifier, or an expression. An expression metavariable can be further | |
141 | constrained by its type. | |
142 | ||
143 | \begin{grammar} | |
144 | \RULE{\rt{metadecl}} | |
145 | \CASE{fresh identifier \NT{ids} ;} | |
951c7801 | 146 | \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;} |
faf9a90c C |
147 | \CASE{identifier \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} |
148 | \CASE{parameter \opt{list} \NT{ids} ;} | |
149 | \CASE{parameter list [ \NT{id} ] \NT{ids} ;} | |
150 | \CASE{type \NT{ids} ;} | |
151 | \CASE{statement \opt{list} \NT{ids} ;} | |
152 | \CASE{typedef \NT{ids} ;} | |
153 | \CASE{declarer name \NT{ids} ;} | |
154 | % \CASE{\opt{local} function \NT{pmid\_with\_not\_eq\_list} ;} | |
951c7801 | 155 | \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;} |
faf9a90c C |
156 | \CASE{declarer \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} |
157 | \CASE{iterator name \NT{ids} ;} | |
951c7801 | 158 | \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_regexp}\mth{)} ;} |
faf9a90c C |
159 | \CASE{iterator \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} |
160 | % \CASE{error \NT{pmid\_with\_not\_eq\_list} ; } | |
161 | \CASE{\opt{local} idexpression \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
162 | \CASE{\opt{local} idexpression \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
163 | \CASE{\opt{local} idexpression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
164 | \CASE{expression list \NT{ids} ;} | |
165 | \CASE{expression \some{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
166 | \CASE{expression \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;} | |
167 | \CASE{expression list [ ident ] \NT{ids} ;} | |
168 | \CASE{\NT{ctype} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
169 | \CASE{\NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;} | |
170 | \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_ceq}\mth{)} ;} | |
171 | \CASE{\ttlb \NT{ctypes}\ttrb~\any{*} [ ] \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
172 | \CASE{constant \opt{\NT{ctype}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
173 | \CASE{constant \OPT{\ttlb \NT{ctypes}\ttrb~\any{*}} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq}\mth{)} ;} | |
174 | \CASE{position \opt{any} \NT{COMMA\_LIST}\mth{(}\NT{pmid\_with\_not\_eq\_mid}\mth{)} ;} | |
175 | \end{grammar} | |
176 | ||
177 | \begin{grammar} | |
178 | \RULE{\rt{ids}} | |
179 | \CASE{\NT{COMMA\_LIST}\mth{(}\NT{pmid}\mth{)}} | |
180 | ||
181 | \RULE{\rt{pmid}} | |
182 | \CASE{\T{id}} | |
183 | \CASE{\NT{mid}} | |
184 | % \CASE{list} | |
185 | % \CASE{error} | |
186 | % \CASE{type} | |
187 | ||
188 | \RULE{\rt{mid}} \CASE{\T{rulename\_id}.\T{id}} | |
189 | ||
951c7801 C |
190 | \RULE{\rt{pmid\_with\_regexp}} |
191 | \CASE{\NT{pmid} \~{}= \NT{regexp}} | |
192 | ||
faf9a90c C |
193 | \RULE{\rt{pmid\_with\_not\_eq}} |
194 | \CASE{\NT{pmid} \OPT{!= \T{id}}} | |
195 | \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\T{id}\mth{)} \ttrb}} | |
196 | ||
197 | \RULE{\rt{pmid\_with\_not\_ceq}} | |
198 | \CASE{\NT{pmid} \OPT{!= \NT{id\_or\_cst}}} | |
199 | \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{id\_or\_cst}\mth{)} \ttrb}} | |
200 | ||
201 | \RULE{\rt{id\_or\_cst}} | |
202 | \CASE{\T{id}} | |
203 | \CASE{\T{integer}} | |
204 | ||
205 | \RULE{\rt{pmid\_with\_not\_eq\_mid}} | |
206 | \CASE{\NT{pmid} \OPT{!= \NT{mid}}} | |
207 | \CASE{\NT{pmid} \OPT{!= \ttlb~\NT{COMMA\_LIST}\mth{(}\NT{mid}\mth{)} \ttrb}} | |
208 | \end{grammar} | |
209 | ||
210 | Subsequently, we refer to arbitrary metavariables as | |
211 | \mth{\msf{metaid}^{\mbox{\scriptsize{\it{ty}}}}}, where {\it{ty}} | |
212 | indicates the {\it metakind} used in the declaration of the variable. | |
213 | For example, \mth{\msf{metaid}^{\ssf{Type}}} refers to a metavariable | |
214 | that was declared using \texttt{type} and stands for any type. | |
215 | ||
216 | The \NT{ctype} and \NT{ctypes} nonterminals are used by both the grammar of | |
217 | metavariable declarations and the grammar of transformations, and are | |
218 | defined on page~\pageref{types}. | |
219 | ||
b1b2de81 C |
220 | \section{Metavariables for scripts} |
221 | ||
222 | Metavariables for scripts can only be inherited from transformation rules. | |
223 | In the spirit of scripting languages such as Python that use dynamic | |
224 | typing, metavariables for scripts do not include type declarations. | |
225 | ||
226 | \begin{grammar} | |
227 | \RULE{\rt{script\_metavariables}} | |
228 | \CASE{@ script:\NT{language} \OPT{depends on \NT{dep}} @ | |
229 | \any{\NT{script\_metadecl}} @@} | |
230 | \CASE{@ initialize:\NT{language} @} | |
231 | \CASE{@ finalize:\NT{language} @} | |
232 | ||
233 | \RULE{\rt{language}} \CASE{python} | |
234 | ||
235 | \RULE{\rt{script\_metadecl}} \CASE{\T{id} <{}< \T{rulename\_id}.\T{id} ;} | |
236 | \end{grammar} | |
237 | ||
238 | Currently, the only scripting language that is supported is Python. The | |
239 | set of available scripting languages may be extended at some point. | |
240 | ||
241 | Script rules declared with \KW{initialize} are run before the treatment of | |
242 | any file. Script rules declared with \KW{finalize} are run when the | |
243 | treatment of all of the files has completed. There can be at most one of | |
244 | each per scripting language (thus currently at most one of each). | |
245 | Initialize and finalize script rules do not have access to SmPL | |
246 | metavariables. Nevertheless, a finalize script rule can access any | |
247 | variables initialized by the other script rules, allowing information to be | |
248 | transmitted from the matching process to the finalize rule. | |
249 | ||
faf9a90c C |
250 | \section{Transformation} |
251 | ||
252 | The transformation specification essentially has the form of C code, | |
253 | except that lines to remove are annotated with \verb+-+ in the first | |
254 | column, and lines to add are annotated with \verb-+-. A | |
255 | transformation specification can also use {\em dots}, ``\verb-...-'', | |
256 | describing an arbitrary sequence of function arguments or instructions | |
257 | within a control-flow path. Dots may be modified with a {\tt when} | |
258 | clause, indicating a pattern that should not occur anywhere within the | |
259 | matched sequence. Finally, a transformation can specify a disjunction | |
260 | of patterns, of the form \mtt{( \mth{\mita{pat}_1} | \mita{\ldots} | | |
261 | \mth{\mita{pat}_n} )} where each \texttt{(}, \texttt{|} or | |
262 | \texttt{)} is in column 0 or preceded by \texttt{\textbackslash}. | |
263 | ||
264 | The grammar that we present for the transformation is not actually the | |
265 | grammar of the SmPL code that can be written by the programmer, but is | |
266 | instead the grammar of the slice of this consisting of the {\tt -} | |
267 | annotated and the unannotated code (the context of the transformed lines), | |
268 | or the {\tt +} annotated code and the unannotated code. For example, for | |
269 | parsing purposes, the following transformation | |
270 | %presented in Section \ref{sec:seq2} | |
271 | is split into the two variants shown below and each is parsed | |
272 | separately. | |
273 | ||
274 | \begin{center} | |
275 | \begin{tabular}{c} | |
276 | \begin{lstlisting}[language=Cocci] | |
277 | proc_info_func(...) { | |
278 | <... | |
279 | @-- hostno | |
280 | @++ hostptr->host_no | |
281 | ...> | |
282 | } | |
283 | \end{lstlisting}\\ | |
284 | \end{tabular} | |
285 | \end{center} | |
286 | ||
287 | {%\sizecodebis | |
288 | \begin{center} | |
289 | \begin{tabular}{p{5cm}p{3cm}p{5cm}} | |
290 | \begin{lstlisting}[language=Cocci] | |
291 | proc_info_func(...) { | |
292 | <... | |
293 | @-- hostno | |
294 | ...> | |
295 | } | |
296 | \end{lstlisting} | |
297 | && | |
298 | \begin{lstlisting}[language=Cocci] | |
299 | proc_info_func(...) { | |
300 | <... | |
301 | @++ hostptr->host_no | |
302 | ...> | |
303 | } | |
304 | \end{lstlisting} | |
305 | \end{tabular} | |
306 | \end{center} | |
307 | } | |
308 | ||
309 | \noindent | |
310 | Requiring that both slices parse correctly ensures that the rule matches | |
311 | syntactically valid C code and that it produces syntactically valid C code. | |
312 | The generated parse trees are then merged for use in the subsequent | |
313 | matching and transformation process. | |
314 | ||
315 | The grammar for the minus or plus slice of a transformation is as follows: | |
316 | ||
317 | \begin{grammar} | |
318 | ||
319 | \RULE{\rt{transformation}} | |
320 | \CASE{\some{\NT{include}}} | |
321 | \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}} | |
322 | \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}} | |
323 | \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{fundecl}, \NT{when}\mth{)}} | |
324 | ||
325 | \RULE{\rt{include}} | |
326 | \CASE{\#include \T{include\_string}} | |
327 | ||
328 | % \RULE{\rt{fun\_decl\_stmt}} | |
329 | % \CASE{\NT{decl\_stmt}} | |
330 | % \CASE{\NT{fundecl}} | |
331 | ||
332 | % \CASE{\NT{ctype}} | |
333 | % \CASE{\ttlb \NT{initialize\_list} \ttrb} | |
334 | % \CASE{\NT{toplevel\_seq\_start\_after\_dots\_init}} | |
335 | % | |
336 | % \RULE{\rt{toplevel\_seq\_start\_after\_dots\_init}} | |
337 | % \CASE{\NT{stmt\_dots} \NT{toplevel\_after\_dots}} | |
338 | % \CASE{\NT{expr} \opt{\NT{toplevel\_after\_exp}}} | |
339 | % \CASE{\NT{decl\_stmt\_expr} \opt{\NT{toplevel\_after\_stmt}}} | |
340 | % | |
341 | % \RULE{\rt{stmt\_dots}} | |
342 | % \CASE{... \any{\NT{when}}} | |
343 | % \CASE{<... \any{\NT{when}} \NT{nest\_after\_dots} ...>} | |
344 | % \CASE{<+... \any{\NT{when}} \NT{nest\_after\_dots} ...+>} | |
345 | ||
346 | \RULE{\rt{when}} | |
347 | \CASE{when != \NT{when\_code}} | |
348 | \CASE{when = \NT{rule\_elem\_stmt}} | |
349 | \CASE{when \NT{COMMA\_LIST}\mth{(}\NT{any\_strict}\mth{)}} | |
350 | \CASE{when true != \NT{expr}} | |
351 | \CASE{when false != \NT{expr}} | |
352 | ||
353 | \RULE{\rt{when\_code}} | |
354 | \CASE{\NT{OPTDOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}} | |
355 | \CASE{\NT{OPTDOTSEQ}\mth{(}\NT{expr}, \NT{when}\mth{)}} | |
356 | ||
357 | \RULE{\rt{rule\_elem\_stmt}} | |
358 | \CASE{\NT{one\_decl}} | |
359 | \CASE{\NT{expr};} | |
360 | \CASE{return \opt{\NT{expr}};} | |
361 | \CASE{break;} | |
362 | \CASE{continue;} | |
363 | \CASE{\bs(\NT{rule\_elem\_stmt} \SOME{\bs| \NT{rule\_elem\_stmt}}\bs)} | |
364 | ||
365 | \RULE{\rt{any\_strict}} | |
366 | \CASE{any} | |
367 | \CASE{strict} | |
368 | \CASE{forall} | |
369 | \CASE{exists} | |
370 | ||
371 | % \RULE{\rt{nest\_after\_dots}} | |
372 | % \CASE{\NT{decl\_stmt\_exp} \opt{\NT{nest\_after\_stmt}}} | |
373 | % \CASE{\opt{\NT{exp}} \opt{\NT{nest\_after\_exp}}} | |
374 | % | |
375 | % \RULE{\rt{nest\_after\_stmt}} | |
376 | % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}} | |
377 | % \CASE{\NT{decl\_stmt} \opt{\NT{nest\_after\_stmt}}} | |
378 | % | |
379 | % \RULE{\rt{nest\_after\_exp}} | |
380 | % \CASE{\NT{stmt\_dots} \NT{nest\_after\_dots}} | |
381 | % | |
382 | % \RULE{\rt{toplevel\_after\_dots}} | |
383 | % \CASE{\opt{\NT{toplevel\_after\_exp}}} | |
384 | % \CASE{\NT{exp} \opt{\NT{toplevel\_after\_exp}}} | |
385 | % \CASE{\NT{decl\_stmt\_expr} \NT{toplevel\_after\_stmt}} | |
386 | % | |
387 | % \RULE{\rt{toplevel\_after\_exp}} | |
388 | % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}} | |
389 | % | |
390 | % \RULE{\rt{decl\_stmt\_expr}} | |
391 | % \CASE{TMetaStmList$^\ddag$} | |
392 | % \CASE{\NT{decl\_var}} | |
393 | % \CASE{\NT{stmt}} | |
394 | % \CASE{(\NT{stmt\_seq} \ANY{| \NT{stmt\_seq}})} | |
395 | % | |
396 | % \RULE{\rt{toplevel\_after\_stmt}} | |
397 | % \CASE{\NT{stmt\_dots} \opt{\NT{toplevel\_after\_dots}}} | |
398 | % \CASE{\NT{decl\_stmt} \NT{toplevel\_after\_stmt}} | |
399 | ||
400 | \end{grammar} | |
401 | ||
402 | \begin{grammar} | |
403 | \RULE{\rt{OPTDOTSEQ}\mth{(}\rt{grammar\_ds}, \rt{when\_ds}\mth{)}} | |
404 | \CASE{}\multicolumn{3}{r}{\hspace{1cm} | |
405 | \KW{\opt{... \opt{\NT{when\_ds}}} \NT{grammar\_ds} | |
406 | \ANY{... \opt{\NT{when\_ds}} \NT{grammar\_ds}} | |
407 | \opt{... \opt{\NT{when\_ds}}}} | |
408 | } | |
409 | ||
410 | % \CASE{\opt{... \opt{\NT{when\_ds}}} \NT{grammar} | |
411 | % \ANY{... \opt{\NT{when\_ds}} \NT{grammar}} | |
412 | % \opt{... \opt{\NT{when\_ds}}}} | |
413 | % \CASE{<... \any{\NT{when\_ds}} \NT{grammar} ...>} | |
414 | % \CASE{<+... \any{\NT{when\_ds}} \NT{grammar} ...+>} | |
415 | ||
416 | \end{grammar} | |
417 | ||
418 | \noindent | |
419 | Lines may be annotated with an element of the set $\{\mtt{-}, \mtt{+}, | |
420 | \mtt{*}\}$ or the singleton $\mtt{?}$, or one of each set. \mtt{?} | |
421 | represents at most one match of the given pattern. \mtt{*} is used for | |
422 | semantic match, \emph{i.e.}, a pattern that highlights the fragments | |
423 | annotated with \mtt{*}, but does not perform any modification of the | |
424 | matched code. \mtt{*} cannot be mixed with \mtt{-} and \mtt{+}. There are | |
425 | some constraints on the use of these annotations: | |
426 | \begin{itemize} | |
427 | \item Dots, {\em i.e.} \texttt{...}, cannot occur on a line marked | |
428 | \texttt{+}. | |
429 | \item Nested dots, {\em i.e.}, dots enclosed in {\tt <} and {\tt >}, cannot | |
430 | occur on a line with any marking. | |
431 | \end{itemize} | |
432 | ||
0708f913 C |
433 | Each element of a disjunction must be a proper term like an |
434 | expression, a statement, an identifier or a declaration. Thus, the | |
435 | rule on the left below is not a syntactically correct SmPL rule. One may | |
436 | use the rule on the right instead. | |
437 | ||
438 | \begin{center} | |
439 | \begin{tabular}{l@{\hspace{5cm}}r} | |
440 | \begin{lstlisting}[language=Cocci] | |
441 | @@ | |
442 | type T; | |
443 | T b; | |
444 | @@ | |
445 | ||
446 | ( | |
447 | writeb(..., | |
448 | | | |
449 | readb( | |
450 | ) | |
451 | @--(T) | |
452 | b) | |
453 | \end{lstlisting} | |
454 | & | |
455 | \begin{lstlisting}[language=Cocci] | |
456 | @@ | |
457 | type T; | |
458 | T b; | |
459 | @@ | |
460 | ||
461 | ( | |
462 | read | |
463 | | | |
464 | write | |
465 | ) | |
466 | (..., | |
467 | @-- (T) | |
468 | b) | |
469 | \end{lstlisting} | |
470 | \\ | |
471 | \end{tabular} | |
472 | \end{center} | |
473 | ||
faf9a90c C |
474 | \section{Types} |
475 | \label{types} | |
476 | ||
477 | \begin{grammar} | |
478 | ||
479 | \RULE{\rt{ctypes}} | |
480 | \CASE{\NT{COMMA\_LIST}\mth{(}\NT{ctype}\mth{)}} | |
481 | ||
482 | \RULE{\rt{ctype}} | |
483 | \CASE{\opt{\NT{const\_vol}} \NT{generic\_ctype} \any{*}} | |
484 | \CASE{\opt{\NT{const\_vol}} void \some{*}} | |
485 | \CASE{(\NT{ctype} \ANY{| \NT{ctype}})} | |
486 | ||
487 | \RULE{\rt{const\_vol}} | |
488 | \CASE{const} | |
489 | \CASE{volatile} | |
490 | ||
491 | \RULE{\rt{generic\_ctype}} | |
492 | \CASE{\NT{ctype\_qualif}} | |
493 | \CASE{\opt{\NT{ctype\_qualif}} char} | |
494 | \CASE{\opt{\NT{ctype\_qualif}} short} | |
495 | \CASE{\opt{\NT{ctype\_qualif}} int} | |
496 | \CASE{\opt{\NT{ctype\_qualif}} long} | |
497 | \CASE{\opt{\NT{ctype\_qualif}} long long} | |
498 | \CASE{double} | |
499 | \CASE{float} | |
500 | \CASE{\OPT{struct\OR union} \T{id} \OPT{\{ \any{\NT{struct\_decl\_list}} \}}} | |
501 | ||
502 | \RULE{\rt{ctype\_qualif}} | |
503 | \CASE{unsigned} | |
504 | \CASE{signed} | |
505 | ||
506 | \RULE{\rt{struct\_decl\_list}} | |
507 | \CASE{\NT{struct\_decl\_list\_start}} | |
508 | ||
509 | \RULE{\rt{struct\_decl\_list\_start}} | |
510 | \CASE{\NT{struct\_decl}} | |
511 | \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}} | |
512 | \CASE{... \opt{when != \NT{struct\_decl}}$^\dag$ \opt{\NT{continue\_struct\_decl\_list}}} | |
513 | ||
514 | \RULE{\rt{continue\_struct\_decl\_list}} | |
515 | \CASE{\NT{struct\_decl} \NT{struct\_decl\_list\_start}} | |
516 | \CASE{\NT{struct\_decl}} | |
517 | ||
518 | \RULE{\rt{struct\_decl}} | |
519 | \CASE{\NT{ctype} \NT{d\_ident};} | |
520 | \CASE{\NT{fn\_ctype} (* \NT{d\_ident}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)});} | |
521 | \CASE{\opt{\NT{const\_vol}} \T{id} \NT{d\_ident};} | |
522 | ||
523 | \RULE{\rt{d\_ident}} | |
524 | \CASE{\NT{id} \any{[\opt{\NT{expr}}]}} | |
525 | ||
526 | \RULE{\rt{fn\_ctype}} | |
527 | \CASE{\NT{generic\_ctype} \any{*}} | |
528 | \CASE{void \any{*}} | |
529 | ||
530 | \RULE{\rt{name\_opt\_decl}} | |
531 | \CASE{\NT{decl}} | |
532 | \CASE{\NT{ctype}} | |
533 | \CASE{\NT{fn\_ctype}} | |
534 | \end{grammar} | |
535 | ||
536 | $^\dag$ The optional \texttt{when} construct ends at the end of the line. | |
537 | ||
538 | \section{Function declarations} | |
539 | ||
540 | \begin{grammar} | |
541 | ||
542 | \RULE{\rt{fundecl}} | |
543 | \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid} | |
544 | (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}}) | |
545 | \ttlb~\opt{\NT{stmt\_seq}} \ttrb} | |
546 | ||
547 | \RULE{\rt{funproto}} | |
548 | \CASE{\opt{\NT{fn\_ctype}} \any{\NT{funinfo}} \NT{funid} | |
549 | (\opt{\NT{PARAMSEQ}\mth{(}\NT{param}, \mth{\varepsilon)}});} | |
550 | ||
551 | \RULE{\rt{funinfo}} | |
552 | \CASE{inline} | |
553 | \CASE{\NT{storage}} | |
554 | % \CASE{\NT{attr}} | |
555 | ||
556 | \RULE{\rt{storage}} | |
557 | \CASE{static} | |
558 | \CASE{auto} | |
559 | \CASE{register} | |
560 | \CASE{extern} | |
561 | ||
562 | \RULE{\rt{funid}} | |
563 | \CASE{\T{id}} | |
564 | \CASE{\mth{\T{metaid}^{\ssf{Id}}}} | |
565 | % \CASE{\mth{\T{metaid}^{\ssf{Func}}}} | |
566 | % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}} | |
567 | ||
568 | \RULE{\rt{param}} | |
569 | \CASE{\NT{type} \T{id}} | |
570 | \CASE{\mth{\T{metaid}^{\ssf{Param}}}} | |
571 | \CASE{\mth{\T{metaid}^{\ssf{ParamList}}}} | |
572 | ||
573 | \RULE{\rt{decl}} | |
574 | \CASE{\NT{ctype} \NT{id}} | |
575 | \CASE{\NT{fn\_ctype} (* \NT{id}) (\NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)})} | |
576 | \CASE{void} | |
577 | \CASE{\mth{\T{metaid}^{\ssf{Param}}}} | |
578 | \end{grammar} | |
579 | ||
580 | \begin{grammar} | |
581 | \RULE{\rt{PARAMSEQ}\mth{(}\rt{gram\_p}, \rt{when\_p}\mth{)}} | |
582 | \CASE{\NT{COMMA\_LIST}\mth{(}\NT{gram\_p} \OR \ldots \opt{\NT{when\_p}}\mth{)}} | |
583 | \end{grammar} | |
584 | ||
585 | %\newpage | |
586 | ||
587 | \section{Declarations} | |
588 | ||
589 | \begin{grammar} | |
590 | \RULE{\rt{decl\_var}} | |
591 | % \CASE{\NT{type} \opt{\NT{id} \opt{[\opt{\NT{dot\_expr}}]} | |
592 | % \ANY{, \NT{id} \opt{[ \opt{\NT{dot\_expr}}]}}};} | |
593 | \CASE{\NT{common\_decl}} | |
594 | \CASE{\opt{\NT{storage}} \NT{ctype} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;} | |
595 | \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{COMMA\_LIST}\mth{(}\NT{d\_ident}\mth{)} ;} | |
596 | \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) = \NT{initialize} ;} | |
597 | \CASE{typedef \NT{ctype} \NT{typedef\_ident} ;} | |
598 | ||
599 | \RULE{\rt{one\_decl}} | |
600 | \CASE{\NT{common\_decl}} | |
601 | \CASE{\opt{\NT{storage}} \NT{ctype} \NT{id};} | |
602 | % \CASE{\NT{storage} \NT{ctype} \NT{id} \opt{[\opt{\NT{dot\\_expr}}]} = \NT{nest\\_expr};} | |
603 | \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} ;} | |
604 | ||
605 | \RULE{\rt{common\_decl}} | |
606 | \CASE{\NT{ctype};} | |
607 | \CASE{\NT{funproto}} | |
608 | \CASE{\opt{\NT{storage}} \NT{ctype} \NT{d\_ident} = \NT{initialize} ;} | |
609 | \CASE{\opt{\NT{storage}} \opt{\NT{const\_vol}} \T{id} \NT{d\_ident} = \NT{initialize} ;} | |
610 | \CASE{\opt{\NT{storage}} \NT{fn\_ctype} ( * \NT{d\_ident} ) ( \NT{PARAMSEQ}\mth{(}\NT{name\_opt\_decl}, \mth{\varepsilon)} ) ;} | |
611 | \CASE{\NT{decl\_ident} ( \OPT{\NT{COMMA\_LIST}\mth{(}\NT{expr}\mth{)}} ) ;} | |
612 | ||
613 | \RULE{\rt{initialize}} | |
614 | \CASE{\NT{dot\_expr}} | |
615 | \CASE{\ttlb~\opt{\NT{COMMA\_LIST}\mth{(}\NT{dot\_expr}\mth{)}}~\ttrb} | |
616 | ||
617 | \RULE{\rt{decl\_ident}} | |
618 | \CASE{\T{DeclarerId}} | |
619 | \CASE{\mth{\T{metaid}^{\ssf{Declarer}}}} | |
620 | \end{grammar} | |
621 | ||
622 | \section{Statements} | |
623 | ||
624 | The first rule {\em stmt} describes the various forms of a statement. | |
625 | The remaining rules implement the constraints that are sensitive to the | |
626 | context in which the statement occurs: {\em single\_stmt} for a | |
627 | context in which only one statement is allowed, and {\em decl\_stmt} | |
628 | for a context in which a declaration, statement, or sequence thereof is | |
629 | allowed. | |
630 | ||
631 | \begin{grammar} | |
632 | \RULE{\rt{stmt}} | |
633 | \CASE{\NT{include}} | |
634 | \CASE{\mth{\T{metaid}^{\ssf{Stmt}}}} | |
635 | \CASE{\NT{expr};} | |
636 | \CASE{if (\NT{dot\_expr}) \NT{single\_stmt} \opt{else \NT{single\_stmt}}} | |
637 | \CASE{for (\opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}; \opt{\NT{dot\_expr}}) | |
638 | \NT{single\_stmt}} | |
639 | \CASE{while (\NT{dot\_expr}) \NT{single\_stmt}} | |
640 | \CASE{do \NT{single\_stmt} while (\NT{dot\_expr});} | |
641 | \CASE{\NT{iter\_ident} (\any{\NT{dot\_expr}}) \NT{single\_stmt}} | |
642 | \CASE{switch (\opt{\NT{dot\_expr}}) \ttlb \any{\NT{case\_line}} \ttrb} | |
643 | \CASE{return \opt{\NT{dot\_expr}};} | |
644 | \CASE{\ttlb~\opt{\NT{stmt\_seq}} \ttrb} | |
645 | \CASE{\NT{NEST}\mth{(}\some{\NT{decl\_stmt}}, \NT{when}\mth{)}} | |
646 | \CASE{\NT{NEST}\mth{(}\NT{expr}, \NT{when}\mth{)}} | |
647 | \CASE{break;} | |
648 | \CASE{continue;} | |
649 | \CASE{\NT{id}:} | |
650 | \CASE{goto \NT{id};} | |
651 | \CASE{\ttlb \NT{stmt\_seq} \ttrb} | |
652 | ||
653 | \RULE{\rt{single\_stmt}} | |
654 | \CASE{\NT{stmt}} | |
655 | \CASE{\NT{OR}\mth{(}\NT{stmt}\mth{)}} | |
656 | ||
657 | \RULE{\rt{decl\_stmt}} | |
658 | \CASE{\mth{\T{metaid}^{\ssf{StmtList}}}} | |
659 | \CASE{\NT{decl\_var}} | |
660 | \CASE{\NT{stmt}} | |
661 | \CASE{\NT{OR}\mth{(}\NT{stmt\_seq}\mth{)}} | |
662 | ||
663 | \RULE{\rt{stmt\_seq}} | |
664 | \CASE{\any{\NT{decl\_stmt}} | |
665 | \opt{\NT{DOTSEQ}\mth{(}\some{\NT{decl\_stmt}}, | |
666 | \NT{when}\mth{)} \any{\NT{decl\_stmt}}}} | |
667 | \CASE{\any{\NT{decl\_stmt}} | |
668 | \opt{\NT{DOTSEQ}\mth{(}\NT{expr}, | |
669 | \NT{when}\mth{)} \any{\NT{decl\_stmt}}}} | |
670 | ||
671 | \RULE{\rt{case\_line}} | |
672 | \CASE{default :~\NT{stmt\_seq}} | |
673 | \CASE{case \NT{dot\_expr} :~\NT{stmt\_seq}} | |
674 | ||
675 | \RULE{\rt{iter\_ident}} | |
676 | \CASE{\T{IteratorId}} | |
677 | \CASE{\mth{\T{metaid}^{\ssf{Iterator}}}} | |
678 | \end{grammar} | |
679 | ||
680 | \begin{grammar} | |
681 | \RULE{\rt{OR}\mth{(}\rt{gram\_o}\mth{)}} | |
682 | \CASE{( \NT{gram\_o} \ANY{\ttmid \NT{gram\_o}})} | |
683 | ||
684 | \RULE{\rt{DOTSEQ}\mth{(}\rt{gram\_d}, \rt{when\_d}\mth{)}} | |
685 | \CASE{\ldots \opt{\NT{when\_d}} \ANY{\NT{gram\_d} \ldots \opt{\NT{when\_d}}}} | |
686 | ||
687 | \RULE{\rt{NEST}\mth{(}\rt{gram\_n}, \rt{when\_n}\mth{)}} | |
688 | \CASE{<\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots>} | |
689 | \CASE{<+\ldots \opt{\NT{when\_n}} \NT{gram\_n} \ANY{\ldots \opt{\NT{when\_n}} \NT{gram\_n}} \ldots+>} | |
690 | \end{grammar} | |
691 | ||
692 | \noindent | |
693 | OR is a macro that generates a disjunction of patterns. The three | |
694 | tokens \T{(}, \T{\ttmid}, and \T{)} must appear in the leftmost | |
695 | column, to differentiate them from the parentheses and bit-or tokens | |
696 | that can appear within expressions (and cannot appear in the leftmost | |
697 | column). These tokens may also be preceded by \texttt{\bs} | |
698 | when they are used in another column. These tokens are furthermore | |
699 | different from (, \(\mid\), and ), which are part of the grammar | |
700 | metalanguage. | |
701 | ||
702 | \section{Expressions} | |
703 | ||
704 | A nest or a single ellipsis is allowed in some expression contexts, and | |
705 | causes ambiguity in others. For example, in a sequence \mtt{\ldots | |
706 | \mita{expr} \ldots}, the nonterminal \mita{expr} must be instantiated as an | |
707 | explicit C-language expression, while in an array reference, | |
708 | \mtt{\mth{\mita{expr}_1} \mtt{[} \mth{\mita{expr}_2} \mtt{]}}, the | |
709 | nonterminal \mth{\mita{expr}_2}, because it is delimited by brackets, can | |
710 | also be instantiated as \mtt{\ldots}, representing an arbitrary expression. To | |
711 | distinguish between the various possibilities, we define three nonterminals | |
712 | for expressions: {\em expr} does not allow either top-level nests or | |
713 | ellipses, {\em nest\_expr} allows a nest but not an ellipsis, and {\em | |
714 | dot\_expr} allows both. The EXPR macro is used to express these variants | |
715 | in a concise way. | |
716 | ||
717 | \begin{grammar} | |
718 | \RULE{\rt{expr}} | |
719 | \CASE{\NT{EXPR}\mth{(}\NT{expr}\mth{)}} | |
720 | ||
721 | \RULE{\rt{nest\_expr}} | |
722 | \CASE{\NT{EXPR}\mth{(}\NT{nest\_expr}\mth{)}} | |
723 | \CASE{\NT{NEST}\mth{(}\NT{nest\_expr}, \NT{exp\_whencode}\mth{)}} | |
724 | ||
725 | \RULE{\rt{dot\_expr}} | |
726 | \CASE{\NT{EXPR}\mth{(}\NT{dot\_expr}\mth{)}} | |
727 | \CASE{\NT{NEST}\mth{(}\NT{dot\_expr}, \NT{exp\_whencode}\mth{)}} | |
728 | \CASE{...~\opt{\NT{exp\_whencode}}} | |
729 | ||
730 | \RULE{\rt{EXPR}\mth{(}\rt{exp}\mth{)}} | |
731 | \CASE{\NT{exp} \NT{assign\_op} \NT{exp}} | |
732 | \CASE{\NT{exp}++} | |
733 | \CASE{\NT{exp}--} | |
734 | \CASE{\NT{unary\_op} \NT{exp}} | |
735 | \CASE{\NT{exp} \NT{bin\_op} \NT{exp}} | |
736 | \CASE{\NT{exp} ?~\NT{dot\_expr} :~\NT{exp}} | |
737 | \CASE{(\NT{type}) \NT{exp}} | |
738 | \CASE{\NT{exp} [\NT{dot\_expr}]} | |
739 | \CASE{\NT{exp} .~\NT{id}} | |
740 | \CASE{\NT{exp} -> \NT{id}} | |
741 | \CASE{\NT{exp}(\opt{\NT{PARAMSEQ}\mth{(}\NT{arg}, \NT{exp\_whencode}\mth{)}})} | |
742 | \CASE{\NT{id}} | |
743 | % \CASE{\mth{\T{metaid}^{\ssf{Func}}}} | |
744 | % \CASE{\mth{\T{metaid}^{\ssf{LocalFunc}}}} | |
745 | \CASE{\mth{\T{metaid}^{\ssf{Exp}}}} | |
746 | % \CASE{\mth{\T{metaid}^{\ssf{Err}}}} | |
747 | \CASE{\mth{\T{metaid}^{\ssf{Const}}}} | |
748 | \CASE{\NT{const}} | |
749 | \CASE{(\NT{dot\_expr})} | |
750 | \CASE{\NT{OR}\mth{(}\NT{exp}\mth{)}} | |
751 | ||
752 | \RULE{\rt{arg}} | |
753 | \CASE{\NT{nest\_expr}} | |
754 | \CASE{\mth{\T{metaid}^{\ssf{ExpList}}}} | |
755 | ||
756 | \RULE{\rt{exp\_whencode}} | |
757 | \CASE{when != \NT{expr}} | |
758 | ||
759 | \RULE{\rt{assign\_op}} | |
760 | \CASE{= \OR -= \OR += \OR *= \OR /= \OR \%=} | |
761 | \CASE{\&= \OR |= \OR \caret= \OR \lt\lt= \OR \gt\gt=} | |
762 | ||
763 | \RULE{\rt{bin\_op}} | |
764 | \CASE{* \OR / \OR \% \OR + \OR -} | |
765 | \CASE{\lt\lt \OR \gt\gt \OR \caret\xspace \OR \& \OR \ttmid} | |
766 | \CASE{< \OR > \OR <= \OR >= \OR == \OR != \OR \&\& \OR \ttmid\ttmid} | |
767 | ||
768 | \RULE{\rt{unary\_op}} | |
769 | \CASE{++ \OR -- \OR \& \OR * \OR + \OR - \OR !} | |
770 | ||
771 | \end{grammar} | |
772 | ||
773 | \section{Constants, Identifiers and Types for Transformations} | |
774 | ||
775 | \begin{grammar} | |
776 | \RULE{\rt{const}} | |
777 | \CASE{\NT{string}} | |
778 | \CASE{[0-9]+} | |
779 | \CASE{\mth{\cdots}} | |
780 | ||
781 | \RULE{\rt{string}} | |
782 | \CASE{"\any{[\^{}"]}"} | |
783 | ||
784 | \RULE{\rt{id}} | |
785 | \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Id}}}} | |
786 | ||
787 | \RULE{\rt{typedef\_ident}} | |
788 | \CASE{\T{id} \OR \mth{\T{metaid}^{\ssf{Type}}}} | |
789 | ||
790 | \RULE{\rt{type}} | |
791 | \CASE{\NT{ctype} \OR \mth{\T{metaid}^{\ssf{Type}}}} | |
792 | ||
793 | \RULE{\rt{pathToIsoFile}} | |
794 | \CASE{<.*>} | |
951c7801 C |
795 | |
796 | \RULE{\rt{regexp}} | |
797 | \CASE{"\any{[\^{}"]}"} | |
faf9a90c C |
798 | \end{grammar} |
799 | ||
faf9a90c C |
800 | |
801 | %%% Local Variables: | |
802 | %%% mode: LaTeX | |
708f4980 | 803 | %%% TeX-master: "main_grammar" |
faf9a90c C |
804 | %%% coding: latin-9 |
805 | %%% TeX-PDF-mode: t | |
806 | %%% ispell-local-dictionary: "american" | |
807 | %%% End: |