Commit | Line | Data |
---|---|---|
faf9a90c C |
1 | |
2 | \section{Examples} | |
3 | %\label{sec:examples} | |
4 | ||
5 | This section presents a range of examples. Each | |
6 | example is presented along with some C code to which it is | |
7 | applied. The description explains the rules and the matching process. | |
8 | ||
9 | \subsection{Function renaming} | |
10 | ||
11 | One of the primary goals of Coccinelle is to perform software | |
12 | evolution. For instance, Coccinelle could be used to perform function | |
13 | renaming. In the following example, every occurrence of a call to the | |
14 | function \texttt{foo} is replaced by a call to the | |
15 | function \texttt{bar}.\\ | |
16 | ||
17 | \begin{tabular}{ccc} | |
18 | Before & Semantic patch & After \\ | |
19 | \begin{minipage}[t]{.3\linewidth} | |
20 | \begin{lstlisting} | |
21 | #DEFINE TEST "foo" | |
22 | ||
23 | printf("foo"); | |
24 | ||
25 | int main(int i) { | |
26 | //Test | |
27 | int k = foo(); | |
28 | ||
29 | if(1) { | |
30 | foo(); | |
31 | } else { | |
32 | foo(); | |
33 | } | |
34 | ||
35 | foo(); | |
36 | } | |
37 | \end{lstlisting} | |
38 | \end{minipage} | |
39 | & | |
40 | \begin{minipage}[t]{.3\linewidth} | |
41 | \begin{lstlisting}[language=Cocci] | |
42 | @M@@ | |
43 | ||
44 | @@@M | |
45 | ||
46 | ||
47 | @-- foo() | |
48 | @++ bar() | |
49 | \end{lstlisting} | |
50 | \end{minipage} | |
51 | & | |
52 | \begin{minipage}[t]{.3\linewidth} | |
53 | \begin{lstlisting} | |
54 | #DEFINE TEST "foo" | |
55 | ||
56 | printf("foo"); | |
57 | ||
58 | int main(int i) { | |
59 | //Test | |
60 | int k = bar(); | |
61 | ||
62 | if(1) { | |
63 | bar(); | |
64 | } else { | |
65 | bar(); | |
66 | } | |
67 | ||
68 | bar(); | |
69 | } | |
70 | \end{lstlisting} | |
71 | \end{minipage}\\ | |
72 | \end{tabular} | |
73 | ||
74 | \newpage | |
75 | \subsection{Removing a function argument} | |
76 | ||
77 | Another important kind of evolution is the introduction or deletion of a | |
78 | function argument. In the following example, the rule \texttt{rule1} looks | |
79 | for definitions of functions having return type \texttt{irqreturn\_t} and | |
80 | two parameters. A second \emph{anonymous} rule then looks for calls to the | |
81 | previously matched functions that have three arguments. The third argument | |
82 | is then removed to correspond to the new function prototype.\\ | |
83 | ||
84 | \begin{tabular}{c} | |
85 | \begin{lstlisting}[language=Cocci,name=arg] | |
86 | @M@ rule1 @ | |
87 | identifier fn; | |
88 | identifier irq, dev_id; | |
89 | typedef irqreturn_t; | |
90 | @@@M | |
91 | ||
92 | static irqreturn_t fn (int irq, void *dev_id) | |
93 | { | |
94 | ... | |
95 | } | |
96 | ||
97 | @M@@ | |
98 | identifier rule1.fn; | |
99 | expression E1, E2, E3; | |
100 | @@@M | |
101 | ||
102 | fn(E1, E2 | |
103 | @-- ,E3 | |
104 | ) | |
105 | \end{lstlisting}\\ | |
106 | \end{tabular} | |
107 | ||
108 | \vspace{1cm} | |
109 | ||
110 | \begin{tabular}{c} | |
111 | \texttt{drivers/atm/firestream.c} at line 1653 before transformation\\ | |
112 | \begin{lstlisting}[language=PatchC] | |
113 | static void fs_poll (unsigned long data) | |
114 | { | |
115 | struct fs_dev *dev = (struct fs_dev *) data; | |
116 | ||
117 | @- fs_irq (0, dev, NULL); | |
118 | dev->timer.expires = jiffies + FS_POLL_FREQ; | |
119 | add_timer (&dev->timer); | |
120 | } | |
121 | \end{lstlisting}\\ | |
122 | \vspace{1cm} | |
123 | \\ | |
124 | ||
125 | ||
126 | \texttt{drivers/atm/firestream.c} at line 1653 after transformation\\ | |
127 | \begin{lstlisting}[language=PatchC] | |
128 | static void fs_poll (unsigned long data) | |
129 | { | |
130 | struct fs_dev *dev = (struct fs_dev *) data; | |
131 | ||
132 | @+ fs_irq (0, dev); | |
133 | dev->timer.expires = jiffies + FS_POLL_FREQ; | |
134 | add_timer (&dev->timer); | |
135 | } | |
136 | \end{lstlisting}\\ | |
137 | \end{tabular} | |
138 | ||
139 | \newpage | |
140 | \subsection{Introduction of a macro} | |
141 | ||
142 | To avoid code duplication or error-prone code, the kernel provides | |
143 | macros such as \texttt{BUG\_ON}, \texttt{DIV\_ROUND\_UP} and | |
144 | \texttt{FIELD\_SIZE}. In these cases, the semantic patches look for | |
145 | the old code pattern and replace it by the new code.\\ | |
146 | ||
147 | A semantic patch to introduce uses of the \texttt{DIV\_ROUND\_UP} macro | |
148 | looks for the corresponding expression, \emph{i.e.}, $(n + d - 1) / | |
149 | d$. When some code is matched, the metavariables \texttt{n} and \texttt{d} | |
150 | are bound to their corresponding expressions. Finally, Coccinelle rewrites | |
151 | the code with the \texttt{DIV\_ROUND\_UP} macro using the values bound to | |
152 | \texttt{n} and \texttt{d}, as illustrated in the patch that follows.\\ | |
153 | ||
154 | \begin{tabular}{c} | |
155 | Semantic patch to introduce uses of the \texttt{DIV\_ROUND\_UP} macro\\ | |
156 | \begin{lstlisting}[language=Cocci,name=divround] | |
157 | @M@ haskernel @ | |
158 | @@@M | |
159 | ||
160 | #include <linux/kernel.h> | |
161 | ||
162 | @M@ depends on haskernel @ | |
163 | expression n,d; | |
164 | @@@M | |
165 | ||
166 | ( | |
167 | @-- (((n) + (d)) - 1) / (d)) | |
168 | @++ DIV_ROUND_UP(n,d) | |
169 | | | |
170 | @-- (((n) + ((d) - 1)) / (d)) | |
171 | @++ DIV_ROUND_UP(n,d) | |
172 | ) | |
173 | \end{lstlisting} | |
174 | \end{tabular}\\ | |
175 | ||
176 | \vspace{1cm} | |
177 | ||
178 | \begin{tabular}{c} | |
179 | Example of a generated patch hunk\\ | |
180 | \begin{lstlisting}[language=PatchC] | |
181 | @---- a/drivers/atm/horizon.c | |
182 | @++++ b/drivers/atm/horizon.c | |
183 | @M@@ -698,7 +698,7 @@ got_it: | |
184 | if (bits) | |
185 | *bits = (div<<CLOCK_SELECT_SHIFT) | (pre-1); | |
186 | if (actual) { | |
187 | @-- *actual = (br + (pre<<div) - 1) / (pre<<div); | |
188 | @++ *actual = DIV_ROUND_UP(br, pre<<div); | |
189 | PRINTD (DBG_QOS, "actual rate: %u", *actual); | |
190 | } | |
191 | return 0; | |
192 | \end{lstlisting} | |
193 | \end{tabular}\\ | |
194 | ||
195 | \newpage | |
196 | ||
197 | The \texttt{BUG\_ON} macro makes an assertion about the value of an | |
198 | expression. However, because some parts of the kernel define | |
199 | \texttt{BUG\_ON} to be the empty statement when debugging is not wanted, | |
200 | care must be taken when the asserted expression may have some side-effects, | |
201 | as is the case of a function call. Thus, we create a rule introducing | |
202 | \texttt{BUG\_ON} only in the case when the asserted expression does not | |
203 | perform a function call. | |
204 | ||
205 | One particular piece of code that has the form of a function call is a use | |
206 | of \texttt{unlikely}, which informs the compiler that a particular | |
207 | expression is unlikely to be true. In this case, because \texttt{unlikely} | |
208 | does not perform any side effects, it is safe to use \texttt{BUG\_ON}. The | |
209 | second rule takes care of this case. It furthermore disables the | |
210 | isomorphism that allows a call to \texttt{unlikely} to be replaced with its | |
211 | argument, as then the second rule would be the same as the first one.\\ | |
212 | ||
213 | \begin{tabular}{c} | |
214 | \begin{lstlisting}[language=Cocci,name=bugon] | |
215 | @M@@ | |
216 | expression E,f; | |
217 | @@@M | |
218 | ||
219 | ( | |
220 | if (<+... f(...) ...+>) { BUG(); } | |
221 | | | |
222 | @-- if (E) { BUG(); } | |
223 | @++ BUG_ON(E); | |
224 | ) | |
225 | ||
226 | @M@ disable unlikely @ | |
227 | expression E,f; | |
228 | @@@M | |
229 | ||
230 | ( | |
231 | if (<+... f(...) ...+>) { BUG(); } | |
232 | | | |
233 | @-- if (unlikely(E)) { BUG(); } | |
234 | @++ BUG_ON(E); | |
235 | ) | |
236 | \end{lstlisting}\\ | |
237 | \end{tabular}\\ | |
238 | ||
239 | For instance, using the semantic patch above, Coccinelle generates | |
240 | patches like the following one. | |
241 | ||
242 | \begin{tabular}{c} | |
243 | \begin{lstlisting}[language=PatchC] | |
244 | @---- a/fs/ext3/balloc.c | |
245 | @++++ b/fs/ext3/balloc.c | |
246 | @M@@ -232,8 +232,7 @@ restart: | |
247 | prev = rsv; | |
248 | } | |
249 | printk("Window map complete.\n"); | |
250 | @-- if (bad) | |
251 | @-- BUG(); | |
252 | @++ BUG_ON(bad); | |
253 | } | |
254 | #define rsv_window_dump(root, verbose) \ | |
255 | __rsv_window_dump((root), (verbose), __FUNCTION__) | |
256 | \end{lstlisting} | |
257 | \end{tabular} | |
258 | ||
259 | \newpage | |
260 | \subsection{Look for \texttt{NULL} dereference} | |
261 | ||
262 | This SmPL match looks for \texttt{NULL} dereferences. Once an | |
263 | expression has been compared to \texttt{NULL}, a dereference of this | |
264 | expression is prohibited unless the pointer variable is reassigned.\\ | |
265 | ||
266 | \begin{tabular}{c} | |
267 | Original \\ | |
268 | ||
269 | \begin{lstlisting} | |
270 | foo = kmalloc(1024); | |
271 | if (!foo) { | |
272 | printk ("Error %s", foo->here); | |
273 | return; | |
274 | } | |
275 | foo->ok = 1; | |
276 | return; | |
277 | \end{lstlisting}\\ | |
278 | \end{tabular} | |
279 | ||
280 | \vspace{1cm} | |
281 | ||
282 | \begin{tabular}{c} | |
283 | Semantic match\\ | |
284 | ||
285 | \begin{lstlisting}[language=Cocci] | |
286 | @M@@ | |
287 | expression E, E1; | |
288 | identifier f; | |
289 | statement S1,S2,S3; | |
290 | @@@M | |
291 | ||
292 | @+* if (E == NULL) | |
293 | { | |
294 | ... when != if (E == NULL) S1 else S2 | |
295 | when != E = E1 | |
296 | @+* E->f | |
297 | ... when any | |
298 | return ...; | |
299 | } | |
300 | else S3 | |
301 | \end{lstlisting}\\ | |
302 | \end{tabular} | |
303 | ||
304 | \vspace{1cm} | |
305 | ||
306 | \begin{tabular}{c} | |
307 | Matched lines\\ | |
308 | ||
309 | \begin{lstlisting}[language=PatchC] | |
310 | foo = kmalloc(1024); | |
311 | @-if (!foo) { | |
312 | @- printk ("Error %s", foo->here); | |
313 | return; | |
314 | } | |
315 | foo->ok = 1; | |
316 | return; | |
317 | \end{lstlisting}\\ | |
318 | \end{tabular} | |
319 | ||
320 | \newpage | |
321 | \subsection{Reference counter: the of\_xxx API} | |
322 | ||
323 | Coccinelle can embed Python code. Python code is used inside special | |
324 | SmPL rules annotated with \texttt{script:python}. Python rules inherit | |
325 | metavariables, such as identifier or token positions, from other SmPL | |
326 | rules. The inherited metavariables can then be manipulated by Python | |
327 | code. | |
328 | ||
329 | The following semantic match looks for a call to the | |
330 | \texttt{of\_find\_node\_by\_name} function. This call increments a | |
331 | counter which must be decremented to release the resource. Then, when | |
332 | there is no call to \texttt{of\_node\_put}, no new assignment to the | |
333 | \texttt{device\_node} variable \texttt{n} and a \texttt{return} | |
334 | statement is reached, a bug is detected and the positions \texttt{p1} | |
335 | and \texttt{p2} are initialized. As the Python rule only depends on the | |
336 | positions \texttt{p1} and \texttt{p2}, it is evaluated. In the | |
337 | following case, some emacs Org mode data are produced. This example | |
338 | illustrates the various fields that can be accessed in the Python code from | |
339 | a position variable. | |
340 | ||
341 | \begin{tabular}{c} | |
342 | \begin{lstlisting}[language=Cocci,breaklines=true] | |
343 | @M@ r exists @ | |
344 | local idexpression struct device_node *n; | |
345 | position p1, p2; | |
346 | statement S1,S2; | |
347 | expression E,E1; | |
348 | @@@M | |
349 | ||
350 | ( | |
351 | if (!(n@p1 = of_find_node_by_name(...))) S1 | |
352 | | | |
353 | n@p1 = of_find_node_by_name(...) | |
354 | ) | |
355 | <... when != of_node_put(n) | |
356 | when != if (...) { <+... of_node_put(n) ...+> } | |
357 | when != true !n || ... | |
358 | when != n = E | |
359 | when != E = n | |
360 | if (!n || ...) S2 | |
361 | ...> | |
362 | ( | |
363 | return <+...n...+>; | |
364 | | | |
365 | return@p2 ...; | |
366 | | | |
367 | n = E1 | |
368 | | | |
369 | E1 = n | |
370 | ) | |
371 | ||
372 | @M@ script:python @ | |
373 | p1 << r.p1; | |
374 | p2 << r.p2; | |
375 | @@@M | |
376 | ||
377 | print "* TODO [[view:%s::face=ovl-face1::linb=%s::colb=%s::cole=%s][inc. counter:%s::%s]]" % (p1[0].file,p1[0].line,p1[0].column,p1[0].column_end,p1[0].file,p1[0].line) | |
378 | print "[[view:%s::face=ovl-face2::linb=%s::colb=%s::cole=%s][return]]" % (p2[0].file,p2[0].line,p2[0].column,p2[0].column_end) | |
379 | \end{lstlisting} | |
380 | \end{tabular} | |
381 | ||
382 | ||
383 | \newpage | |
384 | ||
385 | Lines 13 to 17 list a variety of constructs that should not appear | |
386 | between a call to \texttt{of\_find\_node\_by\_name} and a buggy return | |
387 | site. Examples are a call to \texttt{of\_node\_put} (line 13) and a | |
388 | transition into the then branch of a conditional testing whether | |
389 | \texttt{n} is \texttt{NULL} (line 15). Any number of conditionals | |
390 | testing whether \texttt{n} is \texttt{NULL} are allowed as indicated | |
391 | by the use of a nest \texttt{<...~~...>} to describe the path between | |
392 | the call to \texttt{of\_find\_node\_by\_name}, the return and the | |
393 | conditional in the pattern on line 18.\\ | |
394 | ||
395 | The previous semantic match has been used to generate the following | |
396 | lines. They may be edited using the emacs Org mode to navigate in the code | |
397 | from one site to another. | |
398 | ||
399 | \begin{lstlisting}[language=,breaklines=true] | |
400 | * TODO [[view:/linux-next/arch/powerpc/platforms/pseries/setup.c::face=ovl-face1::linb=236::colb=18::cole=20][inc. counter:/linux-next/arch/powerpc/platforms/pseries/setup.c::236]] | |
401 | [[view:/linux-next/arch/powerpc/platforms/pseries/setup.c::face=ovl-face2::linb=250::colb=3::cole=9][return]] | |
402 | * TODO [[view:/linux-next/arch/powerpc/platforms/pseries/setup.c::face=ovl-face1::linb=236::colb=18::cole=20][inc. counter:/linux-next/arch/powerpc/platforms/pseries/setup.c::236]] | |
403 | [[view:/linux-next/arch/powerpc/platforms/pseries/setup.c::face=ovl-face2::linb=245::colb=3::cole=9][return]] | |
404 | \end{lstlisting} | |
405 | ||
0708f913 C |
406 | Note: Coccinelle provides some predefined Python functions, |
407 | \emph{i.e.}, \texttt{cocci.print\_main}, \texttt{cocci.print\_sec} and | |
408 | \texttt{cocci.print\_secs}. One could alternatively write the following | |
409 | SmPL rule instead of the previously presented one. | |
410 | ||
411 | \begin{tabular}{c} | |
412 | \begin{lstlisting}[language=Cocci] | |
413 | @M@ script:python @ | |
414 | p1 << r.p1; | |
415 | p2 << r.p2; | |
416 | @@@M | |
417 | ||
951c7801 C |
418 | cocci.print_main(p1) |
419 | cocci.print_sec(p2,"return") | |
0708f913 C |
420 | \end{lstlisting} |
421 | \end{tabular}\\ | |
422 | ||
423 | The function \texttt{cocci.print\_secs} is used when several | |
424 | positions are matched by a single position variable and | |
425 | every matched position should be printed. | |
426 | ||
427 | Any metavariable could be inherited in the Python code. However, | |
428 | accessible fields are not currently equally supported among them. | |
429 | ||
951c7801 C |
430 | \newpage |
431 | \subsection{Filtering identifiers, declarers or iterators with regular expressions} | |
432 | ||
433 | If you consider the following SmPL file which uses the regexp functionality to | |
434 | filter the identifiers that contain, begin with or end with \texttt{foo}, | |
435 | ||
436 | \begin{tabular}{c@{\hspace{2cm}}c} | |
437 | \begin{lstlisting}[language=Cocci, name=Regexp] | |
438 | @M@anyid@ | |
439 | type t; | |
440 | identifier id; | |
441 | @@@M | |
442 | t id () {...} | |
443 | ||
444 | @M@script:python@ | |
445 | x << anyid.id; | |
446 | @@@M | |
447 | print "Identifier: %s" % x | |
448 | ||
449 | @M@contains@ | |
450 | type t; | |
451 | identifier foo ~= ".*foo"; | |
452 | @@@M | |
453 | t foo () {...} | |
454 | ||
455 | @M@script:python@ | |
456 | x << contains.foo; | |
457 | @@@M | |
458 | print "Contains foo: %s" % x | |
459 | ||
460 | \end{lstlisting} | |
461 | & | |
462 | \begin{lstlisting}[language=Cocci,name=Regexp] | |
463 | @M@endsby@ | |
464 | type t; | |
465 | identifier foo ~= ".*foo$"; | |
466 | @@@M | |
467 | ||
468 | t foo () {...} | |
469 | ||
470 | @M@script:python@ | |
471 | x << endsby.foo; | |
472 | @@@M | |
473 | print "Ends by foo: %s" % x | |
474 | ||
475 | @M@beginsby@ | |
476 | type t; | |
477 | identifier foo ~= "^foo"; | |
478 | @@@M | |
479 | t foo () {...} | |
480 | ||
481 | @M@script:python@ | |
482 | x << beginsby.foo; | |
483 | @@@M | |
484 | print "Begins by foo: %s" % x | |
485 | \end{lstlisting} | |
486 | \end{tabular}\\ | |
487 | ||
488 | and the following C program, on the left, which defines the functions | |
489 | \texttt{foo}, \texttt{bar}, \texttt{foobar}, \texttt{barfoobar} and | |
490 | \texttt{barfoo}, you will get the result on the right. | |
491 | ||
492 | \begin{tabular}{c@{\hspace{2cm}}c} | |
493 | \begin{lstlisting} | |
494 | int foo () { return 0; } | |
495 | int bar () { return 0; } | |
496 | int foobar () { return 0; } | |
497 | int barfoobar () { return 0; } | |
498 | int barfoo () { return 0; } | |
499 | \end{lstlisting} | |
500 | & | |
501 | \begin{lstlisting} | |
502 | Identifier: foo | |
503 | Identifier: bar | |
504 | Identifier: foobar | |
505 | Identifier: barfoobar | |
506 | Identifier: barfoo | |
507 | Contains foo: foo | |
508 | Contains foo: foobar | |
509 | Contains foo: barfoobar | |
510 | Contains foo: barfoo | |
511 | Ends by foo: foo | |
512 | Ends by foo: barfoo | |
513 | Begins by foo: foo | |
514 | Begins by foo: foobar | |
515 | \end{lstlisting} | |
516 | \end{tabular} | |
517 | ||
faf9a90c C |
518 | % \begin{tabular}{ccc} |
519 | % Before & Semantic patch & After \\ | |
520 | % \begin{minipage}[t]{.3\linewidth} | |
521 | % \begin{lstlisting} | |
522 | % \end{lstlisting} | |
523 | % \end{minipage} | |
524 | % & | |
525 | % \begin{minipage}[t]{.3\linewidth} | |
526 | % \begin{lstlisting}[language=Cocci] | |
527 | % \end{lstlisting} | |
528 | % \end{minipage} | |
529 | % & | |
530 | % \begin{minipage}[t]{.3\linewidth} | |
531 | % \begin{lstlisting} | |
532 | % \end{lstlisting} | |
533 | % \end{minipage}\\ | |
534 | % \end{tabular} | |
535 | ||
536 | %%% Local Variables: | |
537 | %%% mode: LaTeX | |
951c7801 | 538 | %%% TeX-master: "main_grammar" |
5636bb2c | 539 | %%% coding: utf-8 |
faf9a90c C |
540 | %%% TeX-PDF-mode: t |
541 | %%% ispell-local-dictionary: "american" | |
542 | %%% End: |