| 1 | |
| 2 | \section{Examples} |
| 3 | %\label{sec:examples} |
| 4 | |
| 5 | This section presents a range of examples. Each |
| 6 | example is presented along with some C code to which it is |
| 7 | applied. The description explains the rules and the matching process. |
| 8 | |
| 9 | \subsection{Function renaming} |
| 10 | |
| 11 | One of the primary goals of Coccinelle is to perform software |
| 12 | evolution. For instance, Coccinelle could be used to perform function |
| 13 | renaming. In the following example, every occurrence of a call to the |
| 14 | function \texttt{foo} is replaced by a call to the |
| 15 | function \texttt{bar}.\\ |
| 16 | |
| 17 | \begin{tabular}{ccc} |
| 18 | Before & Semantic patch & After \\ |
| 19 | \begin{minipage}[t]{.3\linewidth} |
| 20 | \begin{lstlisting} |
| 21 | #DEFINE TEST "foo" |
| 22 | |
| 23 | printf("foo"); |
| 24 | |
| 25 | int main(int i) { |
| 26 | //Test |
| 27 | int k = foo(); |
| 28 | |
| 29 | if(1) { |
| 30 | foo(); |
| 31 | } else { |
| 32 | foo(); |
| 33 | } |
| 34 | |
| 35 | foo(); |
| 36 | } |
| 37 | \end{lstlisting} |
| 38 | \end{minipage} |
| 39 | & |
| 40 | \begin{minipage}[t]{.3\linewidth} |
| 41 | \begin{lstlisting}[language=Cocci] |
| 42 | @M@@ |
| 43 | |
| 44 | @@@M |
| 45 | |
| 46 | |
| 47 | @-- foo() |
| 48 | @++ bar() |
| 49 | \end{lstlisting} |
| 50 | \end{minipage} |
| 51 | & |
| 52 | \begin{minipage}[t]{.3\linewidth} |
| 53 | \begin{lstlisting} |
| 54 | #DEFINE TEST "foo" |
| 55 | |
| 56 | printf("foo"); |
| 57 | |
| 58 | int main(int i) { |
| 59 | //Test |
| 60 | int k = bar(); |
| 61 | |
| 62 | if(1) { |
| 63 | bar(); |
| 64 | } else { |
| 65 | bar(); |
| 66 | } |
| 67 | |
| 68 | bar(); |
| 69 | } |
| 70 | \end{lstlisting} |
| 71 | \end{minipage}\\ |
| 72 | \end{tabular} |
| 73 | |
| 74 | \newpage |
| 75 | \subsection{Removing a function argument} |
| 76 | |
| 77 | Another important kind of evolution is the introduction or deletion of a |
| 78 | function argument. In the following example, the rule \texttt{rule1} looks |
| 79 | for definitions of functions having return type \texttt{irqreturn\_t} and |
| 80 | two parameters. A second \emph{anonymous} rule then looks for calls to the |
| 81 | previously matched functions that have three arguments. The third argument |
| 82 | is then removed to correspond to the new function prototype.\\ |
| 83 | |
| 84 | \begin{tabular}{c} |
| 85 | \begin{lstlisting}[language=Cocci,name=arg] |
| 86 | @M@ rule1 @ |
| 87 | identifier fn; |
| 88 | identifier irq, dev_id; |
| 89 | typedef irqreturn_t; |
| 90 | @@@M |
| 91 | |
| 92 | static irqreturn_t fn (int irq, void *dev_id) |
| 93 | { |
| 94 | ... |
| 95 | } |
| 96 | |
| 97 | @M@@ |
| 98 | identifier rule1.fn; |
| 99 | expression E1, E2, E3; |
| 100 | @@@M |
| 101 | |
| 102 | fn(E1, E2 |
| 103 | @-- ,E3 |
| 104 | ) |
| 105 | \end{lstlisting}\\ |
| 106 | \end{tabular} |
| 107 | |
| 108 | \vspace{1cm} |
| 109 | |
| 110 | \begin{tabular}{c} |
| 111 | \texttt{drivers/atm/firestream.c} at line 1653 before transformation\\ |
| 112 | \begin{lstlisting}[language=PatchC] |
| 113 | static void fs_poll (unsigned long data) |
| 114 | { |
| 115 | struct fs_dev *dev = (struct fs_dev *) data; |
| 116 | |
| 117 | @- fs_irq (0, dev, NULL); |
| 118 | dev->timer.expires = jiffies + FS_POLL_FREQ; |
| 119 | add_timer (&dev->timer); |
| 120 | } |
| 121 | \end{lstlisting}\\ |
| 122 | \vspace{1cm} |
| 123 | \\ |
| 124 | |
| 125 | |
| 126 | \texttt{drivers/atm/firestream.c} at line 1653 after transformation\\ |
| 127 | \begin{lstlisting}[language=PatchC] |
| 128 | static void fs_poll (unsigned long data) |
| 129 | { |
| 130 | struct fs_dev *dev = (struct fs_dev *) data; |
| 131 | |
| 132 | @+ fs_irq (0, dev); |
| 133 | dev->timer.expires = jiffies + FS_POLL_FREQ; |
| 134 | add_timer (&dev->timer); |
| 135 | } |
| 136 | \end{lstlisting}\\ |
| 137 | \end{tabular} |
| 138 | |
| 139 | \newpage |
| 140 | \subsection{Introduction of a macro} |
| 141 | |
| 142 | To avoid code duplication or error-prone code, the kernel provides |
| 143 | macros such as \texttt{BUG\_ON}, \texttt{DIV\_ROUND\_UP} and |
| 144 | \texttt{FIELD\_SIZE}. In these cases, the semantic patches look for |
| 145 | the old code pattern and replace it by the new code.\\ |
| 146 | |
| 147 | A semantic patch to introduce uses of the \texttt{DIV\_ROUND\_UP} macro |
| 148 | looks for the corresponding expression, \emph{i.e.}, $(n + d - 1) / |
| 149 | d$. When some code is matched, the metavariables \texttt{n} and \texttt{d} |
| 150 | are bound to their corresponding expressions. Finally, Coccinelle rewrites |
| 151 | the code with the \texttt{DIV\_ROUND\_UP} macro using the values bound to |
| 152 | \texttt{n} and \texttt{d}, as illustrated in the patch that follows.\\ |
| 153 | |
| 154 | \begin{tabular}{c} |
| 155 | Semantic patch to introduce uses of the \texttt{DIV\_ROUND\_UP} macro\\ |
| 156 | \begin{lstlisting}[language=Cocci,name=divround] |
| 157 | @M@ haskernel @ |
| 158 | @@@M |
| 159 | |
| 160 | #include <linux/kernel.h> |
| 161 | |
| 162 | @M@ depends on haskernel @ |
| 163 | expression n,d; |
| 164 | @@@M |
| 165 | |
| 166 | ( |
| 167 | @-- ((((n) + (d)) - 1) / (d)) |
| 168 | @++ DIV_ROUND_UP(n,d) |
| 169 | | |
| 170 | @-- (((n) + ((d) - 1)) / (d)) |
| 171 | @++ DIV_ROUND_UP(n,d) |
| 172 | ) |
| 173 | \end{lstlisting} |
| 174 | \end{tabular}\\ |
| 175 | |
| 176 | \vspace{1cm} |
| 177 | |
| 178 | \begin{tabular}{c} |
| 179 | Example of a generated patch hunk\\ |
| 180 | \begin{lstlisting}[language=PatchC] |
| 181 | @---- a/drivers/atm/horizon.c |
| 182 | @++++ b/drivers/atm/horizon.c |
| 183 | @M@@ -698,7 +698,7 @@ got_it: |
| 184 | if (bits) |
| 185 | *bits = (div<<CLOCK_SELECT_SHIFT) | (pre-1); |
| 186 | if (actual) { |
| 187 | @-- *actual = (br + (pre<<div) - 1) / (pre<<div); |
| 188 | @++ *actual = DIV_ROUND_UP(br, pre<<div); |
| 189 | PRINTD (DBG_QOS, "actual rate: %u", *actual); |
| 190 | } |
| 191 | return 0; |
| 192 | \end{lstlisting} |
| 193 | \end{tabular}\\ |
| 194 | |
| 195 | \newpage |
| 196 | |
| 197 | The \texttt{BUG\_ON} macro makes an assertion about the value of an |
| 198 | expression. However, because some parts of the kernel define |
| 199 | \texttt{BUG\_ON} to be the empty statement when debugging is not wanted, |
| 200 | care must be taken when the asserted expression may have some side effects, |
| 201 | as is the case of a function call. Thus, we create a rule introducing |
| 202 | \texttt{BUG\_ON} only in the case when the asserted expression does not |
| 203 | perform a function call. |
| 204 | |
| 205 | One particular piece of code that has the form of a function call is a use |
| 206 | of \texttt{unlikely}, which informs the compiler that a particular |
| 207 | expression is unlikely to be true. In this case, because \texttt{unlikely} |
| 208 | does not perform any side effects, it is safe to use \texttt{BUG\_ON}. The |
| 209 | second rule takes care of this case. It furthermore disables the |
| 210 | isomorphism that allows a call to \texttt{unlikely} to be replaced with its |
| 211 | argument, as then the second rule would be the same as the first one.\\ |
| 212 | |
| 213 | \begin{tabular}{c} |
| 214 | \begin{lstlisting}[language=Cocci,name=bugon] |
| 215 | @M@@ |
| 216 | expression E,f; |
| 217 | @@@M |
| 218 | |
| 219 | ( |
| 220 | if (<+... f(...) ...+>) { BUG(); } |
| 221 | | |
| 222 | @-- if (E) { BUG(); } |
| 223 | @++ BUG_ON(E); |
| 224 | ) |
| 225 | |
| 226 | @M@ disable unlikely @ |
| 227 | expression E,f; |
| 228 | @@@M |
| 229 | |
| 230 | ( |
| 231 | if (<+... f(...) ...+>) { BUG(); } |
| 232 | | |
| 233 | @-- if (unlikely(E)) { BUG(); } |
| 234 | @++ BUG_ON(E); |
| 235 | ) |
| 236 | \end{lstlisting}\\ |
| 237 | \end{tabular}\\ |
| 238 | |
| 239 | For instance, using the semantic patch above, Coccinelle generates |
| 240 | patches like the following one. |
| 241 | |
| 242 | \begin{tabular}{c} |
| 243 | \begin{lstlisting}[language=PatchC] |
| 244 | @---- a/fs/ext3/balloc.c |
| 245 | @++++ b/fs/ext3/balloc.c |
| 246 | @M@@ -232,8 +232,7 @@ restart: |
| 247 | prev = rsv; |
| 248 | } |
| 249 | printk("Window map complete.\n"); |
| 250 | @-- if (bad) |
| 251 | @-- BUG(); |
| 252 | @++ BUG_ON(bad); |
| 253 | } |
| 254 | #define rsv_window_dump(root, verbose) \ |
| 255 | __rsv_window_dump((root), (verbose), __FUNCTION__) |
| 256 | \end{lstlisting} |
| 257 | \end{tabular} |
| 258 | |
| 259 | \newpage |
| 260 | \subsection{Look for \texttt{NULL} dereference} |
| 261 | |
| 262 | This SmPL match looks for \texttt{NULL} dereferences. Once an |
| 263 | expression has been compared to \texttt{NULL}, a dereference to this |
| 264 | expression is prohibited unless the pointer variable is reassigned.\\ |
| 265 | |
| 266 | \begin{tabular}{c} |
| 267 | Original \\ |
| 268 | |
| 269 | \begin{lstlisting} |
| 270 | foo = kmalloc(1024); |
| 271 | if (!foo) { |
| 272 | printk ("Error %s", foo->here); |
| 273 | return; |
| 274 | } |
| 275 | foo->ok = 1; |
| 276 | return; |
| 277 | \end{lstlisting}\\ |
| 278 | \end{tabular} |
| 279 | |
| 280 | \vspace{1cm} |
| 281 | |
| 282 | \begin{tabular}{c} |
| 283 | Semantic match\\ |
| 284 | |
| 285 | \begin{lstlisting}[language=Cocci] |
| 286 | @M@@ |
| 287 | expression E, E1; |
| 288 | identifier f; |
| 289 | statement S1,S2,S3; |
| 290 | @@@M |
| 291 | |
| 292 | @+* if (E == NULL) |
| 293 | { |
| 294 | ... when != if (E == NULL) S1 else S2 |
| 295 | when != E = E1 |
| 296 | @+* E->f |
| 297 | ... when any |
| 298 | return ...; |
| 299 | } |
| 300 | else S3 |
| 301 | \end{lstlisting}\\ |
| 302 | \end{tabular} |
| 303 | |
| 304 | \vspace{1cm} |
| 305 | |
| 306 | \begin{tabular}{c} |
| 307 | Matched lines\\ |
| 308 | |
| 309 | \begin{lstlisting}[language=PatchC] |
| 310 | foo = kmalloc(1024); |
| 311 | @-if (!foo) { |
| 312 | @- printk ("Error %s", foo->here); |
| 313 | return; |
| 314 | } |
| 315 | foo->ok = 1; |
| 316 | return; |
| 317 | \end{lstlisting}\\ |
| 318 | \end{tabular} |
| 319 | |
| 320 | \newpage |
| 321 | \subsection{Reference counter: the of\_xxx API} |
| 322 | |
| 323 | Coccinelle can embed Python code. Python code is used inside special |
| 324 | SmPL rules annotated with \texttt{script:python}. Python rules inherit |
| 325 | metavariables, such as identifier or token positions, from other SmPL |
| 326 | rules. The inherited metavariables can then be manipulated by Python |
| 327 | code. |
| 328 | |
| 329 | The following semantic match looks for a call to the |
| 330 | \texttt{of\_find\_node\_by\_name} function. This call increments a |
| 331 | counter which must be decremented to release the resource. Then, when |
| 332 | there is no call to \texttt{of\_node\_put}, no new assignment to the |
| 333 | \texttt{device\_node} variable \texttt{n} and a \texttt{return} |
| 334 | statement is reached, a bug is detected and the positions \texttt{p1} |
| 335 | and \texttt{p2} are initialized. As the Python code only depends on the |
| 336 | positions \texttt{p1} and \texttt{p2}, it is evaluated. In the |
| 337 | following case, some emacs Org mode data are produced. This example |
| 338 | illustrates the various fields that can be accessed in the Python code from |
| 339 | a position variable. |
| 340 | |
| 341 | \begin{tabular}{c} |
| 342 | \begin{lstlisting}[language=Cocci,breaklines=true] |
| 343 | @M@ r exists @ |
| 344 | local idexpression struct device_node *n; |
| 345 | position p1, p2; |
| 346 | statement S1,S2; |
| 347 | expression E,E1; |
| 348 | @@@M |
| 349 | |
| 350 | ( |
| 351 | if (!(n@p1 = of_find_node_by_name(...))) S1 |
| 352 | | |
| 353 | n@p1 = of_find_node_by_name(...) |
| 354 | ) |
| 355 | <... when != of_node_put(n) |
| 356 | when != if (...) { <+... of_node_put(n) ...+> } |
| 357 | when != true !n || ... |
| 358 | when != n = E |
| 359 | when != E = n |
| 360 | if (!n || ...) S2 |
| 361 | ...> |
| 362 | ( |
| 363 | return <+...n...+>; |
| 364 | | |
| 365 | return@p2 ...; |
| 366 | | |
| 367 | n = E1 |
| 368 | | |
| 369 | E1 = n |
| 370 | ) |
| 371 | |
| 372 | @M@ script:python @ |
| 373 | p1 << r.p1; |
| 374 | p2 << r.p2; |
| 375 | @@@M |
| 376 | |
| 377 | print "* TODO [[view:%s::face=ovl-face1::linb=%s::colb=%s::cole=%s][inc. counter:%s::%s]]" % (p1[0].file,p1[0].line,p1[0].column,p1[0].column_end,p1[0].file,p1[0].line) |
| 378 | print "[[view:%s::face=ovl-face2::linb=%s::colb=%s::cole=%s][return]]" % (p2[0].file,p2[0].line,p2[0].column,p2[0].column_end) |
| 379 | \end{lstlisting} |
| 380 | \end{tabular} |
| 381 | |
| 382 | |
| 383 | \newpage |
| 384 | |
| 385 | Lines 13 to 17 list a variety of constructs that should not appear |
| 386 | between a call to \texttt{of\_find\_node\_by\_name} and a buggy return |
| 387 | site. Examples are a call to \texttt{of\_node\_put} (line 13) and a |
| 388 | transition into the then branch of a conditional testing whether |
| 389 | \texttt{n} is \texttt{NULL} (line 15). Any number of conditionals |
| 390 | testing whether \texttt{n} is \texttt{NULL} are allowed as indicated |
| 391 | by the use of a nest \texttt{<...~~...>} to describe the path between |
| 392 | the call to \texttt{of\_find\_node\_by\_name}, the return and the |
| 393 | conditional in the pattern on line 18.\\ |
| 394 | |
| 395 | The previous semantic match has been used to generate the following |
| 396 | lines. They may be edited using the emacs Org mode to navigate in the code |
| 397 | from one site to another. |
| 398 | |
| 399 | \begin{lstlisting}[language=,breaklines=true] |
| 400 | * TODO [[view:/linux-next/arch/powerpc/platforms/pseries/setup.c::face=ovl-face1::linb=236::colb=18::cole=20][inc. counter:/linux-next/arch/powerpc/platforms/pseries/setup.c::236]] |
| 401 | [[view:/linux-next/arch/powerpc/platforms/pseries/setup.c::face=ovl-face2::linb=250::colb=3::cole=9][return]] |
| 402 | * TODO [[view:/linux-next/arch/powerpc/platforms/pseries/setup.c::face=ovl-face1::linb=236::colb=18::cole=20][inc. counter:/linux-next/arch/powerpc/platforms/pseries/setup.c::236]] |
| 403 | [[view:/linux-next/arch/powerpc/platforms/pseries/setup.c::face=ovl-face2::linb=245::colb=3::cole=9][return]] |
| 404 | \end{lstlisting} |
| 405 | |
| 406 | Note: Coccinelle provides some predefined Python functions, |
| 407 | \emph{i.e.}, \texttt{cocci.print\_main}, \texttt{cocci.print\_sec} and |
| 408 | \texttt{cocci.print\_secs}. One could alternatively write the following |
| 409 | SmPL rule instead of the previously presented one. |
| 410 | |
| 411 | \begin{tabular}{c} |
| 412 | \begin{lstlisting}[language=Cocci] |
| 413 | @M@ script:python @ |
| 414 | p1 << r.p1; |
| 415 | p2 << r.p2; |
| 416 | @@@M |
| 417 | |
| 418 | cocci.print_main("",p1) |
| 419 | cocci.print_sec("return",p2) |
| 420 | \end{lstlisting} |
| 421 | \end{tabular}\\ |
| 422 | |
| 423 | The function \texttt{cocci.print\_secs} is used when several |
| 424 | positions are matched by a single position variable and |
| 425 | every matched position should be printed. |
| 426 | |
| 427 | Any metavariable can be inherited in the Python code. However, the |
| 428 | accessible fields are not currently supported equally for all of them. |
| 429 | |
| 430 | % \begin{tabular}{ccc} |
| 431 | % Before & Semantic patch & After \\ |
| 432 | % \begin{minipage}[t]{.3\linewidth} |
| 433 | % \begin{lstlisting} |
| 434 | % \end{lstlisting} |
| 435 | % \end{minipage} |
| 436 | % & |
| 437 | % \begin{minipage}[t]{.3\linewidth} |
| 438 | % \begin{lstlisting}[language=Cocci] |
| 439 | % \end{lstlisting} |
| 440 | % \end{minipage} |
| 441 | % & |
| 442 | % \begin{minipage}[t]{.3\linewidth} |
| 443 | % \begin{lstlisting} |
| 444 | % \end{lstlisting} |
| 445 | % \end{minipage}\\ |
| 446 | % \end{tabular} |
| 447 | |
| 448 | %%% Local Variables: |
| 449 | %%% mode: LaTeX |
| 450 | %%% TeX-master: "cocci_syntax" |
| 451 | %%% coding: latin-9 |
| 452 | %%% TeX-PDF-mode: t |
| 453 | %%% ispell-local-dictionary: "american" |
| 454 | %%% End: |