Commit | Line | Data |
---|---|---|
0efc778b | 1 | /* Profiler implementation. |
c2d7786e TM |
2 | |
3 | Copyright (C) 2012 Free Software Foundation, Inc. | |
4 | ||
5 | This file is part of GNU Emacs. | |
6 | ||
7 | GNU Emacs is free software: you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation, either version 3 of the License, or | |
10 | (at your option) any later version. | |
11 | ||
12 | GNU Emacs is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include <config.h> | |
21 | #include <stdio.h> | |
22 | #include <limits.h> | |
23 | #include <sys/time.h> | |
24 | #include <signal.h> | |
25 | #include <setjmp.h> | |
26 | #include "lisp.h" | |
704d3f45 | 27 | #include "syssignal.h" |
c2d7786e | 28 | |
3d80c99f | 29 | /* Logs. */ |
c2d7786e | 30 | |
3d80c99f | 31 | typedef struct Lisp_Hash_Table log_t; |
c2d7786e TM |
32 | |
33 | static Lisp_Object | |
3d80c99f SM |
34 | make_log (int heap_size, int max_stack_depth) |
35 | { | |
36 | /* We use a standard Elisp hash-table object, but we use it in | |
37 | a special way. This is OK as long as the object is not exposed | |
38 | to Elisp, i.e. until it is returned by *-profiler-log, after which | |
39 | it can't be used any more. */ | |
40 | Lisp_Object log = make_hash_table (Qequal, make_number (heap_size), | |
41 | make_float (DEFAULT_REHASH_SIZE), | |
42 | make_float (DEFAULT_REHASH_THRESHOLD), | |
43 | Qnil, Qnil, Qnil); | |
44 | struct Lisp_Hash_Table *h = XHASH_TABLE (log); | |
45 | ||
46 | /* What is special about our hash-tables is that the keys are pre-filled | |
47 | with the vectors we'll put in them. */ | |
48 | int i = ASIZE (h->key_and_value) / 2; | |
49 | while (0 < i) | |
50 | set_hash_key_slot (h, --i, | |
51 | Fmake_vector (make_number (max_stack_depth), Qnil)); | |
52 | return log; | |
c2d7786e TM |
53 | } |
54 | ||
3d80c99f | 55 | /* Evict the least used half of the hash_table. |
c2d7786e | 56 | |
3d80c99f SM |
57 | When the table is full, we have to evict someone. |
58 | The easiest and most efficient is to evict the value we're about to add | |
59 | (i.e. once the table is full, stop sampling). | |
c2d7786e | 60 | |
3d80c99f SM |
61 | We could also pick the element with the lowest count and evict it, |
62 | but finding it is O(N) and for that amount of work we get very | |
63 | little in return: for the next sample, this latest sample will have | |
64 | count==1 and will hence be a prime candidate for eviction :-( | |
c2d7786e | 65 | |
3d80c99f SM |
66 | So instead, we take O(N) time to eliminate more or less half of the |
67 | entries (the half with the lowest counts). So we get an amortized | |
68 | cost of O(1) and we get O(N) time for a new entry to grow larger | |
69 | than the other least counts before a new round of eviction. */ | |
c2d7786e | 70 | |
3d80c99f SM |
71 | static EMACS_INT approximate_median (log_t *log, |
72 | ptrdiff_t start, ptrdiff_t size) | |
c2d7786e | 73 | { |
3d80c99f SM |
74 | eassert (size > 0); |
75 | if (size < 2) | |
76 | return XINT (HASH_VALUE (log, start)); | |
77 | if (size < 3) | |
78 | /* Not an actual median, but better for our application than | |
79 | choosing either of the two numbers. */ | |
80 | return ((XINT (HASH_VALUE (log, start)) | |
81 | + XINT (HASH_VALUE (log, start + 1))) | |
82 | / 2); | |
c2d7786e | 83 | else |
c2d7786e | 84 | { |
3d80c99f SM |
85 | ptrdiff_t newsize = size / 3; |
86 | ptrdiff_t start2 = start + newsize; | |
87 | EMACS_INT i1 = approximate_median (log, start, newsize); | |
88 | EMACS_INT i2 = approximate_median (log, start2, newsize); | |
89 | EMACS_INT i3 = approximate_median (log, start2 + newsize, | |
90 | size - 2 * newsize); | |
91 | return (i1 < i2 | |
92 | ? (i2 < i3 ? i2 : (i1 < i3 ? i3 : i1)) | |
93 | : (i1 < i3 ? i1 : (i2 < i3 ? i3 : i2))); | |
c2d7786e | 94 | } |
c2d7786e TM |
95 | } |
96 | ||
3d80c99f | 97 | static void evict_lower_half (log_t *log) |
c2d7786e | 98 | { |
3d80c99f SM |
99 | ptrdiff_t size = ASIZE (log->key_and_value) / 2; |
100 | EMACS_INT median = approximate_median (log, 0, size); | |
101 | ptrdiff_t i; | |
c2d7786e | 102 | |
c2d7786e | 103 | for (i = 0; i < size; i++) |
3d80c99f SM |
104 | /* Evict not only values smaller but also values equal to the median, |
105 | so as to make sure we evict something no matter what. */ | |
106 | if (XINT (HASH_VALUE (log, i)) <= median) | |
107 | { | |
108 | Lisp_Object key = HASH_KEY (log, i); | |
109 | { /* FIXME: we could make this more efficient. */ | |
110 | Lisp_Object tmp; | |
111 | XSET_HASH_TABLE (tmp, log); /* FIXME: Use make_lisp_ptr. */ | |
112 | Fremhash (key, tmp); | |
113 | } | |
114 | eassert (EQ (log->next_free, make_number (i))); | |
115 | { | |
116 | int j; | |
117 | eassert (VECTORP (key)); | |
118 | for (j = 0; j < ASIZE (key); j++) | |
ad942b63 | 119 | ASET (key, j, Qnil); |
3d80c99f SM |
120 | } |
121 | set_hash_key_slot (log, i, key); | |
122 | } | |
c2d7786e TM |
123 | } |
124 | ||
0efc778b TM |
125 | /* Record the current backtrace in LOG. BASE is a special name for |
126 | describing which the backtrace come from. BASE can be nil. COUNT is | |
127 | a number how many times the profiler sees the backtrace at the | |
128 | time. ELAPSED is a elapsed time in millisecond that the backtrace | |
129 | took. */ | |
130 | ||
c2d7786e | 131 | static void |
3d80c99f | 132 | record_backtrace (log_t *log, size_t count) |
c2d7786e | 133 | { |
c2d7786e | 134 | struct backtrace *backlist = backtrace_list; |
3d80c99f SM |
135 | Lisp_Object backtrace; |
136 | ptrdiff_t index, i = 0; | |
137 | ptrdiff_t asize; | |
c2d7786e | 138 | |
3d80c99f | 139 | if (!INTEGERP (log->next_free)) |
611b7507 JB |
140 | /* FIXME: transfer the evicted counts to a special entry rather |
141 | than dropping them on the floor. */ | |
3d80c99f SM |
142 | evict_lower_half (log); |
143 | index = XINT (log->next_free); | |
c2d7786e | 144 | |
3d80c99f SM |
145 | /* Get a "working memory" vector. */ |
146 | backtrace = HASH_KEY (log, index); | |
147 | asize = ASIZE (backtrace); | |
12b3895d | 148 | |
0efc778b | 149 | /* Copy the backtrace contents into working memory. */ |
3d80c99f | 150 | for (; i < asize && backlist; i++, backlist = backlist->next) |
611b7507 | 151 | /* FIXME: For closures we should ignore the environment. */ |
e7c1b6ef | 152 | ASET (backtrace, i, backlist->function); |
0efc778b | 153 | |
3d80c99f SM |
154 | /* Make sure that unused space of working memory is filled with nil. */ |
155 | for (; i < asize; i++) | |
156 | ASET (backtrace, i, Qnil); | |
c2d7786e | 157 | |
3d80c99f SM |
158 | { /* We basically do a `gethash+puthash' here, except that we have to be |
159 | careful to avoid memory allocation since we're in a signal | |
160 | handler, and we optimize the code to try and avoid computing the | |
161 | hash+lookup twice. See fns.c:Fputhash for reference. */ | |
162 | EMACS_UINT hash; | |
163 | ptrdiff_t j = hash_lookup (log, backtrace, &hash); | |
164 | if (j >= 0) | |
165 | set_hash_value_slot (log, j, | |
166 | make_number (count + XINT (HASH_VALUE (log, j)))); | |
167 | else | |
168 | { /* BEWARE! hash_put in general can allocate memory. | |
169 | But currently it only does that if log->next_free is nil. */ | |
170 | int j; | |
171 | eassert (!NILP (log->next_free)); | |
172 | j = hash_put (log, backtrace, make_number (count), hash); | |
173 | /* Let's make sure we've put `backtrace' right where it | |
174 | already was to start with. */ | |
175 | eassert (index == j); | |
176 | ||
177 | /* FIXME: If the hash-table is almost full, we should set | |
178 | some global flag so that some Elisp code can offload its | |
611b7507 JB |
179 | data elsewhere, so as to avoid the eviction code. |
180 | There are 2 ways to do that, AFAICT: | |
181 | - Set a flag checked in QUIT, such that QUIT can then call | |
182 | Fprofiler_cpu_log and stash the full log for later use. | |
183 | - Set a flag check in post-gc-hook, so that Elisp code can call | |
184 | profiler-cpu-log. That gives us more flexibility since that | |
185 | Elisp code can then do all kinds of fun stuff like write | |
186 | the log to disk. Or turn it right away into a call tree. | |
187 | Of course, using Elisp is generally preferable, but it may | |
188 | take longer until we get a chance to run the Elisp code, so | |
189 | there's more risk that the table will get full before we | |
190 | get there. */ | |
3d80c99f SM |
191 | } |
192 | } | |
c2d7786e | 193 | } |
c2d7786e | 194 | \f |
0efc778b | 195 | /* Sample profiler. */ |
c2d7786e | 196 | |
3a880af4 SM |
197 | /* FIXME: Add support for the CPU profiler in W32. */ |
198 | /* FIXME: the sigprof_handler suffers from race-conditions if the signal | |
199 | is delivered to a thread other than the main Emacs thread. */ | |
200 | ||
ad942b63 SM |
201 | #if defined SIGPROF && defined HAVE_SETITIMER |
202 | #define PROFILER_CPU_SUPPORT | |
203 | ||
6521894d SM |
204 | /* True if sampling profiler is running. */ |
205 | static bool profiler_cpu_running; | |
206 | ||
3d80c99f SM |
207 | static Lisp_Object cpu_log; |
208 | /* Separate counter for the time spent in the GC. */ | |
209 | static EMACS_INT cpu_gc_count; | |
0efc778b TM |
210 | |
211 | /* The current sample interval in millisecond. */ | |
212 | ||
c2d7786e TM |
213 | static int current_sample_interval; |
214 | ||
6521894d SM |
215 | /* Signal handler for sample profiler. */ |
216 | ||
217 | static void | |
704d3f45 | 218 | sigprof_handler_1 (int signal) |
6521894d SM |
219 | { |
220 | eassert (HASH_TABLE_P (cpu_log)); | |
e7c1b6ef | 221 | if (backtrace_list && EQ (backtrace_list->function, Qautomatic_gc)) |
6521894d SM |
222 | /* Special case the time-count inside GC because the hash-table |
223 | code is not prepared to be used while the GC is running. | |
224 | More specifically it uses ASIZE at many places where it does | |
225 | not expect the ARRAY_MARK_FLAG to be set. We could try and | |
226 | harden the hash-table code, but it doesn't seem worth the | |
227 | effort. */ | |
228 | cpu_gc_count += current_sample_interval; | |
229 | else | |
230 | record_backtrace (XHASH_TABLE (cpu_log), current_sample_interval); | |
231 | } | |
232 | ||
704d3f45 TM |
233 | static void |
234 | sigprof_handler (int signal) | |
235 | { | |
236 | deliver_process_signal (signal, sigprof_handler_1); | |
237 | } | |
238 | ||
6521894d | 239 | DEFUN ("profiler-cpu-start", Fprofiler_cpu_start, Sprofiler_cpu_start, |
c2d7786e | 240 | 1, 1, 0, |
6521894d SM |
241 | doc: /* Start or restart the cpu profiler. |
242 | The cpu profiler will take call-stack samples each SAMPLE-INTERVAL (expressed in milliseconds). | |
243 | See also `profiler-log-size' and `profiler-max-stack-depth'. */) | |
c2d7786e TM |
244 | (Lisp_Object sample_interval) |
245 | { | |
246 | struct sigaction sa; | |
247 | struct itimerval timer; | |
248 | ||
6521894d | 249 | if (profiler_cpu_running) |
c2d7786e TM |
250 | error ("Sample profiler is already running"); |
251 | ||
3d80c99f SM |
252 | if (NILP (cpu_log)) |
253 | { | |
254 | cpu_gc_count = 0; | |
6521894d | 255 | cpu_log = make_log (profiler_log_size, |
3d80c99f SM |
256 | profiler_max_stack_depth); |
257 | } | |
c2d7786e TM |
258 | |
259 | current_sample_interval = XINT (sample_interval); | |
260 | ||
3670daf7 TM |
261 | sa.sa_handler = sigprof_handler; |
262 | sa.sa_flags = SA_RESTART; | |
c2d7786e TM |
263 | sigemptyset (&sa.sa_mask); |
264 | sigaction (SIGPROF, &sa, 0); | |
265 | ||
266 | timer.it_interval.tv_sec = 0; | |
267 | timer.it_interval.tv_usec = current_sample_interval * 1000; | |
268 | timer.it_value = timer.it_interval; | |
269 | setitimer (ITIMER_PROF, &timer, 0); | |
270 | ||
234148bf | 271 | profiler_cpu_running = true; |
c2d7786e TM |
272 | |
273 | return Qt; | |
274 | } | |
275 | ||
6521894d | 276 | DEFUN ("profiler-cpu-stop", Fprofiler_cpu_stop, Sprofiler_cpu_stop, |
c2d7786e | 277 | 0, 0, 0, |
234148bf SM |
278 | doc: /* Stop the cpu profiler. The profiler log is not affected. |
279 | Return non-nil if the profiler was running. */) | |
c2d7786e TM |
280 | (void) |
281 | { | |
6521894d | 282 | if (!profiler_cpu_running) |
234148bf SM |
283 | return Qnil; |
284 | profiler_cpu_running = false; | |
c2d7786e TM |
285 | |
286 | setitimer (ITIMER_PROF, 0, 0); | |
287 | ||
288 | return Qt; | |
289 | } | |
290 | ||
6521894d SM |
291 | DEFUN ("profiler-cpu-running-p", |
292 | Fprofiler_cpu_running_p, Sprofiler_cpu_running_p, | |
c2d7786e | 293 | 0, 0, 0, |
6521894d | 294 | doc: /* Return non-nil iff cpu profiler is running. */) |
c2d7786e TM |
295 | (void) |
296 | { | |
6521894d | 297 | return profiler_cpu_running ? Qt : Qnil; |
c2d7786e TM |
298 | } |
299 | ||
6521894d | 300 | DEFUN ("profiler-cpu-log", Fprofiler_cpu_log, Sprofiler_cpu_log, |
c2d7786e | 301 | 0, 0, 0, |
6521894d SM |
302 | doc: /* Return the current cpu profiler log. |
303 | The log is a hash-table mapping backtraces to counters which represent | |
304 | the amount of time spent at those points. Every backtrace is a vector | |
305 | of functions, where the last few elements may be nil. | |
306 | Before returning, a new log is allocated for future samples. */) | |
c2d7786e TM |
307 | (void) |
308 | { | |
3d80c99f SM |
309 | Lisp_Object result = cpu_log; |
310 | /* Here we're making the log visible to Elisp , so it's not safe any | |
311 | more for our use afterwards since we can't rely on its special | |
312 | pre-allocated keys anymore. So we have to allocate a new one. */ | |
6521894d SM |
313 | cpu_log = (profiler_cpu_running |
314 | ? make_log (profiler_log_size, profiler_max_stack_depth) | |
3d80c99f SM |
315 | : Qnil); |
316 | Fputhash (Fmake_vector (make_number (1), Qautomatic_gc), | |
317 | make_number (cpu_gc_count), | |
318 | result); | |
319 | cpu_gc_count = 0; | |
c2d7786e TM |
320 | return result; |
321 | } | |
6521894d | 322 | #endif /* not defined PROFILER_CPU_SUPPORT */ |
c2d7786e | 323 | \f |
0efc778b | 324 | /* Memory profiler. */ |
c2d7786e | 325 | |
6521894d SM |
326 | /* True if memory profiler is running. */ |
327 | bool profiler_memory_running; | |
328 | ||
3d80c99f | 329 | static Lisp_Object memory_log; |
c2d7786e | 330 | |
6521894d | 331 | DEFUN ("profiler-memory-start", Fprofiler_memory_start, Sprofiler_memory_start, |
c2d7786e | 332 | 0, 0, 0, |
6521894d SM |
333 | doc: /* Start/restart the memory profiler. |
334 | The memory profiler will take samples of the call-stack whenever a new | |
335 | allocation takes place. Note that most small allocations only trigger | |
336 | the profiler occasionally. | |
337 | See also `profiler-log-size' and `profiler-max-stack-depth'. */) | |
c2d7786e TM |
338 | (void) |
339 | { | |
6521894d | 340 | if (profiler_memory_running) |
c2d7786e TM |
341 | error ("Memory profiler is already running"); |
342 | ||
3d80c99f | 343 | if (NILP (memory_log)) |
6521894d | 344 | memory_log = make_log (profiler_log_size, |
c2d7786e TM |
345 | profiler_max_stack_depth); |
346 | ||
234148bf | 347 | profiler_memory_running = true; |
c2d7786e TM |
348 | |
349 | return Qt; | |
350 | } | |
351 | ||
6521894d SM |
352 | DEFUN ("profiler-memory-stop", |
353 | Fprofiler_memory_stop, Sprofiler_memory_stop, | |
c2d7786e | 354 | 0, 0, 0, |
234148bf SM |
355 | doc: /* Stop the memory profiler. The profiler log is not affected. |
356 | Return non-nil if the profiler was running. */) | |
c2d7786e TM |
357 | (void) |
358 | { | |
6521894d | 359 | if (!profiler_memory_running) |
234148bf SM |
360 | return Qnil; |
361 | profiler_memory_running = false; | |
c2d7786e TM |
362 | return Qt; |
363 | } | |
364 | ||
6521894d SM |
365 | DEFUN ("profiler-memory-running-p", |
366 | Fprofiler_memory_running_p, Sprofiler_memory_running_p, | |
c2d7786e | 367 | 0, 0, 0, |
6521894d | 368 | doc: /* Return non-nil if memory profiler is running. */) |
c2d7786e TM |
369 | (void) |
370 | { | |
6521894d | 371 | return profiler_memory_running ? Qt : Qnil; |
c2d7786e TM |
372 | } |
373 | ||
6521894d SM |
374 | DEFUN ("profiler-memory-log", |
375 | Fprofiler_memory_log, Sprofiler_memory_log, | |
c2d7786e | 376 | 0, 0, 0, |
6521894d SM |
377 | doc: /* Return the current memory profiler log. |
378 | The log is a hash-table mapping backtraces to counters which represent | |
379 | the amount of memory allocated at those points. Every backtrace is a vector | |
380 | of functions, where the last few elements may be nil. | |
381 | Before returning, a new log is allocated for future samples. */) | |
c2d7786e TM |
382 | (void) |
383 | { | |
3d80c99f SM |
384 | Lisp_Object result = memory_log; |
385 | /* Here we're making the log visible to Elisp , so it's not safe any | |
386 | more for our use afterwards since we can't rely on its special | |
387 | pre-allocated keys anymore. So we have to allocate a new one. */ | |
6521894d SM |
388 | memory_log = (profiler_memory_running |
389 | ? make_log (profiler_log_size, profiler_max_stack_depth) | |
3d80c99f | 390 | : Qnil); |
c2d7786e TM |
391 | return result; |
392 | } | |
393 | ||
394 | \f | |
0efc778b | 395 | /* Signals and probes. */ |
c2d7786e | 396 | |
0efc778b | 397 | /* Record that the current backtrace allocated SIZE bytes. */ |
c2d7786e TM |
398 | void |
399 | malloc_probe (size_t size) | |
400 | { | |
ad942b63 SM |
401 | eassert (HASH_TABLE_P (memory_log)); |
402 | record_backtrace (XHASH_TABLE (memory_log), size); | |
c2d7786e TM |
403 | } |
404 | ||
405 | void | |
406 | syms_of_profiler (void) | |
407 | { | |
408 | DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth, | |
6521894d | 409 | doc: /* Number of elements from the call-stack recorded in the log. */); |
c2d7786e | 410 | profiler_max_stack_depth = 16; |
6521894d SM |
411 | DEFVAR_INT ("profiler-log-size", profiler_log_size, |
412 | doc: /* Number of distinct call-stacks that can be recorded in a profiler log. | |
413 | If the log gets full, some of the least-seen call-stacks will be evicted | |
414 | to make room for new entries. */); | |
415 | profiler_log_size = 10000; | |
c2d7786e | 416 | |
ad942b63 | 417 | #ifdef PROFILER_CPU_SUPPORT |
234148bf | 418 | profiler_cpu_running = false; |
ad942b63 SM |
419 | cpu_log = Qnil; |
420 | staticpro (&cpu_log); | |
6521894d SM |
421 | defsubr (&Sprofiler_cpu_start); |
422 | defsubr (&Sprofiler_cpu_stop); | |
423 | defsubr (&Sprofiler_cpu_running_p); | |
424 | defsubr (&Sprofiler_cpu_log); | |
ad942b63 | 425 | #endif |
234148bf | 426 | profiler_memory_running = false; |
ad942b63 SM |
427 | memory_log = Qnil; |
428 | staticpro (&memory_log); | |
6521894d SM |
429 | defsubr (&Sprofiler_memory_start); |
430 | defsubr (&Sprofiler_memory_stop); | |
431 | defsubr (&Sprofiler_memory_running_p); | |
432 | defsubr (&Sprofiler_memory_log); | |
c2d7786e | 433 | } |