Commit | Line | Data |
---|---|---|
0efc778b | 1 | /* Profiler implementation. |
c2d7786e TM |
2 | |
3 | Copyright (C) 2012 Free Software Foundation, Inc. | |
4 | ||
5 | This file is part of GNU Emacs. | |
6 | ||
7 | GNU Emacs is free software: you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation, either version 3 of the License, or | |
10 | (at your option) any later version. | |
11 | ||
12 | GNU Emacs is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include <config.h> | |
21 | #include <stdio.h> | |
22 | #include <limits.h> | |
23 | #include <sys/time.h> | |
24 | #include <signal.h> | |
25 | #include <setjmp.h> | |
26 | #include "lisp.h" | |
27 | ||
3d80c99f | 28 | /* Logs. */ |
c2d7786e | 29 | |
3d80c99f | 30 | typedef struct Lisp_Hash_Table log_t; |
c2d7786e TM |
31 | |
32 | static Lisp_Object | |
3d80c99f SM |
33 | make_log (int heap_size, int max_stack_depth) |
34 | { | |
35 | /* We use a standard Elisp hash-table object, but we use it in | |
36 | a special way. This is OK as long as the object is not exposed | |
37 | to Elisp, i.e. until it is returned by *-profiler-log, after which | |
38 | it can't be used any more. */ | |
39 | Lisp_Object log = make_hash_table (Qequal, make_number (heap_size), | |
40 | make_float (DEFAULT_REHASH_SIZE), | |
41 | make_float (DEFAULT_REHASH_THRESHOLD), | |
42 | Qnil, Qnil, Qnil); | |
43 | struct Lisp_Hash_Table *h = XHASH_TABLE (log); | |
44 | ||
45 | /* What is special about our hash-tables is that the keys are pre-filled | |
46 | with the vectors we'll put in them. */ | |
47 | int i = ASIZE (h->key_and_value) / 2; | |
48 | while (0 < i) | |
49 | set_hash_key_slot (h, --i, | |
50 | Fmake_vector (make_number (max_stack_depth), Qnil)); | |
51 | return log; | |
c2d7786e TM |
52 | } |
53 | ||
3d80c99f | 54 | /* Evict the least used half of the hash_table. |
c2d7786e | 55 | |
3d80c99f SM |
56 | When the table is full, we have to evict someone. |
57 | The easiest and most efficient is to evict the value we're about to add | |
58 | (i.e. once the table is full, stop sampling). | |
c2d7786e | 59 | |
3d80c99f SM |
60 | We could also pick the element with the lowest count and evict it, |
61 | but finding it is O(N) and for that amount of work we get very | |
62 | little in return: for the next sample, this latest sample will have | |
63 | count==1 and will hence be a prime candidate for eviction :-( | |
c2d7786e | 64 | |
3d80c99f SM |
65 | So instead, we take O(N) time to eliminate more or less half of the |
66 | entries (the half with the lowest counts). So we get an amortized | |
67 | cost of O(1) and we get O(N) time for a new entry to grow larger | |
68 | than the other least counts before a new round of eviction. */ | |
c2d7786e | 69 | |
3d80c99f SM |
70 | static EMACS_INT approximate_median (log_t *log, |
71 | ptrdiff_t start, ptrdiff_t size) | |
c2d7786e | 72 | { |
3d80c99f SM |
73 | eassert (size > 0); |
74 | if (size < 2) | |
75 | return XINT (HASH_VALUE (log, start)); | |
76 | if (size < 3) | |
77 | /* Not an actual median, but better for our application than | |
78 | choosing either of the two numbers. */ | |
79 | return ((XINT (HASH_VALUE (log, start)) | |
80 | + XINT (HASH_VALUE (log, start + 1))) | |
81 | / 2); | |
c2d7786e | 82 | else |
c2d7786e | 83 | { |
3d80c99f SM |
84 | ptrdiff_t newsize = size / 3; |
85 | ptrdiff_t start2 = start + newsize; | |
86 | EMACS_INT i1 = approximate_median (log, start, newsize); | |
87 | EMACS_INT i2 = approximate_median (log, start2, newsize); | |
88 | EMACS_INT i3 = approximate_median (log, start2 + newsize, | |
89 | size - 2 * newsize); | |
90 | return (i1 < i2 | |
91 | ? (i2 < i3 ? i2 : (i1 < i3 ? i3 : i1)) | |
92 | : (i1 < i3 ? i1 : (i2 < i3 ? i3 : i2))); | |
c2d7786e | 93 | } |
c2d7786e TM |
94 | } |
95 | ||
3d80c99f | 96 | static void evict_lower_half (log_t *log) |
c2d7786e | 97 | { |
3d80c99f SM |
98 | ptrdiff_t size = ASIZE (log->key_and_value) / 2; |
99 | EMACS_INT median = approximate_median (log, 0, size); | |
100 | ptrdiff_t i; | |
c2d7786e | 101 | |
c2d7786e | 102 | for (i = 0; i < size; i++) |
3d80c99f SM |
103 | /* Evict not only values smaller but also values equal to the median, |
104 | so as to make sure we evict something no matter what. */ | |
105 | if (XINT (HASH_VALUE (log, i)) <= median) | |
106 | { | |
107 | Lisp_Object key = HASH_KEY (log, i); | |
108 | { /* FIXME: we could make this more efficient. */ | |
109 | Lisp_Object tmp; | |
110 | XSET_HASH_TABLE (tmp, log); /* FIXME: Use make_lisp_ptr. */ | |
111 | Fremhash (key, tmp); | |
112 | } | |
113 | eassert (EQ (log->next_free, make_number (i))); | |
114 | { | |
115 | int j; | |
116 | eassert (VECTORP (key)); | |
117 | for (j = 0; j < ASIZE (key); j++) | |
ad942b63 | 118 | ASET (key, j, Qnil); |
3d80c99f SM |
119 | } |
120 | set_hash_key_slot (log, i, key); | |
121 | } | |
c2d7786e TM |
122 | } |
123 | ||
0efc778b TM |
124 | /* Record the current backtrace in LOG. BASE is a special name for |
125 | describing which the backtrace come from. BASE can be nil. COUNT is | |
126 | a number how many times the profiler sees the backtrace at the | |
127 | time. ELAPSED is a elapsed time in millisecond that the backtrace | |
128 | took. */ | |
129 | ||
c2d7786e | 130 | static void |
3d80c99f | 131 | record_backtrace (log_t *log, size_t count) |
c2d7786e | 132 | { |
c2d7786e | 133 | struct backtrace *backlist = backtrace_list; |
3d80c99f SM |
134 | Lisp_Object backtrace; |
135 | ptrdiff_t index, i = 0; | |
136 | ptrdiff_t asize; | |
c2d7786e | 137 | |
3d80c99f | 138 | if (!INTEGERP (log->next_free)) |
611b7507 JB |
139 | /* FIXME: transfer the evicted counts to a special entry rather |
140 | than dropping them on the floor. */ | |
3d80c99f SM |
141 | evict_lower_half (log); |
142 | index = XINT (log->next_free); | |
c2d7786e | 143 | |
3d80c99f SM |
144 | /* Get a "working memory" vector. */ |
145 | backtrace = HASH_KEY (log, index); | |
146 | asize = ASIZE (backtrace); | |
12b3895d | 147 | |
0efc778b | 148 | /* Copy the backtrace contents into working memory. */ |
3d80c99f | 149 | for (; i < asize && backlist; i++, backlist = backlist->next) |
611b7507 | 150 | /* FIXME: For closures we should ignore the environment. */ |
3d80c99f | 151 | ASET (backtrace, i, *backlist->function); |
0efc778b | 152 | |
3d80c99f SM |
153 | /* Make sure that unused space of working memory is filled with nil. */ |
154 | for (; i < asize; i++) | |
155 | ASET (backtrace, i, Qnil); | |
c2d7786e | 156 | |
3d80c99f SM |
157 | { /* We basically do a `gethash+puthash' here, except that we have to be |
158 | careful to avoid memory allocation since we're in a signal | |
159 | handler, and we optimize the code to try and avoid computing the | |
160 | hash+lookup twice. See fns.c:Fputhash for reference. */ | |
161 | EMACS_UINT hash; | |
162 | ptrdiff_t j = hash_lookup (log, backtrace, &hash); | |
163 | if (j >= 0) | |
164 | set_hash_value_slot (log, j, | |
165 | make_number (count + XINT (HASH_VALUE (log, j)))); | |
166 | else | |
167 | { /* BEWARE! hash_put in general can allocate memory. | |
168 | But currently it only does that if log->next_free is nil. */ | |
169 | int j; | |
170 | eassert (!NILP (log->next_free)); | |
171 | j = hash_put (log, backtrace, make_number (count), hash); | |
172 | /* Let's make sure we've put `backtrace' right where it | |
173 | already was to start with. */ | |
174 | eassert (index == j); | |
175 | ||
176 | /* FIXME: If the hash-table is almost full, we should set | |
177 | some global flag so that some Elisp code can offload its | |
611b7507 JB |
178 | data elsewhere, so as to avoid the eviction code. |
179 | There are 2 ways to do that, AFAICT: | |
180 | - Set a flag checked in QUIT, such that QUIT can then call | |
181 | Fprofiler_cpu_log and stash the full log for later use. | |
182 | - Set a flag check in post-gc-hook, so that Elisp code can call | |
183 | profiler-cpu-log. That gives us more flexibility since that | |
184 | Elisp code can then do all kinds of fun stuff like write | |
185 | the log to disk. Or turn it right away into a call tree. | |
186 | Of course, using Elisp is generally preferable, but it may | |
187 | take longer until we get a chance to run the Elisp code, so | |
188 | there's more risk that the table will get full before we | |
189 | get there. */ | |
3d80c99f SM |
190 | } |
191 | } | |
c2d7786e | 192 | } |
c2d7786e | 193 | \f |
0efc778b | 194 | /* Sample profiler. */ |
c2d7786e | 195 | |
3a880af4 SM |
196 | /* FIXME: Add support for the CPU profiler in W32. */ |
197 | /* FIXME: the sigprof_handler suffers from race-conditions if the signal | |
198 | is delivered to a thread other than the main Emacs thread. */ | |
199 | ||
ad942b63 SM |
200 | #if defined SIGPROF && defined HAVE_SETITIMER |
201 | #define PROFILER_CPU_SUPPORT | |
202 | ||
6521894d SM |
203 | /* True if sampling profiler is running. */ |
204 | static bool profiler_cpu_running; | |
205 | ||
3d80c99f SM |
206 | static Lisp_Object cpu_log; |
207 | /* Separate counter for the time spent in the GC. */ | |
208 | static EMACS_INT cpu_gc_count; | |
0efc778b TM |
209 | |
210 | /* The current sample interval in millisecond. */ | |
211 | ||
c2d7786e TM |
212 | static int current_sample_interval; |
213 | ||
6521894d SM |
214 | /* Signal handler for sample profiler. */ |
215 | ||
216 | static void | |
3670daf7 | 217 | sigprof_handler (int signal) |
6521894d SM |
218 | { |
219 | eassert (HASH_TABLE_P (cpu_log)); | |
220 | if (backtrace_list && EQ (*backtrace_list->function, Qautomatic_gc)) | |
221 | /* Special case the time-count inside GC because the hash-table | |
222 | code is not prepared to be used while the GC is running. | |
223 | More specifically it uses ASIZE at many places where it does | |
224 | not expect the ARRAY_MARK_FLAG to be set. We could try and | |
225 | harden the hash-table code, but it doesn't seem worth the | |
226 | effort. */ | |
227 | cpu_gc_count += current_sample_interval; | |
228 | else | |
229 | record_backtrace (XHASH_TABLE (cpu_log), current_sample_interval); | |
230 | } | |
231 | ||
232 | DEFUN ("profiler-cpu-start", Fprofiler_cpu_start, Sprofiler_cpu_start, | |
c2d7786e | 233 | 1, 1, 0, |
6521894d SM |
234 | doc: /* Start or restart the cpu profiler. |
235 | The cpu profiler will take call-stack samples each SAMPLE-INTERVAL (expressed in milliseconds). | |
236 | See also `profiler-log-size' and `profiler-max-stack-depth'. */) | |
c2d7786e TM |
237 | (Lisp_Object sample_interval) |
238 | { | |
239 | struct sigaction sa; | |
240 | struct itimerval timer; | |
241 | ||
6521894d | 242 | if (profiler_cpu_running) |
c2d7786e TM |
243 | error ("Sample profiler is already running"); |
244 | ||
3d80c99f SM |
245 | if (NILP (cpu_log)) |
246 | { | |
247 | cpu_gc_count = 0; | |
6521894d | 248 | cpu_log = make_log (profiler_log_size, |
3d80c99f SM |
249 | profiler_max_stack_depth); |
250 | } | |
c2d7786e TM |
251 | |
252 | current_sample_interval = XINT (sample_interval); | |
253 | ||
3670daf7 TM |
254 | sa.sa_handler = sigprof_handler; |
255 | sa.sa_flags = SA_RESTART; | |
c2d7786e TM |
256 | sigemptyset (&sa.sa_mask); |
257 | sigaction (SIGPROF, &sa, 0); | |
258 | ||
259 | timer.it_interval.tv_sec = 0; | |
260 | timer.it_interval.tv_usec = current_sample_interval * 1000; | |
261 | timer.it_value = timer.it_interval; | |
262 | setitimer (ITIMER_PROF, &timer, 0); | |
263 | ||
234148bf | 264 | profiler_cpu_running = true; |
c2d7786e TM |
265 | |
266 | return Qt; | |
267 | } | |
268 | ||
6521894d | 269 | DEFUN ("profiler-cpu-stop", Fprofiler_cpu_stop, Sprofiler_cpu_stop, |
c2d7786e | 270 | 0, 0, 0, |
234148bf SM |
271 | doc: /* Stop the cpu profiler. The profiler log is not affected. |
272 | Return non-nil if the profiler was running. */) | |
c2d7786e TM |
273 | (void) |
274 | { | |
6521894d | 275 | if (!profiler_cpu_running) |
234148bf SM |
276 | return Qnil; |
277 | profiler_cpu_running = false; | |
c2d7786e TM |
278 | |
279 | setitimer (ITIMER_PROF, 0, 0); | |
280 | ||
281 | return Qt; | |
282 | } | |
283 | ||
6521894d SM |
284 | DEFUN ("profiler-cpu-running-p", |
285 | Fprofiler_cpu_running_p, Sprofiler_cpu_running_p, | |
c2d7786e | 286 | 0, 0, 0, |
6521894d | 287 | doc: /* Return non-nil iff cpu profiler is running. */) |
c2d7786e TM |
288 | (void) |
289 | { | |
6521894d | 290 | return profiler_cpu_running ? Qt : Qnil; |
c2d7786e TM |
291 | } |
292 | ||
6521894d | 293 | DEFUN ("profiler-cpu-log", Fprofiler_cpu_log, Sprofiler_cpu_log, |
c2d7786e | 294 | 0, 0, 0, |
6521894d SM |
295 | doc: /* Return the current cpu profiler log. |
296 | The log is a hash-table mapping backtraces to counters which represent | |
297 | the amount of time spent at those points. Every backtrace is a vector | |
298 | of functions, where the last few elements may be nil. | |
299 | Before returning, a new log is allocated for future samples. */) | |
c2d7786e TM |
300 | (void) |
301 | { | |
3d80c99f SM |
302 | Lisp_Object result = cpu_log; |
303 | /* Here we're making the log visible to Elisp , so it's not safe any | |
304 | more for our use afterwards since we can't rely on its special | |
305 | pre-allocated keys anymore. So we have to allocate a new one. */ | |
6521894d SM |
306 | cpu_log = (profiler_cpu_running |
307 | ? make_log (profiler_log_size, profiler_max_stack_depth) | |
3d80c99f SM |
308 | : Qnil); |
309 | Fputhash (Fmake_vector (make_number (1), Qautomatic_gc), | |
310 | make_number (cpu_gc_count), | |
311 | result); | |
312 | cpu_gc_count = 0; | |
c2d7786e TM |
313 | return result; |
314 | } | |
6521894d | 315 | #endif /* not defined PROFILER_CPU_SUPPORT */ |
c2d7786e | 316 | \f |
0efc778b | 317 | /* Memory profiler. */ |
c2d7786e | 318 | |
6521894d SM |
319 | /* True if memory profiler is running. */ |
320 | bool profiler_memory_running; | |
321 | ||
3d80c99f | 322 | static Lisp_Object memory_log; |
c2d7786e | 323 | |
6521894d | 324 | DEFUN ("profiler-memory-start", Fprofiler_memory_start, Sprofiler_memory_start, |
c2d7786e | 325 | 0, 0, 0, |
6521894d SM |
326 | doc: /* Start/restart the memory profiler. |
327 | The memory profiler will take samples of the call-stack whenever a new | |
328 | allocation takes place. Note that most small allocations only trigger | |
329 | the profiler occasionally. | |
330 | See also `profiler-log-size' and `profiler-max-stack-depth'. */) | |
c2d7786e TM |
331 | (void) |
332 | { | |
6521894d | 333 | if (profiler_memory_running) |
c2d7786e TM |
334 | error ("Memory profiler is already running"); |
335 | ||
3d80c99f | 336 | if (NILP (memory_log)) |
6521894d | 337 | memory_log = make_log (profiler_log_size, |
c2d7786e TM |
338 | profiler_max_stack_depth); |
339 | ||
234148bf | 340 | profiler_memory_running = true; |
c2d7786e TM |
341 | |
342 | return Qt; | |
343 | } | |
344 | ||
6521894d SM |
345 | DEFUN ("profiler-memory-stop", |
346 | Fprofiler_memory_stop, Sprofiler_memory_stop, | |
c2d7786e | 347 | 0, 0, 0, |
234148bf SM |
348 | doc: /* Stop the memory profiler. The profiler log is not affected. |
349 | Return non-nil if the profiler was running. */) | |
c2d7786e TM |
350 | (void) |
351 | { | |
6521894d | 352 | if (!profiler_memory_running) |
234148bf SM |
353 | return Qnil; |
354 | profiler_memory_running = false; | |
c2d7786e TM |
355 | return Qt; |
356 | } | |
357 | ||
6521894d SM |
358 | DEFUN ("profiler-memory-running-p", |
359 | Fprofiler_memory_running_p, Sprofiler_memory_running_p, | |
c2d7786e | 360 | 0, 0, 0, |
6521894d | 361 | doc: /* Return non-nil if memory profiler is running. */) |
c2d7786e TM |
362 | (void) |
363 | { | |
6521894d | 364 | return profiler_memory_running ? Qt : Qnil; |
c2d7786e TM |
365 | } |
366 | ||
6521894d SM |
367 | DEFUN ("profiler-memory-log", |
368 | Fprofiler_memory_log, Sprofiler_memory_log, | |
c2d7786e | 369 | 0, 0, 0, |
6521894d SM |
370 | doc: /* Return the current memory profiler log. |
371 | The log is a hash-table mapping backtraces to counters which represent | |
372 | the amount of memory allocated at those points. Every backtrace is a vector | |
373 | of functions, where the last few elements may be nil. | |
374 | Before returning, a new log is allocated for future samples. */) | |
c2d7786e TM |
375 | (void) |
376 | { | |
3d80c99f SM |
377 | Lisp_Object result = memory_log; |
378 | /* Here we're making the log visible to Elisp , so it's not safe any | |
379 | more for our use afterwards since we can't rely on its special | |
380 | pre-allocated keys anymore. So we have to allocate a new one. */ | |
6521894d SM |
381 | memory_log = (profiler_memory_running |
382 | ? make_log (profiler_log_size, profiler_max_stack_depth) | |
3d80c99f | 383 | : Qnil); |
c2d7786e TM |
384 | return result; |
385 | } | |
386 | ||
387 | \f | |
0efc778b | 388 | /* Signals and probes. */ |
c2d7786e | 389 | |
0efc778b | 390 | /* Record that the current backtrace allocated SIZE bytes. */ |
c2d7786e TM |
391 | void |
392 | malloc_probe (size_t size) | |
393 | { | |
ad942b63 SM |
394 | eassert (HASH_TABLE_P (memory_log)); |
395 | record_backtrace (XHASH_TABLE (memory_log), size); | |
c2d7786e TM |
396 | } |
397 | ||
398 | void | |
399 | syms_of_profiler (void) | |
400 | { | |
401 | DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth, | |
6521894d | 402 | doc: /* Number of elements from the call-stack recorded in the log. */); |
c2d7786e | 403 | profiler_max_stack_depth = 16; |
6521894d SM |
404 | DEFVAR_INT ("profiler-log-size", profiler_log_size, |
405 | doc: /* Number of distinct call-stacks that can be recorded in a profiler log. | |
406 | If the log gets full, some of the least-seen call-stacks will be evicted | |
407 | to make room for new entries. */); | |
408 | profiler_log_size = 10000; | |
c2d7786e | 409 | |
ad942b63 | 410 | #ifdef PROFILER_CPU_SUPPORT |
234148bf | 411 | profiler_cpu_running = false; |
ad942b63 SM |
412 | cpu_log = Qnil; |
413 | staticpro (&cpu_log); | |
6521894d SM |
414 | defsubr (&Sprofiler_cpu_start); |
415 | defsubr (&Sprofiler_cpu_stop); | |
416 | defsubr (&Sprofiler_cpu_running_p); | |
417 | defsubr (&Sprofiler_cpu_log); | |
ad942b63 | 418 | #endif |
234148bf | 419 | profiler_memory_running = false; |
ad942b63 SM |
420 | memory_log = Qnil; |
421 | staticpro (&memory_log); | |
6521894d SM |
422 | defsubr (&Sprofiler_memory_start); |
423 | defsubr (&Sprofiler_memory_stop); | |
424 | defsubr (&Sprofiler_memory_running_p); | |
425 | defsubr (&Sprofiler_memory_log); | |
c2d7786e | 426 | } |