Merge from emacs-24; up to 2012-12-05T00:13:56Z!yamaoka@jpl.org
[bpt/emacs.git] / src / profiler.c
1 /* Profiler implementation.
2
3 Copyright (C) 2012 Free Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
19
20 #include <config.h>
21 #include "lisp.h"
22 #include "syssignal.h"
23 #include "systime.h"
24
25 /* Return A + B, but return the maximum fixnum if the result would overflow.
26 Assume A and B are nonnegative and in fixnum range. */
27
28 static EMACS_INT
29 saturated_add (EMACS_INT a, EMACS_INT b)
30 {
31 return min (a + b, MOST_POSITIVE_FIXNUM);
32 }
33
/* Logs.  */

/* A profiler log is an ordinary Lisp hash table, used in a
   non-standard way: its key slots are pre-filled with vectors so that
   sampling never has to allocate (see make_log).  */
typedef struct Lisp_Hash_Table log_t;

/* Symbol naming the hash-table test used by profiler logs.  */
static Lisp_Object Qprofiler_backtrace_equal;
/* The test itself: compares backtrace vectors element-wise with
   function-equal semantics (cmpfn_profiler / hashfn_profiler).  */
static struct hash_table_test hashtest_profiler;
41 static Lisp_Object
42 make_log (int heap_size, int max_stack_depth)
43 {
44 /* We use a standard Elisp hash-table object, but we use it in
45 a special way. This is OK as long as the object is not exposed
46 to Elisp, i.e. until it is returned by *-profiler-log, after which
47 it can't be used any more. */
48 Lisp_Object log = make_hash_table (hashtest_profiler,
49 make_number (heap_size),
50 make_float (DEFAULT_REHASH_SIZE),
51 make_float (DEFAULT_REHASH_THRESHOLD),
52 Qnil);
53 struct Lisp_Hash_Table *h = XHASH_TABLE (log);
54
55 /* What is special about our hash-tables is that the keys are pre-filled
56 with the vectors we'll put in them. */
57 int i = ASIZE (h->key_and_value) / 2;
58 while (0 < i)
59 set_hash_key_slot (h, --i,
60 Fmake_vector (make_number (max_stack_depth), Qnil));
61 return log;
62 }
63
64 /* Evict the least used half of the hash_table.
65
66 When the table is full, we have to evict someone.
67 The easiest and most efficient is to evict the value we're about to add
68 (i.e. once the table is full, stop sampling).
69
70 We could also pick the element with the lowest count and evict it,
71 but finding it is O(N) and for that amount of work we get very
72 little in return: for the next sample, this latest sample will have
73 count==1 and will hence be a prime candidate for eviction :-(
74
75 So instead, we take O(N) time to eliminate more or less half of the
76 entries (the half with the lowest counts). So we get an amortized
77 cost of O(1) and we get O(N) time for a new entry to grow larger
78 than the other least counts before a new round of eviction. */
79
/* Return an approximation of the median of the hash values stored in
   LOG at indices [START, START+SIZE).  SIZE must be positive.
   Recursively splits the range into thirds and takes the median of
   the three sub-medians: cheap, allocation-free, and accurate enough
   for deciding which half of the table to evict.  */
static EMACS_INT approximate_median (log_t *log,
				     ptrdiff_t start, ptrdiff_t size)
{
  eassert (size > 0);
  if (size < 2)
    return XINT (HASH_VALUE (log, start));
  if (size < 3)
    /* Not an actual median, but better for our application than
       choosing either of the two numbers.  */
    return ((XINT (HASH_VALUE (log, start))
	     + XINT (HASH_VALUE (log, start + 1)))
	    / 2);
  else
    {
      /* Divide into three chunks; the last absorbs the remainder.  */
      ptrdiff_t newsize = size / 3;
      ptrdiff_t start2 = start + newsize;
      EMACS_INT i1 = approximate_median (log, start, newsize);
      EMACS_INT i2 = approximate_median (log, start2, newsize);
      EMACS_INT i3 = approximate_median (log, start2 + newsize,
					 size - 2 * newsize);
      /* Median of the three sub-medians i1, i2, i3.  */
      return (i1 < i2
	      ? (i2 < i3 ? i2 : (i1 < i3 ? i3 : i1))
	      : (i1 < i3 ? i1 : (i2 < i3 ? i3 : i2)));
    }
}
105
/* Remove from LOG every entry whose count is at most the approximate
   median, freeing roughly half of the table.  Each evicted key (a
   pre-allocated backtrace vector) is cleared and reinstalled in its
   slot so it can be reused without allocating.  */
static void evict_lower_half (log_t *log)
{
  ptrdiff_t size = ASIZE (log->key_and_value) / 2;
  EMACS_INT median = approximate_median (log, 0, size);
  ptrdiff_t i;

  for (i = 0; i < size; i++)
    /* Evict not only values smaller but also values equal to the median,
       so as to make sure we evict something no matter what.  */
    if (XINT (HASH_VALUE (log, i)) <= median)
      {
	Lisp_Object key = HASH_KEY (log, i);
	{ /* FIXME: we could make this more efficient.  */
	  Lisp_Object tmp;
	  XSET_HASH_TABLE (tmp, log); /* FIXME: Use make_lisp_ptr.  */
	  Fremhash (key, tmp);
	}
	/* Fremhash must have pushed slot I onto the free list.  */
	eassert (EQ (log->next_free, make_number (i)));
	{
	  /* Reset the vector's elements to nil before reuse.  */
	  int j;
	  eassert (VECTORP (key));
	  for (j = 0; j < ASIZE (key); j++)
	    ASET (key, j, Qnil);
	}
	/* Put the cleared vector back as the slot's pre-filled key.  */
	set_hash_key_slot (log, i, key);
      }
}
133
/* Record the current backtrace in LOG.  COUNT is the weight of this
   current backtrace: interrupt counts for CPU, and the allocation
   size for memory.  May run from a signal handler, so it is written
   to avoid allocating Lisp memory (it reuses LOG's pre-filled key
   vectors instead).  */

static void
record_backtrace (log_t *log, EMACS_INT count)
{
  struct backtrace *backlist = backtrace_list;
  Lisp_Object backtrace;
  ptrdiff_t index, i = 0;
  ptrdiff_t asize;

  /* A non-integer next_free means the table is full; make room.  */
  if (!INTEGERP (log->next_free))
    /* FIXME: transfer the evicted counts to a special entry rather
       than dropping them on the floor.  */
    evict_lower_half (log);
  index = XINT (log->next_free);

  /* Get a "working memory" vector: the pre-allocated key sitting in
     the first free slot (see make_log).  */
  backtrace = HASH_KEY (log, index);
  asize = ASIZE (backtrace);

  /* Copy the backtrace contents into working memory.  */
  for (; i < asize && backlist; i++, backlist = backlist->next)
    /* FIXME: For closures we should ignore the environment.  */
    ASET (backtrace, i, backlist->function);

  /* Make sure that unused space of working memory is filled with nil.  */
  for (; i < asize; i++)
    ASET (backtrace, i, Qnil);

  { /* We basically do a `gethash+puthash' here, except that we have to be
       careful to avoid memory allocation since we're in a signal
       handler, and we optimize the code to try and avoid computing the
       hash+lookup twice.  See fns.c:Fputhash for reference.  */
    EMACS_UINT hash;
    ptrdiff_t j = hash_lookup (log, backtrace, &hash);
    if (j >= 0)
      {
	/* Already-known backtrace: bump its counter, saturating.  */
	EMACS_INT old_val = XINT (HASH_VALUE (log, j));
	EMACS_INT new_val = saturated_add (old_val, count);
	set_hash_value_slot (log, j, make_number (new_val));
      }
    else
      { /* BEWARE!  hash_put in general can allocate memory.
	   But currently it only does that if log->next_free is nil.  */
	int j;
	eassert (!NILP (log->next_free));
	j = hash_put (log, backtrace, make_number (count), hash);
	/* Let's make sure we've put `backtrace' right where it
	   already was to start with.  */
	eassert (index == j);

	/* FIXME: If the hash-table is almost full, we should set
	   some global flag so that some Elisp code can offload its
	   data elsewhere, so as to avoid the eviction code.
	   There are 2 ways to do that, AFAICT:
	   - Set a flag checked in QUIT, such that QUIT can then call
	     Fprofiler_cpu_log and stash the full log for later use.
	   - Set a flag check in post-gc-hook, so that Elisp code can call
	     profiler-cpu-log.  That gives us more flexibility since that
	     Elisp code can then do all kinds of fun stuff like write
	     the log to disk.  Or turn it right away into a call tree.
	   Of course, using Elisp is generally preferable, but it may
	   take longer until we get a chance to run the Elisp code, so
	   there's more risk that the table will get full before we
	   get there.  */
      }
  }
}
204 \f
/* Sampling profiler.  */

#ifdef PROFILER_CPU_SUPPORT

/* The profiler timer and whether it was properly initialized, if
   POSIX timers are available.  */
#ifdef HAVE_ITIMERSPEC
static timer_t profiler_timer;
static bool profiler_timer_ok;
#endif

/* Status of the sampling profiler: which timer mechanism, if any,
   is currently generating SIGPROF.  */
static enum profiler_cpu_running
  { NOT_RUNNING, TIMER_SETTIME_RUNNING, SETITIMER_RUNNING }
  profiler_cpu_running;

/* Hash-table log of CPU profiler, or Qnil if none is allocated.  */
static Lisp_Object cpu_log;

/* Separate counter for the time spent in the GC, because the log's
   hash-table code cannot run while GC is in progress.  */
static EMACS_INT cpu_gc_count;

/* The current sampling interval in nanoseconds.  */
static EMACS_INT current_sampling_interval;
/* Signal handler for the sampling profiler: runs on each SIGPROF
   delivery and records one (or more, if ticks were missed) samples
   of the current backtrace.  */

static void
handle_profiler_signal (int signal)
{
  if (backtrace_list && EQ (backtrace_list->function, Qautomatic_gc))
    /* Special case the time-count inside GC because the hash-table
       code is not prepared to be used while the GC is running.
       More specifically it uses ASIZE at many places where it does
       not expect the ARRAY_MARK_FLAG to be set.  We could try and
       harden the hash-table code, but it doesn't seem worth the
       effort.  */
    cpu_gc_count = saturated_add (cpu_gc_count, 1);
  else
    {
      EMACS_INT count = 1;
#ifdef HAVE_ITIMERSPEC
      /* Include timer overruns, i.e. expirations that did not get a
	 separate signal delivery, so the accounting stays accurate.  */
      if (profiler_timer_ok)
	{
	  int overruns = timer_getoverrun (profiler_timer);
	  eassert (0 <= overruns);
	  count += overruns;
	}
#endif
      eassert (HASH_TABLE_P (cpu_log));
      record_backtrace (XHASH_TABLE (cpu_log), count);
    }
}
258
/* SIGPROF shim: dispatch SIGNAL to handle_profiler_signal through
   deliver_process_signal.  */
static void
deliver_profiler_signal (int signal)
{
  deliver_process_signal (signal, handle_profiler_signal);
}
264
265 static enum profiler_cpu_running
266 setup_cpu_timer (Lisp_Object sampling_interval)
267 {
268 struct sigaction action;
269 struct itimerval timer;
270 struct timespec interval;
271 int billion = 1000000000;
272
273 if (! RANGED_INTEGERP (1, sampling_interval,
274 (TYPE_MAXIMUM (time_t) < EMACS_INT_MAX / billion
275 ? ((EMACS_INT) TYPE_MAXIMUM (time_t) * billion
276 + (billion - 1))
277 : EMACS_INT_MAX)))
278 return NOT_RUNNING;
279
280 current_sampling_interval = XINT (sampling_interval);
281 interval = make_emacs_time (current_sampling_interval / billion,
282 current_sampling_interval % billion);
283 emacs_sigaction_init (&action, deliver_profiler_signal);
284 sigaction (SIGPROF, &action, 0);
285
286 #ifdef HAVE_ITIMERSPEC
287 if (! profiler_timer_ok)
288 {
289 /* System clocks to try, in decreasing order of desirability. */
290 static clockid_t const system_clock[] = {
291 #ifdef CLOCK_THREAD_CPUTIME_ID
292 CLOCK_THREAD_CPUTIME_ID,
293 #endif
294 #ifdef CLOCK_PROCESS_CPUTIME_ID
295 CLOCK_PROCESS_CPUTIME_ID,
296 #endif
297 #ifdef CLOCK_MONOTONIC
298 CLOCK_MONOTONIC,
299 #endif
300 CLOCK_REALTIME
301 };
302 int i;
303 struct sigevent sigev;
304 sigev.sigev_value.sival_ptr = &profiler_timer;
305 sigev.sigev_signo = SIGPROF;
306 sigev.sigev_notify = SIGEV_SIGNAL;
307
308 for (i = 0; i < sizeof system_clock / sizeof *system_clock; i++)
309 if (timer_create (system_clock[i], &sigev, &profiler_timer) == 0)
310 {
311 profiler_timer_ok = 1;
312 break;
313 }
314 }
315
316 if (profiler_timer_ok)
317 {
318 struct itimerspec ispec;
319 ispec.it_value = ispec.it_interval = interval;
320 if (timer_settime (profiler_timer, 0, &ispec, 0) == 0)
321 return TIMER_SETTIME_RUNNING;
322 }
323 #endif
324
325 #ifdef HAVE_SETITIMER
326 timer.it_value = timer.it_interval = make_timeval (interval);
327 if (setitimer (ITIMER_PROF, &timer, 0) == 0)
328 return SETITIMER_RUNNING;
329 #endif
330
331 return NOT_RUNNING;
332 }
333
DEFUN ("profiler-cpu-start", Fprofiler_cpu_start, Sprofiler_cpu_start,
       1, 1, 0,
       doc: /* Start or restart the cpu profiler.
It takes call-stack samples each SAMPLING-INTERVAL nanoseconds, approximately.
See also `profiler-log-size' and `profiler-max-stack-depth'. */)
  (Lisp_Object sampling_interval)
{
  if (profiler_cpu_running)
    error ("CPU profiler is already running");

  /* Reuse the existing log, if any (it survives profiler-cpu-stop);
     allocate a fresh one only when there is none.  */
  if (NILP (cpu_log))
    {
      cpu_gc_count = 0;
      cpu_log = make_log (profiler_log_size,
			  profiler_max_stack_depth);
    }

  /* setup_cpu_timer yields NOT_RUNNING (zero) when the interval is
     rejected or no timer mechanism could be started.  */
  profiler_cpu_running = setup_cpu_timer (sampling_interval);
  if (! profiler_cpu_running)
    error ("Invalid sampling interval");

  return Qt;
}
357
358 DEFUN ("profiler-cpu-stop", Fprofiler_cpu_stop, Sprofiler_cpu_stop,
359 0, 0, 0,
360 doc: /* Stop the cpu profiler. The profiler log is not affected.
361 Return non-nil if the profiler was running. */)
362 (void)
363 {
364 switch (profiler_cpu_running)
365 {
366 case NOT_RUNNING:
367 return Qnil;
368
369 #ifdef HAVE_ITIMERSPEC
370 case TIMER_SETTIME_RUNNING:
371 {
372 struct itimerspec disable;
373 memset (&disable, 0, sizeof disable);
374 timer_settime (profiler_timer, 0, &disable, 0);
375 }
376 break;
377 #endif
378
379 #ifdef HAVE_SETITIMER
380 case SETITIMER_RUNNING:
381 {
382 struct itimerval disable;
383 memset (&disable, 0, sizeof disable);
384 setitimer (ITIMER_PROF, &disable, 0);
385 }
386 break;
387 #endif
388 }
389
390 signal (SIGPROF, SIG_IGN);
391 profiler_cpu_running = NOT_RUNNING;
392 return Qt;
393 }
394
395 DEFUN ("profiler-cpu-running-p",
396 Fprofiler_cpu_running_p, Sprofiler_cpu_running_p,
397 0, 0, 0,
398 doc: /* Return non-nil iff cpu profiler is running. */)
399 (void)
400 {
401 return profiler_cpu_running ? Qt : Qnil;
402 }
403
DEFUN ("profiler-cpu-log", Fprofiler_cpu_log, Sprofiler_cpu_log,
       0, 0, 0,
       doc: /* Return the current cpu profiler log.
The log is a hash-table mapping backtraces to counters which represent
the amount of time spent at those points. Every backtrace is a vector
of functions, where the last few elements may be nil.
Before returning, a new log is allocated for future samples. */)
  (void)
{
  Lisp_Object result = cpu_log;
  /* Here we're making the log visible to Elisp, so it's not safe any
     more for our use afterwards since we can't rely on its special
     pre-allocated keys anymore.  So we have to allocate a new one.  */
  cpu_log = (profiler_cpu_running
	     ? make_log (profiler_log_size, profiler_max_stack_depth)
	     : Qnil);
  /* Fold the separately-accumulated GC time into the returned log
     under a synthetic one-element [automatic-gc] backtrace.  */
  Fputhash (Fmake_vector (make_number (1), Qautomatic_gc),
	    make_number (cpu_gc_count),
	    result);
  cpu_gc_count = 0;
  return result;
}
426 #endif /* PROFILER_CPU_SUPPORT */
427 \f
/* Memory profiler.  */

/* True if memory profiler is running.  */
bool profiler_memory_running;

/* Hash-table log of the memory profiler, or Qnil if none allocated.  */
static Lisp_Object memory_log;
434
DEFUN ("profiler-memory-start", Fprofiler_memory_start, Sprofiler_memory_start,
       0, 0, 0,
       doc: /* Start/restart the memory profiler.
The memory profiler will take samples of the call-stack whenever a new
allocation takes place. Note that most small allocations only trigger
the profiler occasionally.
See also `profiler-log-size' and `profiler-max-stack-depth'. */)
  (void)
{
  if (profiler_memory_running)
    error ("Memory profiler is already running");

  /* Reuse an existing log (it survives profiler-memory-stop);
     allocate one only when there is none.  */
  if (NILP (memory_log))
    memory_log = make_log (profiler_log_size,
			   profiler_max_stack_depth);

  profiler_memory_running = true;

  return Qt;
}
455
456 DEFUN ("profiler-memory-stop",
457 Fprofiler_memory_stop, Sprofiler_memory_stop,
458 0, 0, 0,
459 doc: /* Stop the memory profiler. The profiler log is not affected.
460 Return non-nil if the profiler was running. */)
461 (void)
462 {
463 if (!profiler_memory_running)
464 return Qnil;
465 profiler_memory_running = false;
466 return Qt;
467 }
468
469 DEFUN ("profiler-memory-running-p",
470 Fprofiler_memory_running_p, Sprofiler_memory_running_p,
471 0, 0, 0,
472 doc: /* Return non-nil if memory profiler is running. */)
473 (void)
474 {
475 return profiler_memory_running ? Qt : Qnil;
476 }
477
DEFUN ("profiler-memory-log",
       Fprofiler_memory_log, Sprofiler_memory_log,
       0, 0, 0,
       doc: /* Return the current memory profiler log.
The log is a hash-table mapping backtraces to counters which represent
the amount of memory allocated at those points. Every backtrace is a vector
of functions, where the last few elements may be nil.
Before returning, a new log is allocated for future samples. */)
  (void)
{
  Lisp_Object result = memory_log;
  /* Here we're making the log visible to Elisp, so it's not safe any
     more for our use afterwards since we can't rely on its special
     pre-allocated keys anymore.  So we have to allocate a new one.  */
  memory_log = (profiler_memory_running
		? make_log (profiler_log_size, profiler_max_stack_depth)
		: Qnil);
  return result;
}
497
498 \f
499 /* Signals and probes. */
500
/* Record that the current backtrace allocated SIZE bytes.
   SIZE is clamped to the fixnum range before being recorded;
   record_backtrace itself is written to avoid Lisp allocation.  */
void
malloc_probe (size_t size)
{
  eassert (HASH_TABLE_P (memory_log));
  record_backtrace (XHASH_TABLE (memory_log), min (size, MOST_POSITIVE_FIXNUM));
}
508
509 DEFUN ("function-equal", Ffunction_equal, Sfunction_equal, 2, 2, 0,
510 doc: /* Return non-nil if F1 and F2 come from the same source.
511 Used to determine if different closures are just different instances of
512 the same lambda expression, or are really unrelated function. */)
513 (Lisp_Object f1, Lisp_Object f2)
514 {
515 bool res;
516 if (EQ (f1, f2))
517 res = true;
518 else if (COMPILEDP (f1) && COMPILEDP (f2))
519 res = EQ (AREF (f1, COMPILED_BYTECODE), AREF (f2, COMPILED_BYTECODE));
520 else if (CONSP (f1) && CONSP (f2) && CONSP (XCDR (f1)) && CONSP (XCDR (f2))
521 && EQ (Qclosure, XCAR (f1))
522 && EQ (Qclosure, XCAR (f2)))
523 res = EQ (XCDR (XCDR (f1)), XCDR (XCDR (f2)));
524 else
525 res = false;
526 return res ? Qt : Qnil;
527 }
528
529 static bool
530 cmpfn_profiler (struct hash_table_test *t,
531 Lisp_Object bt1, Lisp_Object bt2)
532 {
533 if (VECTORP (bt1) && VECTORP (bt2))
534 {
535 ptrdiff_t i, l = ASIZE (bt1);
536 if (l != ASIZE (bt2))
537 return false;
538 for (i = 0; i < l; i++)
539 if (NILP (Ffunction_equal (AREF (bt1, i), AREF (bt2, i))))
540 return false;
541 return true;
542 }
543 else
544 return EQ (bt1, bt2);
545 }
546
547 static EMACS_UINT
548 hashfn_profiler (struct hash_table_test *ht, Lisp_Object bt)
549 {
550 if (VECTORP (bt))
551 {
552 EMACS_UINT hash = 0;
553 ptrdiff_t i, l = ASIZE (bt);
554 for (i = 0; i < l; i++)
555 {
556 Lisp_Object f = AREF (bt, i);
557 EMACS_UINT hash1
558 = (COMPILEDP (f) ? XHASH (AREF (f, COMPILED_BYTECODE))
559 : (CONSP (f) && CONSP (XCDR (f)) && EQ (Qclosure, XCAR (f)))
560 ? XHASH (XCDR (XCDR (f))) : XHASH (f));
561 hash = sxhash_combine (hash, hash1);
562 }
563 return (hash & INTMASK);
564 }
565 else
566 return XHASH (bt);
567 }
568
/* Define the profiler's Lisp variables, symbols, and subroutines.  */
void
syms_of_profiler (void)
{
  DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth,
	      doc: /* Number of elements from the call-stack recorded in the log. */);
  profiler_max_stack_depth = 16;
  DEFVAR_INT ("profiler-log-size", profiler_log_size,
	      doc: /* Number of distinct call-stacks that can be recorded in a profiler log.
If the log gets full, some of the least-seen call-stacks will be evicted
to make room for new entries. */);
  profiler_log_size = 10000;

  DEFSYM (Qprofiler_backtrace_equal, "profiler-backtrace-equal");
  {
    /* Install the custom hash-table test used by profiler logs
       (see cmpfn_profiler and hashfn_profiler).  */
    struct hash_table_test test
      = { Qprofiler_backtrace_equal, Qnil, Qnil,
	  cmpfn_profiler, hashfn_profiler };
    hashtest_profiler = test;
  }

  defsubr (&Sfunction_equal);

#ifdef PROFILER_CPU_SUPPORT
  profiler_cpu_running = NOT_RUNNING;
  cpu_log = Qnil;
  /* Protect the log from garbage collection.  */
  staticpro (&cpu_log);
  defsubr (&Sprofiler_cpu_start);
  defsubr (&Sprofiler_cpu_stop);
  defsubr (&Sprofiler_cpu_running_p);
  defsubr (&Sprofiler_cpu_log);
#endif
  profiler_memory_running = false;
  memory_log = Qnil;
  /* Protect the log from garbage collection.  */
  staticpro (&memory_log);
  defsubr (&Sprofiler_memory_start);
  defsubr (&Sprofiler_memory_stop);
  defsubr (&Sprofiler_memory_running_p);
  defsubr (&Sprofiler_memory_log);
}