Rewrite sampler to use Elisp hash-tables.
[bpt/emacs.git] / src / profiler.c
1 /* Profiler implementation.
2
3 Copyright (C) 2012 Free Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
19
20 #include <config.h>
21 #include <stdio.h>
22 #include <limits.h>
23 #include <sys/time.h>
24 #include <signal.h>
25 #include <setjmp.h>
26 #include "lisp.h"
27
/* True if sampling profiler is running.  */

bool sample_profiler_running;

/* True if memory profiler is running.  */

bool memory_profiler_running;

/* SIGPROF handler, defined near the end of this file; declared here
   because `sample-profiler-start' installs it via sigaction.  */
static void sigprof_handler (int, siginfo_t *, void *);
37
38 \f
39 /* Logs. */
40
41 typedef struct Lisp_Hash_Table log_t;
42
43 static Lisp_Object
44 make_log (int heap_size, int max_stack_depth)
45 {
46 /* We use a standard Elisp hash-table object, but we use it in
47 a special way. This is OK as long as the object is not exposed
48 to Elisp, i.e. until it is returned by *-profiler-log, after which
49 it can't be used any more. */
50 Lisp_Object log = make_hash_table (Qequal, make_number (heap_size),
51 make_float (DEFAULT_REHASH_SIZE),
52 make_float (DEFAULT_REHASH_THRESHOLD),
53 Qnil, Qnil, Qnil);
54 struct Lisp_Hash_Table *h = XHASH_TABLE (log);
55
56 /* What is special about our hash-tables is that the keys are pre-filled
57 with the vectors we'll put in them. */
58 int i = ASIZE (h->key_and_value) / 2;
59 while (0 < i)
60 set_hash_key_slot (h, --i,
61 Fmake_vector (make_number (max_stack_depth), Qnil));
62 return log;
63 }
64
65 /* Evict the least used half of the hash_table.
66
67 When the table is full, we have to evict someone.
68 The easiest and most efficient is to evict the value we're about to add
69 (i.e. once the table is full, stop sampling).
70
71 We could also pick the element with the lowest count and evict it,
72 but finding it is O(N) and for that amount of work we get very
73 little in return: for the next sample, this latest sample will have
74 count==1 and will hence be a prime candidate for eviction :-(
75
76 So instead, we take O(N) time to eliminate more or less half of the
77 entries (the half with the lowest counts). So we get an amortized
78 cost of O(1) and we get O(N) time for a new entry to grow larger
79 than the other least counts before a new round of eviction. */
80
81 static EMACS_INT approximate_median (log_t *log,
82 ptrdiff_t start, ptrdiff_t size)
83 {
84 eassert (size > 0);
85 if (size < 2)
86 return XINT (HASH_VALUE (log, start));
87 if (size < 3)
88 /* Not an actual median, but better for our application than
89 choosing either of the two numbers. */
90 return ((XINT (HASH_VALUE (log, start))
91 + XINT (HASH_VALUE (log, start + 1)))
92 / 2);
93 else
94 {
95 ptrdiff_t newsize = size / 3;
96 ptrdiff_t start2 = start + newsize;
97 EMACS_INT i1 = approximate_median (log, start, newsize);
98 EMACS_INT i2 = approximate_median (log, start2, newsize);
99 EMACS_INT i3 = approximate_median (log, start2 + newsize,
100 size - 2 * newsize);
101 return (i1 < i2
102 ? (i2 < i3 ? i2 : (i1 < i3 ? i3 : i1))
103 : (i1 < i3 ? i1 : (i2 < i3 ? i3 : i2)));
104 }
105 }
106
107 static void evict_lower_half (log_t *log)
108 {
109 ptrdiff_t size = ASIZE (log->key_and_value) / 2;
110 EMACS_INT median = approximate_median (log, 0, size);
111 ptrdiff_t i;
112
113 for (i = 0; i < size; i++)
114 /* Evict not only values smaller but also values equal to the median,
115 so as to make sure we evict something no matter what. */
116 if (XINT (HASH_VALUE (log, i)) <= median)
117 {
118 Lisp_Object key = HASH_KEY (log, i);
119 { /* FIXME: we could make this more efficient. */
120 Lisp_Object tmp;
121 XSET_HASH_TABLE (tmp, log); /* FIXME: Use make_lisp_ptr. */
122 Fremhash (key, tmp);
123 }
124 eassert (EQ (log->next_free, make_number (i)));
125 {
126 int j;
127 eassert (VECTORP (key));
128 for (j = 0; j < ASIZE (key); j++)
129 ASET (key, i, Qnil);
130 }
131 set_hash_key_slot (log, i, key);
132 }
133 }
134
/* Record the current backtrace in LOG.  COUNT is the weight added to
   this backtrace's tally: the sample profiler passes the sample
   interval in milliseconds, and the memory profiler passes the number
   of bytes allocated.  */
140
static void
record_backtrace (log_t *log, size_t count)
{
  struct backtrace *backlist = backtrace_list;
  Lisp_Object backtrace;
  ptrdiff_t index, i = 0;
  ptrdiff_t asize;

  /* A non-integer next_free means the table is full: make room by
     evicting the least-used half of the entries.  */
  if (!INTEGERP (log->next_free))
    evict_lower_half (log);
  index = XINT (log->next_free);

  /* Get a "working memory" vector: the pre-allocated key sitting in
     the next free slot (see make_log).  */
  backtrace = HASH_KEY (log, index);
  asize = ASIZE (backtrace);

  /* Copy the backtrace contents into working memory.  */
  for (; i < asize && backlist; i++, backlist = backlist->next)
    ASET (backtrace, i, *backlist->function);

  /* Make sure that unused space of working memory is filled with nil.  */
  for (; i < asize; i++)
    ASET (backtrace, i, Qnil);

  { /* We basically do a `gethash+puthash' here, except that we have to be
       careful to avoid memory allocation since we're in a signal
       handler, and we optimize the code to try and avoid computing the
       hash+lookup twice.  See fns.c:Fputhash for reference.  */
    EMACS_UINT hash;
    ptrdiff_t j = hash_lookup (log, backtrace, &hash);
    if (j >= 0)
      /* Backtrace already recorded: just bump its count.  */
      set_hash_value_slot (log, j,
			   make_number (count + XINT (HASH_VALUE (log, j))));
    else
      { /* BEWARE!  hash_put in general can allocate memory.
	   But currently it only does that if log->next_free is nil.  */
	int j;
	eassert (!NILP (log->next_free));
	j = hash_put (log, backtrace, make_number (count), hash);
	/* Let's make sure we've put `backtrace' right where it
	   already was to start with.  */
	eassert (index == j);

	/* FIXME: If the hash-table is almost full, we should set
	   some global flag so that some Elisp code can offload its
	   data elsewhere, so as to avoid the eviction code.  */
      }
  }
}
190 \f
/* Sample profiler.  */

/* Hash-table log for the sample profiler (see make_log), or nil when
   no log has been allocated yet.  */
static Lisp_Object cpu_log;
/* Separate counter for the time spent in the GC.  */
static EMACS_INT cpu_gc_count;

/* The current sample interval in millisecond.  */

static int current_sample_interval;
200
201 DEFUN ("sample-profiler-start", Fsample_profiler_start, Ssample_profiler_start,
202 1, 1, 0,
203 doc: /* Start or restart sample profiler. Sample profiler will
204 take samples each SAMPLE-INTERVAL in millisecond. See also
205 `profiler-slot-heap-size' and `profiler-max-stack-depth'. */)
206 (Lisp_Object sample_interval)
207 {
208 struct sigaction sa;
209 struct itimerval timer;
210
211 if (sample_profiler_running)
212 error ("Sample profiler is already running");
213
214 if (NILP (cpu_log))
215 {
216 cpu_gc_count = 0;
217 cpu_log = make_log (profiler_slot_heap_size,
218 profiler_max_stack_depth);
219 }
220
221 current_sample_interval = XINT (sample_interval);
222
223 sa.sa_sigaction = sigprof_handler;
224 sa.sa_flags = SA_RESTART | SA_SIGINFO;
225 sigemptyset (&sa.sa_mask);
226 sigaction (SIGPROF, &sa, 0);
227
228 timer.it_interval.tv_sec = 0;
229 timer.it_interval.tv_usec = current_sample_interval * 1000;
230 timer.it_value = timer.it_interval;
231 setitimer (ITIMER_PROF, &timer, 0);
232
233 sample_profiler_running = 1;
234
235 return Qt;
236 }
237
238 DEFUN ("sample-profiler-stop", Fsample_profiler_stop, Ssample_profiler_stop,
239 0, 0, 0,
240 doc: /* Stop sample profiler. Profiler log will be kept. */)
241 (void)
242 {
243 if (!sample_profiler_running)
244 error ("Sample profiler is not running");
245 sample_profiler_running = 0;
246
247 setitimer (ITIMER_PROF, 0, 0);
248
249 return Qt;
250 }
251
252 DEFUN ("sample-profiler-running-p",
253 Fsample_profiler_running_p, Ssample_profiler_running_p,
254 0, 0, 0,
255 doc: /* Return t if sample profiler is running. */)
256 (void)
257 {
258 return sample_profiler_running ? Qt : Qnil;
259 }
260
261 DEFUN ("sample-profiler-log",
262 Fsample_profiler_log, Ssample_profiler_log,
263 0, 0, 0,
264 doc: /* Return sample profiler log. The data is a list of
265 (sample nil TIMESTAMP SLOTS), where TIMESTAMP is a timestamp when the
266 log is collected and SLOTS is a list of slots. */)
267 (void)
268 {
269 Lisp_Object result = cpu_log;
270 /* Here we're making the log visible to Elisp , so it's not safe any
271 more for our use afterwards since we can't rely on its special
272 pre-allocated keys anymore. So we have to allocate a new one. */
273 cpu_log = (sample_profiler_running
274 ? make_log (profiler_slot_heap_size, profiler_max_stack_depth)
275 : Qnil);
276 Fputhash (Fmake_vector (make_number (1), Qautomatic_gc),
277 make_number (cpu_gc_count),
278 result);
279 cpu_gc_count = 0;
280 return result;
281 }
282
283 \f
/* Memory profiler.  */

/* Hash-table log for the memory profiler (see make_log), or nil when
   no log has been allocated yet.  */
static Lisp_Object memory_log;
287
288 DEFUN ("memory-profiler-start", Fmemory_profiler_start, Smemory_profiler_start,
289 0, 0, 0,
290 doc: /* Start/restart memory profiler. See also
291 `profiler-slot-heap-size' and `profiler-max-stack-depth'. */)
292 (void)
293 {
294 if (memory_profiler_running)
295 error ("Memory profiler is already running");
296
297 if (NILP (memory_log))
298 memory_log = make_log (profiler_slot_heap_size,
299 profiler_max_stack_depth);
300
301 memory_profiler_running = 1;
302
303 return Qt;
304 }
305
306 DEFUN ("memory-profiler-stop",
307 Fmemory_profiler_stop, Smemory_profiler_stop,
308 0, 0, 0,
309 doc: /* Stop memory profiler. Profiler log will be kept. */)
310 (void)
311 {
312 if (!memory_profiler_running)
313 error ("Memory profiler is not running");
314 memory_profiler_running = 0;
315
316 return Qt;
317 }
318
319 DEFUN ("memory-profiler-running-p",
320 Fmemory_profiler_running_p, Smemory_profiler_running_p,
321 0, 0, 0,
322 doc: /* Return t if memory profiler is running. */)
323 (void)
324 {
325 return memory_profiler_running ? Qt : Qnil;
326 }
327
328 DEFUN ("memory-profiler-log",
329 Fmemory_profiler_log, Smemory_profiler_log,
330 0, 0, 0,
331 doc: /* Return memory profiler log. The data is a list of
332 (memory nil TIMESTAMP SLOTS), where TIMESTAMP is a timestamp when the
333 log is collected and SLOTS is a list of slots. */)
334 (void)
335 {
336 Lisp_Object result = memory_log;
337 /* Here we're making the log visible to Elisp , so it's not safe any
338 more for our use afterwards since we can't rely on its special
339 pre-allocated keys anymore. So we have to allocate a new one. */
340 memory_log = (memory_profiler_running
341 ? make_log (profiler_slot_heap_size, profiler_max_stack_depth)
342 : Qnil);
343 return result;
344 }
345
346 \f
347 /* Signals and probes. */
348
/* Signal handler for sample profiler.  Runs in async-signal context:
   it must not allocate, which record_backtrace guarantees via the
   log's pre-allocated keys.  Charges the whole sample interval either
   to the GC counter or to the current backtrace.  */

static void
sigprof_handler (int signal, siginfo_t *info, void *ctx)
{
  eassert (HASH_TABLE_P (cpu_log));
  if (backtrace_list && EQ (*backtrace_list->function, Qautomatic_gc))
    /* Special case the time-count inside GC because the hash-table
       code is not prepared to be used while the GC is running.
       More specifically it uses ASIZE at many places where it does
       not expect the ARRAY_MARK_FLAG to be set.  We could try and
       harden the hash-table code, but it doesn't seem worth the
       effort.  */
    cpu_gc_count += current_sample_interval;
  else
    record_backtrace (XHASH_TABLE (cpu_log), current_sample_interval);
}
366
367 /* Record that the current backtrace allocated SIZE bytes. */
368 /* FIXME: Inline it everywhere! */
369 void
370 malloc_probe (size_t size)
371 {
372 if (HASH_TABLE_P (memory_log))
373 record_backtrace (XHASH_TABLE (memory_log), size);
374 }
375
/* Define the profiler's Lisp-visible variables and functions and
   initialize its static state.  Called once during Emacs startup.  */
void
syms_of_profiler (void)
{
  DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth,
	      doc: /* FIXME */);
  profiler_max_stack_depth = 16;
  DEFVAR_INT ("profiler-slot-heap-size", profiler_slot_heap_size,
	      doc: /* FIXME */);
  profiler_slot_heap_size = 10000;

  /* Logs are allocated lazily, on first start of each profiler.
     staticpro protects them from garbage collection.  */
  cpu_log = memory_log = Qnil;
  staticpro (&cpu_log);
  staticpro (&memory_log);

  /* FIXME: Rename things to start with "profiler-", to use "cpu" instead of
     "sample", and to make them sound like they're internal or something.  */
  defsubr (&Ssample_profiler_start);
  defsubr (&Ssample_profiler_stop);
  defsubr (&Ssample_profiler_running_p);
  defsubr (&Ssample_profiler_log);

  defsubr (&Smemory_profiler_start);
  defsubr (&Smemory_profiler_stop);
  defsubr (&Smemory_profiler_running_p);
  defsubr (&Smemory_profiler_log);
}