Commit | Line | Data |
---|---|---|
0efc778b | 1 | /* Profiler implementation. |
c2d7786e TM |
2 | |
3 | Copyright (C) 2012 Free Software Foundation, Inc. | |
4 | ||
5 | This file is part of GNU Emacs. | |
6 | ||
7 | GNU Emacs is free software: you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation, either version 3 of the License, or | |
10 | (at your option) any later version. | |
11 | ||
12 | GNU Emacs is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include <config.h> | |
21 | #include <stdio.h> | |
22 | #include <limits.h> | |
23 | #include <sys/time.h> | |
24 | #include <signal.h> | |
25 | #include <setjmp.h> | |
26 | #include "lisp.h" | |
27 | ||
0efc778b TM |
28 | /* True if sampling profiler is running. */ |
29 | ||
30 | bool sample_profiler_running; | |
31 | ||
32 | /* True if memory profiler is running. */ | |
33 | ||
34 | bool memory_profiler_running; | |
35 | ||
c2d7786e | 36 | static void sigprof_handler (int, siginfo_t *, void *); |
c2d7786e TM |
37 | |
38 | \f | |
3d80c99f | 39 | /* Logs. */ |
c2d7786e | 40 | |
3d80c99f | 41 | typedef struct Lisp_Hash_Table log_t; |
c2d7786e TM |
42 | |
43 | static Lisp_Object | |
3d80c99f SM |
44 | make_log (int heap_size, int max_stack_depth) |
45 | { | |
46 | /* We use a standard Elisp hash-table object, but we use it in | |
47 | a special way. This is OK as long as the object is not exposed | |
48 | to Elisp, i.e. until it is returned by *-profiler-log, after which | |
49 | it can't be used any more. */ | |
50 | Lisp_Object log = make_hash_table (Qequal, make_number (heap_size), | |
51 | make_float (DEFAULT_REHASH_SIZE), | |
52 | make_float (DEFAULT_REHASH_THRESHOLD), | |
53 | Qnil, Qnil, Qnil); | |
54 | struct Lisp_Hash_Table *h = XHASH_TABLE (log); | |
55 | ||
56 | /* What is special about our hash-tables is that the keys are pre-filled | |
57 | with the vectors we'll put in them. */ | |
58 | int i = ASIZE (h->key_and_value) / 2; | |
59 | while (0 < i) | |
60 | set_hash_key_slot (h, --i, | |
61 | Fmake_vector (make_number (max_stack_depth), Qnil)); | |
62 | return log; | |
c2d7786e TM |
63 | } |
64 | ||
3d80c99f | 65 | /* Evict the least used half of the hash_table. |
c2d7786e | 66 | |
3d80c99f SM |
67 | When the table is full, we have to evict someone. |
68 | The easiest and most efficient is to evict the value we're about to add | |
69 | (i.e. once the table is full, stop sampling). | |
c2d7786e | 70 | |
3d80c99f SM |
71 | We could also pick the element with the lowest count and evict it, |
72 | but finding it is O(N) and for that amount of work we get very | |
73 | little in return: for the next sample, this latest sample will have | |
74 | count==1 and will hence be a prime candidate for eviction :-( | |
c2d7786e | 75 | |
3d80c99f SM |
76 | So instead, we take O(N) time to eliminate more or less half of the |
77 | entries (the half with the lowest counts). So we get an amortized | |
78 | cost of O(1) and we get O(N) time for a new entry to grow larger | |
79 | than the other least counts before a new round of eviction. */ | |
c2d7786e | 80 | |
3d80c99f SM |
81 | static EMACS_INT approximate_median (log_t *log, |
82 | ptrdiff_t start, ptrdiff_t size) | |
c2d7786e | 83 | { |
3d80c99f SM |
84 | eassert (size > 0); |
85 | if (size < 2) | |
86 | return XINT (HASH_VALUE (log, start)); | |
87 | if (size < 3) | |
88 | /* Not an actual median, but better for our application than | |
89 | choosing either of the two numbers. */ | |
90 | return ((XINT (HASH_VALUE (log, start)) | |
91 | + XINT (HASH_VALUE (log, start + 1))) | |
92 | / 2); | |
c2d7786e | 93 | else |
c2d7786e | 94 | { |
3d80c99f SM |
95 | ptrdiff_t newsize = size / 3; |
96 | ptrdiff_t start2 = start + newsize; | |
97 | EMACS_INT i1 = approximate_median (log, start, newsize); | |
98 | EMACS_INT i2 = approximate_median (log, start2, newsize); | |
99 | EMACS_INT i3 = approximate_median (log, start2 + newsize, | |
100 | size - 2 * newsize); | |
101 | return (i1 < i2 | |
102 | ? (i2 < i3 ? i2 : (i1 < i3 ? i3 : i1)) | |
103 | : (i1 < i3 ? i1 : (i2 < i3 ? i3 : i2))); | |
c2d7786e | 104 | } |
c2d7786e TM |
105 | } |
106 | ||
3d80c99f | 107 | static void evict_lower_half (log_t *log) |
c2d7786e | 108 | { |
3d80c99f SM |
109 | ptrdiff_t size = ASIZE (log->key_and_value) / 2; |
110 | EMACS_INT median = approximate_median (log, 0, size); | |
111 | ptrdiff_t i; | |
c2d7786e | 112 | |
c2d7786e | 113 | for (i = 0; i < size; i++) |
3d80c99f SM |
114 | /* Evict not only values smaller but also values equal to the median, |
115 | so as to make sure we evict something no matter what. */ | |
116 | if (XINT (HASH_VALUE (log, i)) <= median) | |
117 | { | |
118 | Lisp_Object key = HASH_KEY (log, i); | |
119 | { /* FIXME: we could make this more efficient. */ | |
120 | Lisp_Object tmp; | |
121 | XSET_HASH_TABLE (tmp, log); /* FIXME: Use make_lisp_ptr. */ | |
122 | Fremhash (key, tmp); | |
123 | } | |
124 | eassert (EQ (log->next_free, make_number (i))); | |
125 | { | |
126 | int j; | |
127 | eassert (VECTORP (key)); | |
128 | for (j = 0; j < ASIZE (key); j++) | |
129 | ASET (key, i, Qnil); | |
130 | } | |
131 | set_hash_key_slot (log, i, key); | |
132 | } | |
c2d7786e TM |
133 | } |
134 | ||
0efc778b TM |
135 | /* Record the current backtrace in LOG. BASE is a special name for |
136 | describing which the backtrace come from. BASE can be nil. COUNT is | |
137 | a number how many times the profiler sees the backtrace at the | |
138 | time. ELAPSED is a elapsed time in millisecond that the backtrace | |
139 | took. */ | |
140 | ||
c2d7786e | 141 | static void |
3d80c99f | 142 | record_backtrace (log_t *log, size_t count) |
c2d7786e | 143 | { |
c2d7786e | 144 | struct backtrace *backlist = backtrace_list; |
3d80c99f SM |
145 | Lisp_Object backtrace; |
146 | ptrdiff_t index, i = 0; | |
147 | ptrdiff_t asize; | |
c2d7786e | 148 | |
3d80c99f SM |
149 | if (!INTEGERP (log->next_free)) |
150 | evict_lower_half (log); | |
151 | index = XINT (log->next_free); | |
c2d7786e | 152 | |
3d80c99f SM |
153 | /* Get a "working memory" vector. */ |
154 | backtrace = HASH_KEY (log, index); | |
155 | asize = ASIZE (backtrace); | |
12b3895d | 156 | |
0efc778b | 157 | /* Copy the backtrace contents into working memory. */ |
3d80c99f SM |
158 | for (; i < asize && backlist; i++, backlist = backlist->next) |
159 | ASET (backtrace, i, *backlist->function); | |
0efc778b | 160 | |
3d80c99f SM |
161 | /* Make sure that unused space of working memory is filled with nil. */ |
162 | for (; i < asize; i++) | |
163 | ASET (backtrace, i, Qnil); | |
c2d7786e | 164 | |
3d80c99f SM |
165 | { /* We basically do a `gethash+puthash' here, except that we have to be |
166 | careful to avoid memory allocation since we're in a signal | |
167 | handler, and we optimize the code to try and avoid computing the | |
168 | hash+lookup twice. See fns.c:Fputhash for reference. */ | |
169 | EMACS_UINT hash; | |
170 | ptrdiff_t j = hash_lookup (log, backtrace, &hash); | |
171 | if (j >= 0) | |
172 | set_hash_value_slot (log, j, | |
173 | make_number (count + XINT (HASH_VALUE (log, j)))); | |
174 | else | |
175 | { /* BEWARE! hash_put in general can allocate memory. | |
176 | But currently it only does that if log->next_free is nil. */ | |
177 | int j; | |
178 | eassert (!NILP (log->next_free)); | |
179 | j = hash_put (log, backtrace, make_number (count), hash); | |
180 | /* Let's make sure we've put `backtrace' right where it | |
181 | already was to start with. */ | |
182 | eassert (index == j); | |
183 | ||
184 | /* FIXME: If the hash-table is almost full, we should set | |
185 | some global flag so that some Elisp code can offload its | |
186 | data elsewhere, so as to avoid the eviction code. */ | |
187 | } | |
188 | } | |
c2d7786e | 189 | } |
c2d7786e | 190 | \f |
0efc778b | 191 | /* Sample profiler. */ |
c2d7786e | 192 | |
3d80c99f SM |
193 | static Lisp_Object cpu_log; |
194 | /* Separate counter for the time spent in the GC. */ | |
195 | static EMACS_INT cpu_gc_count; | |
0efc778b TM |
196 | |
197 | /* The current sample interval in millisecond. */ | |
198 | ||
c2d7786e TM |
199 | static int current_sample_interval; |
200 | ||
201 | DEFUN ("sample-profiler-start", Fsample_profiler_start, Ssample_profiler_start, | |
202 | 1, 1, 0, | |
0efc778b TM |
203 | doc: /* Start or restart sample profiler. Sample profiler will |
204 | take samples each SAMPLE-INTERVAL in millisecond. See also | |
205 | `profiler-slot-heap-size' and `profiler-max-stack-depth'. */) | |
c2d7786e TM |
206 | (Lisp_Object sample_interval) |
207 | { | |
208 | struct sigaction sa; | |
209 | struct itimerval timer; | |
210 | ||
211 | if (sample_profiler_running) | |
212 | error ("Sample profiler is already running"); | |
213 | ||
3d80c99f SM |
214 | if (NILP (cpu_log)) |
215 | { | |
216 | cpu_gc_count = 0; | |
217 | cpu_log = make_log (profiler_slot_heap_size, | |
218 | profiler_max_stack_depth); | |
219 | } | |
c2d7786e TM |
220 | |
221 | current_sample_interval = XINT (sample_interval); | |
222 | ||
223 | sa.sa_sigaction = sigprof_handler; | |
224 | sa.sa_flags = SA_RESTART | SA_SIGINFO; | |
225 | sigemptyset (&sa.sa_mask); | |
226 | sigaction (SIGPROF, &sa, 0); | |
227 | ||
228 | timer.it_interval.tv_sec = 0; | |
229 | timer.it_interval.tv_usec = current_sample_interval * 1000; | |
230 | timer.it_value = timer.it_interval; | |
231 | setitimer (ITIMER_PROF, &timer, 0); | |
232 | ||
233 | sample_profiler_running = 1; | |
234 | ||
235 | return Qt; | |
236 | } | |
237 | ||
238 | DEFUN ("sample-profiler-stop", Fsample_profiler_stop, Ssample_profiler_stop, | |
239 | 0, 0, 0, | |
0efc778b | 240 | doc: /* Stop sample profiler. Profiler log will be kept. */) |
c2d7786e TM |
241 | (void) |
242 | { | |
243 | if (!sample_profiler_running) | |
244 | error ("Sample profiler is not running"); | |
245 | sample_profiler_running = 0; | |
246 | ||
247 | setitimer (ITIMER_PROF, 0, 0); | |
248 | ||
249 | return Qt; | |
250 | } | |
251 | ||
c2d7786e TM |
252 | DEFUN ("sample-profiler-running-p", |
253 | Fsample_profiler_running_p, Ssample_profiler_running_p, | |
254 | 0, 0, 0, | |
0efc778b | 255 | doc: /* Return t if sample profiler is running. */) |
c2d7786e TM |
256 | (void) |
257 | { | |
258 | return sample_profiler_running ? Qt : Qnil; | |
259 | } | |
260 | ||
261 | DEFUN ("sample-profiler-log", | |
262 | Fsample_profiler_log, Ssample_profiler_log, | |
263 | 0, 0, 0, | |
0efc778b TM |
264 | doc: /* Return sample profiler log. The data is a list of |
265 | (sample nil TIMESTAMP SLOTS), where TIMESTAMP is a timestamp when the | |
266 | log is collected and SLOTS is a list of slots. */) | |
c2d7786e TM |
267 | (void) |
268 | { | |
3d80c99f SM |
269 | Lisp_Object result = cpu_log; |
270 | /* Here we're making the log visible to Elisp , so it's not safe any | |
271 | more for our use afterwards since we can't rely on its special | |
272 | pre-allocated keys anymore. So we have to allocate a new one. */ | |
273 | cpu_log = (sample_profiler_running | |
274 | ? make_log (profiler_slot_heap_size, profiler_max_stack_depth) | |
275 | : Qnil); | |
276 | Fputhash (Fmake_vector (make_number (1), Qautomatic_gc), | |
277 | make_number (cpu_gc_count), | |
278 | result); | |
279 | cpu_gc_count = 0; | |
c2d7786e TM |
280 | return result; |
281 | } | |
282 | ||
283 | \f | |
0efc778b | 284 | /* Memory profiler. */ |
c2d7786e | 285 | |
3d80c99f | 286 | static Lisp_Object memory_log; |
c2d7786e TM |
287 | |
288 | DEFUN ("memory-profiler-start", Fmemory_profiler_start, Smemory_profiler_start, | |
289 | 0, 0, 0, | |
0efc778b TM |
290 | doc: /* Start/restart memory profiler. See also |
291 | `profiler-slot-heap-size' and `profiler-max-stack-depth'. */) | |
c2d7786e TM |
292 | (void) |
293 | { | |
294 | if (memory_profiler_running) | |
295 | error ("Memory profiler is already running"); | |
296 | ||
3d80c99f SM |
297 | if (NILP (memory_log)) |
298 | memory_log = make_log (profiler_slot_heap_size, | |
c2d7786e TM |
299 | profiler_max_stack_depth); |
300 | ||
301 | memory_profiler_running = 1; | |
302 | ||
303 | return Qt; | |
304 | } | |
305 | ||
306 | DEFUN ("memory-profiler-stop", | |
307 | Fmemory_profiler_stop, Smemory_profiler_stop, | |
308 | 0, 0, 0, | |
0efc778b | 309 | doc: /* Stop memory profiler. Profiler log will be kept. */) |
c2d7786e TM |
310 | (void) |
311 | { | |
312 | if (!memory_profiler_running) | |
313 | error ("Memory profiler is not running"); | |
314 | memory_profiler_running = 0; | |
315 | ||
316 | return Qt; | |
317 | } | |
318 | ||
c2d7786e TM |
319 | DEFUN ("memory-profiler-running-p", |
320 | Fmemory_profiler_running_p, Smemory_profiler_running_p, | |
321 | 0, 0, 0, | |
0efc778b | 322 | doc: /* Return t if memory profiler is running. */) |
c2d7786e TM |
323 | (void) |
324 | { | |
325 | return memory_profiler_running ? Qt : Qnil; | |
326 | } | |
327 | ||
328 | DEFUN ("memory-profiler-log", | |
329 | Fmemory_profiler_log, Smemory_profiler_log, | |
330 | 0, 0, 0, | |
0efc778b TM |
331 | doc: /* Return memory profiler log. The data is a list of |
332 | (memory nil TIMESTAMP SLOTS), where TIMESTAMP is a timestamp when the | |
333 | log is collected and SLOTS is a list of slots. */) | |
c2d7786e TM |
334 | (void) |
335 | { | |
3d80c99f SM |
336 | Lisp_Object result = memory_log; |
337 | /* Here we're making the log visible to Elisp , so it's not safe any | |
338 | more for our use afterwards since we can't rely on its special | |
339 | pre-allocated keys anymore. So we have to allocate a new one. */ | |
340 | memory_log = (memory_profiler_running | |
341 | ? make_log (profiler_slot_heap_size, profiler_max_stack_depth) | |
342 | : Qnil); | |
c2d7786e TM |
343 | return result; |
344 | } | |
345 | ||
346 | \f | |
0efc778b | 347 | /* Signals and probes. */ |
c2d7786e | 348 | |
0efc778b | 349 | /* Signal handler for sample profiler. */ |
c2d7786e TM |
350 | |
351 | static void | |
352 | sigprof_handler (int signal, siginfo_t *info, void *ctx) | |
353 | { | |
3d80c99f SM |
354 | eassert (HASH_TABLE_P (cpu_log)); |
355 | if (backtrace_list && EQ (*backtrace_list->function, Qautomatic_gc)) | |
356 | /* Special case the time-count inside GC because the hash-table | |
357 | code is not prepared to be used while the GC is running. | |
358 | More specifically it uses ASIZE at many places where it does | |
359 | not expect the ARRAY_MARK_FLAG to be set. We could try and | |
360 | harden the hash-table code, but it doesn't seem worth the | |
361 | effort. */ | |
362 | cpu_gc_count += current_sample_interval; | |
363 | else | |
364 | record_backtrace (XHASH_TABLE (cpu_log), current_sample_interval); | |
c2d7786e TM |
365 | } |
366 | ||
0efc778b | 367 | /* Record that the current backtrace allocated SIZE bytes. */ |
3d80c99f | 368 | /* FIXME: Inline it everywhere! */ |
c2d7786e TM |
369 | void |
370 | malloc_probe (size_t size) | |
371 | { | |
3d80c99f SM |
372 | if (HASH_TABLE_P (memory_log)) |
373 | record_backtrace (XHASH_TABLE (memory_log), size); | |
c2d7786e TM |
374 | } |
375 | ||
376 | void | |
377 | syms_of_profiler (void) | |
378 | { | |
379 | DEFVAR_INT ("profiler-max-stack-depth", profiler_max_stack_depth, | |
380 | doc: /* FIXME */); | |
381 | profiler_max_stack_depth = 16; | |
382 | DEFVAR_INT ("profiler-slot-heap-size", profiler_slot_heap_size, | |
383 | doc: /* FIXME */); | |
384 | profiler_slot_heap_size = 10000; | |
385 | ||
3d80c99f SM |
386 | cpu_log = memory_log = Qnil; |
387 | staticpro (&cpu_log); | |
388 | staticpro (&memory_log); | |
c2d7786e | 389 | |
3d80c99f SM |
390 | /* FIXME: Rename things to start with "profiler-", to use "cpu" instead of |
391 | "sample", and to make them sound like they're internal or something. */ | |
c2d7786e TM |
392 | defsubr (&Ssample_profiler_start); |
393 | defsubr (&Ssample_profiler_stop); | |
c2d7786e TM |
394 | defsubr (&Ssample_profiler_running_p); |
395 | defsubr (&Ssample_profiler_log); | |
396 | ||
397 | defsubr (&Smemory_profiler_start); | |
398 | defsubr (&Smemory_profiler_stop); | |
c2d7786e TM |
399 | defsubr (&Smemory_profiler_running_p); |
400 | defsubr (&Smemory_profiler_log); | |
401 | } |