/* $NetBSD: tprof.c,v 1.2.8.1 2009/03/03 18:31:52 skrll Exp $ */

/*-
 * Copyright (c)2008 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.2.8.1 2009/03/03 18:31:52 skrll Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/callout.h>
#include <sys/kmem.h>
#include <sys/workqueue.h>
#include <sys/queue.h>

#include <dev/tprof/tprof.h>
#include <dev/tprof/tprof_ioctl.h>

#include <machine/db_machdep.h>	/* PC_REGS */
typedef struct {
	uintptr_t s_pc;		/* program counter */
} tprof_sample_t;

typedef struct tprof_buf {
	u_int b_used;
	u_int b_size;
	u_int b_overflow;
	u_int b_unused;
	STAILQ_ENTRY(tprof_buf) b_list;
	tprof_sample_t b_data[];
} tprof_buf_t;
#define	TPROF_BUF_BYTESIZE(sz) \
	(sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
#define	TPROF_MAX_SAMPLES_PER_BUF	10000

#define	TPROF_MAX_BUF			100

typedef struct {
	tprof_buf_t *c_buf;
	struct work c_work;
	callout_t c_callout;
} __aligned(CACHE_LINE_SIZE) tprof_cpu_t;

/*
 * locking order:
 *	tprof_reader_lock -> tprof_lock
 *	tprof_startstop_lock -> tprof_lock
 */

static kmutex_t tprof_lock;
static bool tprof_running;
static u_int tprof_nworker;
static lwp_t *tprof_owner;
static STAILQ_HEAD(, tprof_buf) tprof_list;
static u_int tprof_nbuf_on_list;
static struct workqueue *tprof_wq;
static tprof_cpu_t tprof_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE);
static u_int tprof_samples_per_buf;

static kmutex_t tprof_reader_lock;
static kcondvar_t tprof_reader_cv;
static off_t tprof_reader_offset;

static kmutex_t tprof_startstop_lock;
static kcondvar_t tprof_cv;

static struct tprof_stat tprof_stat;

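/*
 * tprof_cpu: return the per-cpu profiler state for the given cpu.
 */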
static tprof_cpu_t *
tprof_cpu(struct cpu_info *ci)
{

	return &tprof_cpus[cpu_index(ci)];
}

static tprof_cpu_t *
tprof_curcpu(void)
{

	return tprof_cpu(curcpu());
}

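/*
 * tprof_buf_alloc: allocate an empty sample buffer.  can sleep.
 */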
static tprof_buf_t *
tprof_buf_alloc(void)
{
	tprof_buf_t *new;
	u_int size = tprof_samples_per_buf;

	new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP);
	new->b_used = 0;
	new->b_size = size;
	new->b_overflow = 0;
	return new;
}

static void
tprof_buf_free(tprof_buf_t *buf)
{

	kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size));
}

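/*
 * tprof_buf_switch: install "new" as the cpu's current buffer and return
 * the previous one.
 */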
static tprof_buf_t *
tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new)
{
	tprof_buf_t *old;

	old = c->c_buf;
	c->c_buf = new;
	return old;
}

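/*
 * tprof_buf_refresh: install a freshly allocated buffer on the current cpu
 * and return the buffer it replaces.
 */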
static tprof_buf_t *
tprof_buf_refresh(void)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *new;

	new = tprof_buf_alloc();
	return tprof_buf_switch(c, new);
}

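/*
 * tprof_worker: per-cpu workqueue handler.  periodically swap out the cpu's
 * sample buffer, hand the filled one over to read(2) (or drop it), and
 * re-arm the callout while profiling is still running.
 */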
static void
tprof_worker(struct work *wk, void *dummy)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *buf;
	bool shouldstop;

	KASSERT(wk == &c->c_work);
	KASSERT(dummy == NULL);

	/*
	 * get a per cpu buffer.
	 */
	buf = tprof_buf_refresh();

	/*
	 * and put it on the global list for read(2).
	 */
	mutex_enter(&tprof_lock);
	shouldstop = !tprof_running;
	if (shouldstop) {
		KASSERT(tprof_nworker > 0);
		tprof_nworker--;
		cv_broadcast(&tprof_cv);
		cv_broadcast(&tprof_reader_cv);
	}
	if (buf->b_used == 0) {
		tprof_stat.ts_emptybuf++;
	} else if (tprof_nbuf_on_list < TPROF_MAX_BUF) {
		tprof_stat.ts_sample += buf->b_used;
		tprof_stat.ts_overflow += buf->b_overflow;
		tprof_stat.ts_buf++;
		STAILQ_INSERT_TAIL(&tprof_list, buf, b_list);
		tprof_nbuf_on_list++;
		buf = NULL;
		cv_broadcast(&tprof_reader_cv);
	} else {
		tprof_stat.ts_dropbuf_sample += buf->b_used;
		tprof_stat.ts_dropbuf++;
	}
	mutex_exit(&tprof_lock);
	if (buf) {
		tprof_buf_free(buf);
	}
	if (!shouldstop) {
		callout_schedule(&c->c_callout, hz);
	}
}

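/*
 * tprof_kick: callout handler; enqueue the worker for the given cpu.
 */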
static void
tprof_kick(void *vp)
{
	struct cpu_info * const ci = vp;
	tprof_cpu_t * const c = tprof_cpu(ci);

	workqueue_enqueue(tprof_wq, &c->c_work, ci);
}

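/*
 * tprof_stop1: tear down the per-cpu callouts and buffers and destroy the
 * workqueue.  used both when stopping and when tprof_start fails part way.
 */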
static void
tprof_stop1(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(mutex_owned(&tprof_startstop_lock));

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *old;

		old = tprof_buf_switch(c, NULL);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_destroy(&c->c_callout);
	}
	workqueue_destroy(tprof_wq);
}

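/*
 * tprof_start: start profiling.  size the per-cpu buffers from the backend's
 * estimated sample rate, create the workqueue and callouts, start the
 * backend, and kick off one worker per cpu.
 */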
static int
tprof_start(const struct tprof_param *param)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int error;
	uint64_t freq;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (tprof_running) {
		error = EBUSY;
		goto done;
	}

	freq = tprof_backend_estimate_freq();
	tprof_samples_per_buf = MIN(freq * 2, TPROF_MAX_SAMPLES_PER_BUF);

	error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, NULL,
	    PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
	if (error != 0) {
		goto done;
	}

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *new;
		tprof_buf_t *old;

		new = tprof_buf_alloc();
		old = tprof_buf_switch(c, new);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_init(&c->c_callout, CALLOUT_MPSAFE);
		callout_setfunc(&c->c_callout, tprof_kick, ci);
	}

	error = tprof_backend_start();
	if (error != 0) {
		tprof_stop1();
		goto done;
	}

	mutex_enter(&tprof_lock);
	tprof_running = true;
	mutex_exit(&tprof_lock);
	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);

		mutex_enter(&tprof_lock);
		tprof_nworker++;
		mutex_exit(&tprof_lock);
		workqueue_enqueue(tprof_wq, &c->c_work, ci);
	}
done:
	return error;
}

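/*
 * tprof_stop: stop profiling.  stop the backend, clear tprof_running so the
 * workers retire themselves, wait for them to drain, then tear down with
 * tprof_stop1.
 */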
static void
tprof_stop(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (!tprof_running) {
		goto done;
	}

	tprof_backend_stop();

	mutex_enter(&tprof_lock);
	tprof_running = false;
	cv_broadcast(&tprof_reader_cv);
	mutex_exit(&tprof_lock);

	for (CPU_INFO_FOREACH(cii, ci)) {
		mutex_enter(&tprof_lock);
		while (tprof_nworker > 0) {
			cv_wait(&tprof_cv, &tprof_lock);
		}
		mutex_exit(&tprof_lock);
	}

	tprof_stop1();
done:
	;
}

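/*
 * tprof_clear: discard any buffers still queued for read(2), rewind the
 * reader offset, and zero the statistics.
 */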
static void
tprof_clear(void)
{
	tprof_buf_t *buf;

	mutex_enter(&tprof_reader_lock);
	mutex_enter(&tprof_lock);
	while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) {
		if (buf != NULL) {
			STAILQ_REMOVE_HEAD(&tprof_list, b_list);
			KASSERT(tprof_nbuf_on_list > 0);
			tprof_nbuf_on_list--;
			mutex_exit(&tprof_lock);
			tprof_buf_free(buf);
			mutex_enter(&tprof_lock);
		}
	}
	KASSERT(tprof_nbuf_on_list == 0);
	mutex_exit(&tprof_lock);
	tprof_reader_offset = 0;
	mutex_exit(&tprof_reader_lock);

	memset(&tprof_stat, 0, sizeof(tprof_stat));
}

/* -------------------- backend interfaces */

/*
 * tprof_sample: record a sample on the per-cpu buffer.
 *
 * be careful; can be called in NMI context.
 * we are assuming that curcpu() is safe.
 */

void
tprof_sample(const struct trapframe *tf)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t * const buf = c->c_buf;
	const uintptr_t pc = PC_REGS(tf);
	u_int idx;

	idx = buf->b_used;
	if (__predict_false(idx >= buf->b_size)) {
		buf->b_overflow++;
		return;
	}
	buf->b_data[idx].s_pc = pc;
	buf->b_used = idx + 1;
}

/* -------------------- cdevsw interfaces */

void tprofattach(int);

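/*
 * tprof_open: only minor 0 exists, and only a single lwp may have the
 * device open at a time.
 */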
static int
tprof_open(dev_t dev, int flags, int type, struct lwp *l)
{

	if (minor(dev) != 0) {
		return EXDEV;
	}
	mutex_enter(&tprof_lock);
	if (tprof_owner != NULL) {
		mutex_exit(&tprof_lock);
		return EBUSY;
	}
	tprof_owner = curlwp;
	mutex_exit(&tprof_lock);

	return 0;
}

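/*
 * tprof_close: release ownership, stop profiling, and throw away any
 * unread samples.
 */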
static int
tprof_close(dev_t dev, int flags, int type, struct lwp *l)
{

	KASSERT(minor(dev) == 0);

	mutex_enter(&tprof_startstop_lock);
	mutex_enter(&tprof_lock);
	tprof_owner = NULL;
	mutex_exit(&tprof_lock);
	tprof_stop();
	tprof_clear();
	mutex_exit(&tprof_startstop_lock);

	return 0;
}

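/*
 * tprof_read: copy queued sample buffers out to userland, blocking until
 * samples are available or all workers have stopped.  a partially consumed
 * buffer is put back at the head of the list; tprof_reader_offset remembers
 * how far into that buffer we have read.
 */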
static int
tprof_read(dev_t dev, struct uio *uio, int flags)
{
	tprof_buf_t *buf;
	size_t bytes;
	size_t resid;
	size_t done;
	int error = 0;

	KASSERT(minor(dev) == 0);
	mutex_enter(&tprof_reader_lock);
	while (uio->uio_resid > 0 && error == 0) {
		/*
		 * take the first buffer from the list.
		 */
		mutex_enter(&tprof_lock);
		buf = STAILQ_FIRST(&tprof_list);
		if (buf == NULL) {
			if (tprof_nworker == 0) {
				mutex_exit(&tprof_lock);
				error = 0;
				break;
			}
			mutex_exit(&tprof_reader_lock);
			error = cv_wait_sig(&tprof_reader_cv, &tprof_lock);
			mutex_exit(&tprof_lock);
			mutex_enter(&tprof_reader_lock);
			continue;
		}
		STAILQ_REMOVE_HEAD(&tprof_list, b_list);
		KASSERT(tprof_nbuf_on_list > 0);
		tprof_nbuf_on_list--;
		mutex_exit(&tprof_lock);

		/*
		 * copy it out.
		 */
		bytes = MIN(buf->b_used * sizeof(tprof_sample_t) -
		    tprof_reader_offset, uio->uio_resid);
		resid = uio->uio_resid;
		error = uiomove((char *)buf->b_data + tprof_reader_offset,
		    bytes, uio);
		done = resid - uio->uio_resid;
		tprof_reader_offset += done;

		/*
		 * if we didn't consume the whole buffer,
		 * put it back to the list.
		 */
		if (tprof_reader_offset <
		    buf->b_used * sizeof(tprof_sample_t)) {
			mutex_enter(&tprof_lock);
			STAILQ_INSERT_HEAD(&tprof_list, buf, b_list);
			tprof_nbuf_on_list++;
			cv_broadcast(&tprof_reader_cv);
			mutex_exit(&tprof_lock);
		} else {
			tprof_buf_free(buf);
			tprof_reader_offset = 0;
		}
	}
	mutex_exit(&tprof_reader_lock);

	return error;
}

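/*
 * tprof_ioctl: control interface; report the ABI version, start and stop
 * profiling, and export the statistics counters.
 */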
static int
tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
{
	const struct tprof_param *param;
	int error = 0;

	KASSERT(minor(dev) == 0);

	switch (cmd) {
	case TPROF_IOC_GETVERSION:
		*(int *)data = TPROF_VERSION;
		break;
	case TPROF_IOC_START:
		param = data;
		mutex_enter(&tprof_startstop_lock);
		error = tprof_start(param);
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_STOP:
		mutex_enter(&tprof_startstop_lock);
		tprof_stop();
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_GETSTAT:
		mutex_enter(&tprof_lock);
		memcpy(data, &tprof_stat, sizeof(tprof_stat));
		mutex_exit(&tprof_lock);
		break;
	default:
		error = EINVAL;
		break;
	}

	return error;
}

const struct cdevsw tprof_cdevsw = {
	.d_open = tprof_open,
	.d_close = tprof_close,
	.d_read = tprof_read,
	.d_write = nowrite,
	.d_ioctl = tprof_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_flag = D_OTHER | D_MPSAFE,
};

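/*
 * tprofattach: driver attach; initialize the locks, condition variables,
 * and the buffer list.
 */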
void
tprofattach(int nunits)
{

	mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&tprof_cv, "tprof");
	cv_init(&tprof_reader_cv, "tprofread");
	STAILQ_INIT(&tprof_list);
}