/*	$NetBSD: tprof.c,v 1.13.8.1 2017/04/29 09:17:59 pgoyette Exp $	*/

/*-
 * Copyright (c)2008,2009,2010 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.13.8.1 2017/04/29 09:17:59 pgoyette Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/localcount.h>
#include <sys/conf.h>
#include <sys/callout.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/workqueue.h>
#include <sys/queue.h>

#include <dev/tprof/tprof.h>
#include <dev/tprof/tprof_ioctl.h>

#include "ioconf.h"

/*
 * locking order:
 *	tprof_reader_lock -> tprof_lock
 *	tprof_startstop_lock -> tprof_lock
 */

/*
 * protected by:
 *	L: tprof_lock
 *	R: tprof_reader_lock
 *	S: tprof_startstop_lock
 *	s: writer should hold tprof_startstop_lock and tprof_lock
 *	   reader should hold tprof_startstop_lock or tprof_lock
 */
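/*
 * For illustration only: the read(2) path below acquires the locks in the
 * documented order, e.g.
 *
 *	mutex_enter(&tprof_reader_lock);
 *	mutex_enter(&tprof_lock);
 *	...
 *	mutex_exit(&tprof_lock);
 *	mutex_exit(&tprof_reader_lock);
 */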

typedef struct tprof_buf {
	u_int b_used;
	u_int b_size;
	u_int b_overflow;
	u_int b_unused;
	STAILQ_ENTRY(tprof_buf) b_list;
	tprof_sample_t b_data[];
} tprof_buf_t;
#define	TPROF_BUF_BYTESIZE(sz) \
	(sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
#define	TPROF_MAX_SAMPLES_PER_BUF	10000

#define	TPROF_MAX_BUF			100

typedef struct {
	tprof_buf_t *c_buf;
	uint32_t c_cpuid;
	struct work c_work;
	callout_t c_callout;
} __aligned(CACHE_LINE_SIZE) tprof_cpu_t;

typedef struct tprof_backend {
	const char *tb_name;
	const tprof_backend_ops_t *tb_ops;
	LIST_ENTRY(tprof_backend) tb_list;
	int tb_usecount;	/* S: */
} tprof_backend_t;

static kmutex_t tprof_lock;
static bool tprof_running;		/* s: */
static u_int tprof_nworker;		/* L: # of running worker LWPs */
static lwp_t *tprof_owner;
static STAILQ_HEAD(, tprof_buf) tprof_list;	/* L: global buffer list */
static u_int tprof_nbuf_on_list;	/* L: # of buffers on tprof_list */
static struct workqueue *tprof_wq;
static tprof_cpu_t tprof_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE);
static u_int tprof_samples_per_buf;

static tprof_backend_t *tprof_backend;	/* S: */
static LIST_HEAD(, tprof_backend) tprof_backends =
    LIST_HEAD_INITIALIZER(tprof_backend); /* S: */

static kmutex_t tprof_reader_lock;
static kcondvar_t tprof_reader_cv;	/* L: */
static off_t tprof_reader_offset;	/* R: */

static kmutex_t tprof_startstop_lock;
static kcondvar_t tprof_cv;		/* L: */

static struct tprof_stat tprof_stat;	/* L: */

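/*
 * tprof_cpu, tprof_curcpu: return the per-cpu tprof state for the given
 * cpu and for the current cpu, respectively.
 */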
static tprof_cpu_t *
tprof_cpu(struct cpu_info *ci)
{

	return &tprof_cpus[cpu_index(ci)];
}

static tprof_cpu_t *
tprof_curcpu(void)
{

	return tprof_cpu(curcpu());
}

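/*
 * tprof_buf_alloc, tprof_buf_free, tprof_buf_switch, tprof_buf_refresh:
 * manage the per-cpu sample buffers.  tprof_buf_switch installs a new
 * buffer on the given cpu and returns the previous one; tprof_buf_refresh
 * does the same for the current cpu, allocating the replacement itself.
 */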
static tprof_buf_t *
tprof_buf_alloc(void)
{
	tprof_buf_t *new;
	u_int size = tprof_samples_per_buf;

	new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP);
	new->b_used = 0;
	new->b_size = size;
	new->b_overflow = 0;
	return new;
}

static void
tprof_buf_free(tprof_buf_t *buf)
{

	kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size));
}

static tprof_buf_t *
tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new)
{
	tprof_buf_t *old;

	old = c->c_buf;
	c->c_buf = new;
	return old;
}

static tprof_buf_t *
tprof_buf_refresh(void)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *new;

	new = tprof_buf_alloc();
	return tprof_buf_switch(c, new);
}

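/*
 * tprof_worker: per-cpu workqueue handler.  Swap in a fresh sample buffer,
 * hand the old one to the reader via tprof_list (or drop it if the list is
 * full or the buffer is empty), and re-arm the callout unless profiling
 * has been stopped.
 */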
static void
tprof_worker(struct work *wk, void *dummy)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *buf;
	bool shouldstop;

	KASSERT(wk == &c->c_work);
	KASSERT(dummy == NULL);

	/*
	 * get a per cpu buffer.
	 */
	buf = tprof_buf_refresh();

	/*
	 * and put it on the global list for read(2).
	 */
	mutex_enter(&tprof_lock);
	shouldstop = !tprof_running;
	if (shouldstop) {
		KASSERT(tprof_nworker > 0);
		tprof_nworker--;
		cv_broadcast(&tprof_cv);
		cv_broadcast(&tprof_reader_cv);
	}
	if (buf->b_used == 0) {
		tprof_stat.ts_emptybuf++;
	} else if (tprof_nbuf_on_list < TPROF_MAX_BUF) {
		tprof_stat.ts_sample += buf->b_used;
		tprof_stat.ts_overflow += buf->b_overflow;
		tprof_stat.ts_buf++;
		STAILQ_INSERT_TAIL(&tprof_list, buf, b_list);
		tprof_nbuf_on_list++;
		buf = NULL;
		cv_broadcast(&tprof_reader_cv);
	} else {
		tprof_stat.ts_dropbuf_sample += buf->b_used;
		tprof_stat.ts_dropbuf++;
	}
	mutex_exit(&tprof_lock);
	if (buf) {
		tprof_buf_free(buf);
	}
	if (!shouldstop) {
		callout_schedule(&c->c_callout, hz);
	}
}

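/*
 * tprof_kick: callout handler; enqueue the per-cpu worker on the cpu the
 * callout is bound to.
 */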
static void
tprof_kick(void *vp)
{
	struct cpu_info * const ci = vp;
	tprof_cpu_t * const c = tprof_cpu(ci);

	workqueue_enqueue(tprof_wq, &c->c_work, ci);
}

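/*
 * tprof_stop1: tear down the per-cpu buffers, callouts, and the workqueue.
 * Called with tprof_startstop_lock held, after all workers have exited.
 */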
static void
tprof_stop1(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	KASSERT(tprof_nworker == 0);

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *old;

		old = tprof_buf_switch(c, NULL);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_destroy(&c->c_callout);
	}
	workqueue_destroy(tprof_wq);
}

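/*
 * tprof_start: start profiling with the currently selected backend.  Size
 * the per-cpu buffers from the backend's estimated sample frequency, create
 * the workqueue and callouts, start the backend, and kick off one worker on
 * each cpu.  Called with tprof_startstop_lock held.
 */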
static int
tprof_start(const struct tprof_param *param)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int error;
	uint64_t freq;
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (tprof_running) {
		error = EBUSY;
		goto done;
	}

	tb = tprof_backend;
	if (tb == NULL) {
		error = ENOENT;
		goto done;
	}
	if (tb->tb_usecount > 0) {
		error = EBUSY;
		goto done;
	}

	tb->tb_usecount++;
	freq = tb->tb_ops->tbo_estimate_freq();
	tprof_samples_per_buf = MIN(freq * 2, TPROF_MAX_SAMPLES_PER_BUF);

	error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, NULL,
	    PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
	if (error != 0) {
		goto done;
	}

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *new;
		tprof_buf_t *old;

		new = tprof_buf_alloc();
		old = tprof_buf_switch(c, new);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_init(&c->c_callout, CALLOUT_MPSAFE);
		callout_setfunc(&c->c_callout, tprof_kick, ci);
	}

	error = tb->tb_ops->tbo_start(NULL);
	if (error != 0) {
		KASSERT(tb->tb_usecount > 0);
		tb->tb_usecount--;
		tprof_stop1();
		goto done;
	}

	mutex_enter(&tprof_lock);
	tprof_running = true;
	mutex_exit(&tprof_lock);
	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);

		mutex_enter(&tprof_lock);
		tprof_nworker++;
		mutex_exit(&tprof_lock);
		workqueue_enqueue(tprof_wq, &c->c_work, ci);
	}
done:
	return error;
}

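/*
 * tprof_stop: stop the backend, wait for all workers to drain, and tear
 * everything down.  Called with tprof_startstop_lock held.
 */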
static void
tprof_stop(void)
{
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (!tprof_running) {
		goto done;
	}

	tb = tprof_backend;
	KASSERT(tb->tb_usecount > 0);
	tb->tb_ops->tbo_stop(NULL);
	tb->tb_usecount--;

	mutex_enter(&tprof_lock);
	tprof_running = false;
	cv_broadcast(&tprof_reader_cv);
	while (tprof_nworker > 0) {
		cv_wait(&tprof_cv, &tprof_lock);
	}
	mutex_exit(&tprof_lock);

	tprof_stop1();
done:
	;
}

/*
 * tprof_clear: drain unread samples.
 */

static void
tprof_clear(void)
{
	tprof_buf_t *buf;

	mutex_enter(&tprof_reader_lock);
	mutex_enter(&tprof_lock);
	while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) {
		STAILQ_REMOVE_HEAD(&tprof_list, b_list);
		KASSERT(tprof_nbuf_on_list > 0);
		tprof_nbuf_on_list--;
		mutex_exit(&tprof_lock);
		tprof_buf_free(buf);
		mutex_enter(&tprof_lock);
	}
	KASSERT(tprof_nbuf_on_list == 0);
	mutex_exit(&tprof_lock);
	tprof_reader_offset = 0;
	mutex_exit(&tprof_reader_lock);

	memset(&tprof_stat, 0, sizeof(tprof_stat));
}

static tprof_backend_t *
tprof_backend_lookup(const char *name)
{
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));

	LIST_FOREACH(tb, &tprof_backends, tb_list) {
		if (!strcmp(tb->tb_name, name)) {
			return tb;
		}
	}
	return NULL;
}

/* -------------------- backend interfaces */

/*
 * tprof_sample: record a sample in the per-cpu buffer.
 *
 * Be careful: this can be called in NMI context.
 * We bluntly assume that the following are safe to use there:
 *	curcpu()
 *	curlwp->l_lid
 *	curlwp->l_proc->p_pid
 */
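/*
 * A minimal sketch of a caller, e.g. a backend's counter-overflow handler
 * (the handler itself and the way it recovers the interrupted PC are
 * hypothetical; only the tfi_pc and tfi_inkernel fields are consumed here):
 *
 *	static void
 *	tprof_foo_overflow_intr(tprof_backend_cookie_t *cookie, uintptr_t pc,
 *	    bool inkernel)
 *	{
 *		tprof_frame_info_t tfi;
 *
 *		tfi.tfi_pc = pc;
 *		tfi.tfi_inkernel = inkernel;
 *		tprof_sample(cookie, &tfi);
 *	}
 */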

void
tprof_sample(tprof_backend_cookie_t *cookie, const tprof_frame_info_t *tfi)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t * const buf = c->c_buf;
	tprof_sample_t *sp;
	const uintptr_t pc = tfi->tfi_pc;
	const lwp_t * const l = curlwp;
	u_int idx;

	idx = buf->b_used;
	if (__predict_false(idx >= buf->b_size)) {
		buf->b_overflow++;
		return;
	}
	sp = &buf->b_data[idx];
	sp->s_pid = l->l_proc->p_pid;
	sp->s_lwpid = l->l_lid;
	sp->s_cpuid = c->c_cpuid;
	sp->s_flags = (tfi->tfi_inkernel) ? TPROF_SAMPLE_INKERNEL : 0;
	sp->s_pc = pc;
	buf->b_used = idx + 1;
}

/*
 * tprof_backend_register: register a sampling backend.  For now only a
 * single backend can be registered at a time.
 */
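/*
 * A minimal sketch of a backend registering itself (the "foo" backend name
 * and function names are hypothetical; the ops members are the ones this
 * file calls):
 *
 *	static const tprof_backend_ops_t tprof_foo_ops = {
 *		.tbo_estimate_freq = tprof_foo_estimate_freq,
 *		.tbo_start = tprof_foo_start,
 *		.tbo_stop = tprof_foo_stop,
 *	};
 *
 *	error = tprof_backend_register("tprof_foo", &tprof_foo_ops,
 *	    TPROF_BACKEND_VERSION);
 *	...
 *	error = tprof_backend_unregister("tprof_foo");
 */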

int
tprof_backend_register(const char *name, const tprof_backend_ops_t *ops,
    int vers)
{
	tprof_backend_t *tb;

	if (vers != TPROF_BACKEND_VERSION) {
		return EINVAL;
	}

	mutex_enter(&tprof_startstop_lock);
	tb = tprof_backend_lookup(name);
	if (tb != NULL) {
		mutex_exit(&tprof_startstop_lock);
		return EEXIST;
	}
#if 1 /* XXX for now */
	if (!LIST_EMPTY(&tprof_backends)) {
		mutex_exit(&tprof_startstop_lock);
		return ENOTSUP;
	}
#endif
	tb = kmem_alloc(sizeof(*tb), KM_SLEEP);
	tb->tb_name = name;
	tb->tb_ops = ops;
	tb->tb_usecount = 0;
	LIST_INSERT_HEAD(&tprof_backends, tb, tb_list);
#if 1 /* XXX for now */
	if (tprof_backend == NULL) {
		tprof_backend = tb;
	}
#endif
	mutex_exit(&tprof_startstop_lock);

	return 0;
}

/*
 * tprof_backend_unregister: unregister a backend.  Fails with EBUSY if the
 * backend is currently in use.
 */

int
tprof_backend_unregister(const char *name)
{
	tprof_backend_t *tb;

	mutex_enter(&tprof_startstop_lock);
	tb = tprof_backend_lookup(name);
#if defined(DIAGNOSTIC)
	if (tb == NULL) {
		mutex_exit(&tprof_startstop_lock);
		panic("%s: not found '%s'", __func__, name);
	}
#endif /* defined(DIAGNOSTIC) */
	if (tb->tb_usecount > 0) {
		mutex_exit(&tprof_startstop_lock);
		return EBUSY;
	}
#if 1 /* XXX for now */
	if (tprof_backend == tb) {
		tprof_backend = NULL;
	}
#endif
	LIST_REMOVE(tb, tb_list);
	mutex_exit(&tprof_startstop_lock);

	kmem_free(tb, sizeof(*tb));

	return 0;
}

/* -------------------- cdevsw interfaces */

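/*
 * A rough sketch of how userland typically drives this device (the device
 * node path is assumed and error handling is elided; the ioctls are the
 * ones handled by tprof_ioctl below):
 *
 *	fd = open("/dev/tprof", O_RDONLY);
 *	ioctl(fd, TPROF_IOC_GETVERSION, &version);
 *	ioctl(fd, TPROF_IOC_START, &param);
 *	while ((n = read(fd, buf, sizeof(buf))) > 0)
 *		... parse the tprof_sample_t records ...
 *	ioctl(fd, TPROF_IOC_STOP, NULL);
 *	ioctl(fd, TPROF_IOC_GETSTAT, &stat);
 */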
static int
tprof_open(dev_t dev, int flags, int type, struct lwp *l)
{

	if (minor(dev) != 0) {
		return EXDEV;
	}
	mutex_enter(&tprof_lock);
	if (tprof_owner != NULL) {
		mutex_exit(&tprof_lock);
		return EBUSY;
	}
	tprof_owner = curlwp;
	mutex_exit(&tprof_lock);

	return 0;
}

static int
tprof_close(dev_t dev, int flags, int type, struct lwp *l)
{

	KASSERT(minor(dev) == 0);

	mutex_enter(&tprof_startstop_lock);
	mutex_enter(&tprof_lock);
	tprof_owner = NULL;
	mutex_exit(&tprof_lock);
	tprof_stop();
	tprof_clear();
	mutex_exit(&tprof_startstop_lock);

	return 0;
}

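/*
 * tprof_read: copy completed sample buffers out to userland, blocking until
 * samples are available or profiling stops.  tprof_reader_offset tracks how
 * far into the buffer at the head of tprof_list the reader has consumed.
 */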
static int
tprof_read(dev_t dev, struct uio *uio, int flags)
{
	tprof_buf_t *buf;
	size_t bytes;
	size_t resid;
	size_t done;
	int error = 0;

	KASSERT(minor(dev) == 0);
	mutex_enter(&tprof_reader_lock);
	while (uio->uio_resid > 0 && error == 0) {
		/*
		 * take the first buffer from the list.
		 */
		mutex_enter(&tprof_lock);
		buf = STAILQ_FIRST(&tprof_list);
		if (buf == NULL) {
			if (tprof_nworker == 0) {
				mutex_exit(&tprof_lock);
				error = 0;
				break;
			}
			mutex_exit(&tprof_reader_lock);
			error = cv_wait_sig(&tprof_reader_cv, &tprof_lock);
			mutex_exit(&tprof_lock);
			mutex_enter(&tprof_reader_lock);
			continue;
		}
		STAILQ_REMOVE_HEAD(&tprof_list, b_list);
		KASSERT(tprof_nbuf_on_list > 0);
		tprof_nbuf_on_list--;
		mutex_exit(&tprof_lock);

		/*
		 * copy it out.
		 */
		bytes = MIN(buf->b_used * sizeof(tprof_sample_t) -
		    tprof_reader_offset, uio->uio_resid);
		resid = uio->uio_resid;
		error = uiomove((char *)buf->b_data + tprof_reader_offset,
		    bytes, uio);
		done = resid - uio->uio_resid;
		tprof_reader_offset += done;

		/*
		 * if we didn't consume the whole buffer,
		 * put it back on the list.
		 */
		if (tprof_reader_offset <
		    buf->b_used * sizeof(tprof_sample_t)) {
			mutex_enter(&tprof_lock);
			STAILQ_INSERT_HEAD(&tprof_list, buf, b_list);
			tprof_nbuf_on_list++;
			cv_broadcast(&tprof_reader_cv);
			mutex_exit(&tprof_lock);
		} else {
			tprof_buf_free(buf);
			tprof_reader_offset = 0;
		}
	}
	mutex_exit(&tprof_reader_lock);

	return error;
}

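/*
 * tprof_ioctl: control interface; report the ABI version, start and stop
 * profiling, and return the accumulated statistics.
 */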
static int
tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
{
	const struct tprof_param *param;
	int error = 0;

	KASSERT(minor(dev) == 0);

	switch (cmd) {
	case TPROF_IOC_GETVERSION:
		*(int *)data = TPROF_VERSION;
		break;
	case TPROF_IOC_START:
		param = data;
		mutex_enter(&tprof_startstop_lock);
		error = tprof_start(param);
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_STOP:
		mutex_enter(&tprof_startstop_lock);
		tprof_stop();
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_GETSTAT:
		mutex_enter(&tprof_lock);
		memcpy(data, &tprof_stat, sizeof(tprof_stat));
		mutex_exit(&tprof_lock);
		break;
	default:
		error = EINVAL;
		break;
	}

	return error;
}

const struct cdevsw tprof_cdevsw = {
	DEVSW_MODULE_INIT
	.d_open = tprof_open,
	.d_close = tprof_close,
	.d_read = tprof_read,
	.d_write = nowrite,
	.d_ioctl = tprof_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

void
tprofattach(int nunits)
{

	/* nothing */
}

MODULE(MODULE_CLASS_DRIVER, tprof, NULL);

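/*
 * tprof_driver_init, tprof_driver_fini: set up and tear down the locks,
 * condition variables and per-cpu state at module load and unload time.
 */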
static void
tprof_driver_init(void)
{
	unsigned int i;

	mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&tprof_cv, "tprof");
	cv_init(&tprof_reader_cv, "tprof_rd");
	STAILQ_INIT(&tprof_list);
	for (i = 0; i < __arraycount(tprof_cpus); i++) {
		tprof_cpu_t * const c = &tprof_cpus[i];

		c->c_buf = NULL;
		c->c_cpuid = i;
	}
}

static void
tprof_driver_fini(void)
{

	mutex_destroy(&tprof_lock);
	mutex_destroy(&tprof_reader_lock);
	mutex_destroy(&tprof_startstop_lock);
	cv_destroy(&tprof_cv);
	cv_destroy(&tprof_reader_cv);
}

static int
tprof_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		tprof_driver_init();
#if defined(_MODULE)
		{
			devmajor_t bmajor = NODEVMAJOR;
			devmajor_t cmajor = NODEVMAJOR;
			int error;

			error = devsw_attach("tprof", NULL, &bmajor,
			    &tprof_cdevsw, &cmajor);
			if (error) {
				tprof_driver_fini();
				return error;
			}
		}
#endif /* defined(_MODULE) */
		return 0;

	case MODULE_CMD_FINI:
#if defined(_MODULE)
		{
			int error;
			error = devsw_detach(NULL, &tprof_cdevsw);
			if (error) {
				return error;
			}
		}
#endif /* defined(_MODULE) */
		tprof_driver_fini();
		return 0;

	default:
		return ENOTTY;
	}
}