/*	$NetBSD: tprof.c,v 1.14.6.1 2023/08/01 17:34:33 martin Exp $	*/

/*-
 * Copyright (c)2008,2009,2010 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.14.6.1 2023/08/01 17:34:33 martin Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/callout.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/workqueue.h>
#include <sys/queue.h>

#include <dev/tprof/tprof.h>
#include <dev/tprof/tprof_ioctl.h>

#include "ioconf.h"

/*
 * locking order:
 *	tprof_reader_lock -> tprof_lock
 *	tprof_startstop_lock -> tprof_lock
 */

/*
 * protected by:
 *	L: tprof_lock
 *	R: tprof_reader_lock
 *	S: tprof_startstop_lock
 *	s: writer should hold tprof_startstop_lock and tprof_lock
 *	   reader should hold tprof_startstop_lock or tprof_lock
 */
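
/*
 * For example (illustrative only; this mirrors what tprof_start() and
 * tprof_worker() below actually do for the "s" field tprof_running):
 *
 *	mutex_enter(&tprof_startstop_lock);	<- "S"
 *	...
 *	mutex_enter(&tprof_lock);		<- "L"
 *	tprof_running = true;			<- "s": both locks held to write
 *	mutex_exit(&tprof_lock);
 *	...
 *	mutex_exit(&tprof_startstop_lock);
 *
 * A reader such as tprof_worker() may then test tprof_running while
 * holding tprof_lock alone.
 */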

typedef struct tprof_buf {
	u_int b_used;
	u_int b_size;
	u_int b_overflow;
	u_int b_unused;
	STAILQ_ENTRY(tprof_buf) b_list;
	tprof_sample_t b_data[];
} tprof_buf_t;
#define	TPROF_BUF_BYTESIZE(sz) \
	(sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
#define	TPROF_MAX_SAMPLES_PER_BUF	10000

#define	TPROF_MAX_BUF			100
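
/*
 * Sizing notes (derived from the code below; illustrative, not normative):
 * tprof_start() sets tprof_samples_per_buf to MIN(freq * 2,
 * TPROF_MAX_SAMPLES_PER_BUF), i.e. roughly two seconds worth of samples at
 * the backend's estimated rate, while tprof_worker() rotates each per-cpu
 * buffer about once per second (callout_schedule(..., hz)), leaving
 * headroom for bursts.  At most TPROF_MAX_BUF * TPROF_MAX_SAMPLES_PER_BUF
 * = 100 * 10000 = 1,000,000 samples can sit on the global list waiting
 * for read(2); beyond that, whole buffers are dropped and accounted in
 * ts_dropbuf/ts_dropbuf_sample.
 */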

typedef struct {
	tprof_buf_t *c_buf;
	uint32_t c_cpuid;
	struct work c_work;
	callout_t c_callout;
} __aligned(CACHE_LINE_SIZE) tprof_cpu_t;

typedef struct tprof_backend {
	const char *tb_name;
	const tprof_backend_ops_t *tb_ops;
	LIST_ENTRY(tprof_backend) tb_list;
	int tb_usecount;	/* S: */
} tprof_backend_t;

static kmutex_t tprof_lock;
static bool tprof_running;		/* s: */
static u_int tprof_nworker;		/* L: # of running worker LWPs */
static lwp_t *tprof_owner;
static STAILQ_HEAD(, tprof_buf) tprof_list;	/* L: global buffer list */
static u_int tprof_nbuf_on_list;	/* L: # of buffers on tprof_list */
static struct workqueue *tprof_wq;
static tprof_cpu_t tprof_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE);
static u_int tprof_samples_per_buf;

static tprof_backend_t *tprof_backend;	/* S: */
static LIST_HEAD(, tprof_backend) tprof_backends =
    LIST_HEAD_INITIALIZER(tprof_backend);	/* S: */

static kmutex_t tprof_reader_lock;
static kcondvar_t tprof_reader_cv;	/* L: */
static off_t tprof_reader_offset;	/* R: */

static kmutex_t tprof_startstop_lock;
static kcondvar_t tprof_cv;		/* L: */

static struct tprof_stat tprof_stat;	/* L: */

static tprof_cpu_t *
tprof_cpu(struct cpu_info *ci)
{

	return &tprof_cpus[cpu_index(ci)];
}

static tprof_cpu_t *
tprof_curcpu(void)
{

	return tprof_cpu(curcpu());
}

static tprof_buf_t *
tprof_buf_alloc(void)
{
	tprof_buf_t *new;
	u_int size = tprof_samples_per_buf;

	new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP);
	new->b_used = 0;
	new->b_size = size;
	new->b_overflow = 0;
	return new;
}

static void
tprof_buf_free(tprof_buf_t *buf)
{

	kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size));
}

static tprof_buf_t *
tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new)
{
	tprof_buf_t *old;

	old = c->c_buf;
	c->c_buf = new;
	return old;
}

static tprof_buf_t *
tprof_buf_refresh(void)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *new;

	new = tprof_buf_alloc();
	return tprof_buf_switch(c, new);
}

static void
tprof_worker(struct work *wk, void *dummy)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *buf;
	bool shouldstop;

	KASSERT(wk == &c->c_work);
	KASSERT(dummy == NULL);

	/*
	 * Get a per cpu buffer.
	 */
	buf = tprof_buf_refresh();

	/*
	 * and put it on the global list for read(2).
	 */
	mutex_enter(&tprof_lock);
	shouldstop = !tprof_running;
	if (shouldstop) {
		KASSERT(tprof_nworker > 0);
		tprof_nworker--;
		cv_broadcast(&tprof_cv);
		cv_broadcast(&tprof_reader_cv);
	}
	if (buf->b_used == 0) {
		tprof_stat.ts_emptybuf++;
	} else if (tprof_nbuf_on_list < TPROF_MAX_BUF) {
		tprof_stat.ts_sample += buf->b_used;
		tprof_stat.ts_overflow += buf->b_overflow;
		tprof_stat.ts_buf++;
		STAILQ_INSERT_TAIL(&tprof_list, buf, b_list);
		tprof_nbuf_on_list++;
		buf = NULL;
		cv_broadcast(&tprof_reader_cv);
	} else {
		tprof_stat.ts_dropbuf_sample += buf->b_used;
		tprof_stat.ts_dropbuf++;
	}
	mutex_exit(&tprof_lock);
	if (buf)
		tprof_buf_free(buf);

	if (!shouldstop)
		callout_schedule(&c->c_callout, hz);
}

static void
tprof_kick(void *vp)
{
	struct cpu_info * const ci = vp;
	tprof_cpu_t * const c = tprof_cpu(ci);

	workqueue_enqueue(tprof_wq, &c->c_work, ci);
}

static void
tprof_stop1(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	KASSERT(tprof_nworker == 0);

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *old;

		old = tprof_buf_switch(c, NULL);
		if (old != NULL)
			tprof_buf_free(old);

		callout_destroy(&c->c_callout);
	}
	workqueue_destroy(tprof_wq);
}

static void
tprof_getinfo(struct tprof_info *info)
{
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));

	memset(info, 0, sizeof(*info));
	info->ti_version = TPROF_VERSION;
	if ((tb = tprof_backend) != NULL)
		info->ti_ident = tb->tb_ops->tbo_ident();
}

static int
tprof_start(const tprof_param_t *param)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int error;
	uint64_t freq;
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (tprof_running) {
		error = EBUSY;
		goto done;
	}

	tb = tprof_backend;
	if (tb == NULL) {
		error = ENOENT;
		goto done;
	}
	if (tb->tb_usecount > 0) {
		error = EBUSY;
		goto done;
	}

	tb->tb_usecount++;
	freq = tb->tb_ops->tbo_estimate_freq();
	tprof_samples_per_buf = MIN(freq * 2, TPROF_MAX_SAMPLES_PER_BUF);

	error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, NULL,
	    PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
	if (error != 0) {
		tb->tb_usecount--;
		goto done;
	}

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *new;
		tprof_buf_t *old;

		new = tprof_buf_alloc();
		old = tprof_buf_switch(c, new);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_init(&c->c_callout, CALLOUT_MPSAFE);
		callout_setfunc(&c->c_callout, tprof_kick, ci);
	}

	error = tb->tb_ops->tbo_start(param);
	if (error != 0) {
		KASSERT(tb->tb_usecount > 0);
		tb->tb_usecount--;
		tprof_stop1();
		goto done;
	}

	mutex_enter(&tprof_lock);
	tprof_running = true;
	mutex_exit(&tprof_lock);
	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);

		mutex_enter(&tprof_lock);
		tprof_nworker++;
		mutex_exit(&tprof_lock);
		workqueue_enqueue(tprof_wq, &c->c_work, ci);
	}
done:
	return error;
}

static void
tprof_stop(void)
{
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (!tprof_running) {
		goto done;
	}

	tb = tprof_backend;
	KASSERT(tb->tb_usecount > 0);
	tb->tb_ops->tbo_stop(NULL);
	tb->tb_usecount--;

	mutex_enter(&tprof_lock);
	tprof_running = false;
	cv_broadcast(&tprof_reader_cv);
	while (tprof_nworker > 0)
		cv_wait(&tprof_cv, &tprof_lock);

	mutex_exit(&tprof_lock);

	tprof_stop1();
done:
	;
}

/*
 * tprof_clear: drain unread samples.
 */

static void
tprof_clear(void)
{
	tprof_buf_t *buf;

	mutex_enter(&tprof_reader_lock);
	mutex_enter(&tprof_lock);
	while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) {
		if (buf != NULL) {
			STAILQ_REMOVE_HEAD(&tprof_list, b_list);
			KASSERT(tprof_nbuf_on_list > 0);
			tprof_nbuf_on_list--;
			mutex_exit(&tprof_lock);
			tprof_buf_free(buf);
			mutex_enter(&tprof_lock);
		}
	}
	KASSERT(tprof_nbuf_on_list == 0);
	mutex_exit(&tprof_lock);
	tprof_reader_offset = 0;
	mutex_exit(&tprof_reader_lock);

	memset(&tprof_stat, 0, sizeof(tprof_stat));
}

static tprof_backend_t *
tprof_backend_lookup(const char *name)
{
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));

	LIST_FOREACH(tb, &tprof_backends, tb_list) {
		if (!strcmp(tb->tb_name, name)) {
			return tb;
		}
	}
	return NULL;
}

/* -------------------- backend interfaces */

/*
 * tprof_sample: record a sample on the per-cpu buffer.
 *
 * Be careful: this can be called in NMI context.
 * We bluntly assume that the following are safe to use there:
 *	curcpu()
 *	curlwp->l_lid
 *	curlwp->l_proc->p_pid
 */

void
tprof_sample(void *unused, const tprof_frame_info_t *tfi)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t * const buf = c->c_buf;
	tprof_sample_t *sp;
	const uintptr_t pc = tfi->tfi_pc;
	const lwp_t * const l = curlwp;
	u_int idx;

	idx = buf->b_used;
	if (__predict_false(idx >= buf->b_size)) {
		buf->b_overflow++;
		return;
	}
	sp = &buf->b_data[idx];
	sp->s_pid = l->l_proc->p_pid;
	sp->s_lwpid = l->l_lid;
	sp->s_cpuid = c->c_cpuid;
	sp->s_flags = (tfi->tfi_inkernel) ? TPROF_SAMPLE_INKERNEL : 0;
	sp->s_pc = pc;
	buf->b_used = idx + 1;
}
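
/*
 * Illustrative sketch (not part of this file): a backend typically calls
 * tprof_sample() from its counter-overflow interrupt handler.  Only
 * tfi_pc and tfi_inkernel, the fields consumed above, are filled in;
 * "tprof_foo", foo_frame_pc() and foo_pc_in_kernel() are made-up names
 * for the example.
 *
 *	static void
 *	tprof_foo_overflow_intr(const struct trapframe *tf)
 *	{
 *		tprof_frame_info_t tfi;
 *
 *		memset(&tfi, 0, sizeof(tfi));
 *		tfi.tfi_pc = foo_frame_pc(tf);
 *		tfi.tfi_inkernel = foo_pc_in_kernel(tfi.tfi_pc);
 *		tprof_sample(NULL, &tfi);
 *	}
 */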

/*
 * tprof_backend_register:
 */

int
tprof_backend_register(const char *name, const tprof_backend_ops_t *ops,
    int vers)
{
	tprof_backend_t *tb;

	if (vers != TPROF_BACKEND_VERSION)
		return EINVAL;

	mutex_enter(&tprof_startstop_lock);
	tb = tprof_backend_lookup(name);
	if (tb != NULL) {
		mutex_exit(&tprof_startstop_lock);
		return EEXIST;
	}
#if 1 /* XXX for now */
	if (!LIST_EMPTY(&tprof_backends)) {
		mutex_exit(&tprof_startstop_lock);
		return ENOTSUP;
	}
#endif
	tb = kmem_alloc(sizeof(*tb), KM_SLEEP);
	tb->tb_name = name;
	tb->tb_ops = ops;
	tb->tb_usecount = 0;
	LIST_INSERT_HEAD(&tprof_backends, tb, tb_list);
#if 1 /* XXX for now */
	if (tprof_backend == NULL) {
		tprof_backend = tb;
	}
#endif
	mutex_exit(&tprof_startstop_lock);

	return 0;
}
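
/*
 * Illustrative sketch (not part of this file): a backend module would
 * register itself roughly as follows.  "tprof_foo" and its functions are
 * hypothetical; the ops members named here are the ones this file calls,
 * and the real tprof_backend_ops_t may contain more.
 *
 *	static const tprof_backend_ops_t tprof_foo_ops = {
 *		.tbo_ident = tprof_foo_ident,
 *		.tbo_estimate_freq = tprof_foo_estimate_freq,
 *		.tbo_start = tprof_foo_start,
 *		.tbo_stop = tprof_foo_stop,
 *	};
 *
 *	error = tprof_backend_register("tprof_foo", &tprof_foo_ops,
 *	    TPROF_BACKEND_VERSION);
 *
 * and would drop itself with tprof_backend_unregister("tprof_foo") on
 * unload.
 */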

/*
 * tprof_backend_unregister:
 */

int
tprof_backend_unregister(const char *name)
{
	tprof_backend_t *tb;

	mutex_enter(&tprof_startstop_lock);
	tb = tprof_backend_lookup(name);
#if defined(DIAGNOSTIC)
	if (tb == NULL) {
		mutex_exit(&tprof_startstop_lock);
		panic("%s: not found '%s'", __func__, name);
	}
#endif /* defined(DIAGNOSTIC) */
	if (tb->tb_usecount > 0) {
		mutex_exit(&tprof_startstop_lock);
		return EBUSY;
	}
#if 1 /* XXX for now */
	if (tprof_backend == tb)
		tprof_backend = NULL;
#endif
	LIST_REMOVE(tb, tb_list);
	mutex_exit(&tprof_startstop_lock);

	kmem_free(tb, sizeof(*tb));

	return 0;
}

/* -------------------- cdevsw interfaces */

static int
tprof_open(dev_t dev, int flags, int type, struct lwp *l)
{

	if (minor(dev) != 0)
		return EXDEV;

	mutex_enter(&tprof_lock);
	if (tprof_owner != NULL) {
		mutex_exit(&tprof_lock);
		return EBUSY;
	}
	tprof_owner = curlwp;
	mutex_exit(&tprof_lock);

	return 0;
}

static int
tprof_close(dev_t dev, int flags, int type, struct lwp *l)
{

	KASSERT(minor(dev) == 0);

	mutex_enter(&tprof_startstop_lock);
	mutex_enter(&tprof_lock);
	tprof_owner = NULL;
	mutex_exit(&tprof_lock);
	tprof_stop();
	tprof_clear();
	mutex_exit(&tprof_startstop_lock);

	return 0;
}

static int
tprof_read(dev_t dev, struct uio *uio, int flags)
{
	tprof_buf_t *buf;
	size_t bytes;
	size_t resid;
	size_t done;
	int error = 0;

	KASSERT(minor(dev) == 0);
	mutex_enter(&tprof_reader_lock);
	while (uio->uio_resid > 0 && error == 0) {
		/*
		 * Take the first buffer from the list.
		 */
		mutex_enter(&tprof_lock);
		buf = STAILQ_FIRST(&tprof_list);
		if (buf == NULL) {
			if (tprof_nworker == 0) {
				mutex_exit(&tprof_lock);
				error = 0;
				break;
			}
			mutex_exit(&tprof_reader_lock);
			error = cv_wait_sig(&tprof_reader_cv, &tprof_lock);
			mutex_exit(&tprof_lock);
			mutex_enter(&tprof_reader_lock);
			continue;
		}
		STAILQ_REMOVE_HEAD(&tprof_list, b_list);
		KASSERT(tprof_nbuf_on_list > 0);
		tprof_nbuf_on_list--;
		mutex_exit(&tprof_lock);

		/*
		 * Copy it out.
		 */
		bytes = MIN(buf->b_used * sizeof(tprof_sample_t) -
		    tprof_reader_offset, uio->uio_resid);
		resid = uio->uio_resid;
		error = uiomove((char *)buf->b_data + tprof_reader_offset,
		    bytes, uio);
		done = resid - uio->uio_resid;
		tprof_reader_offset += done;

		/*
		 * If we didn't consume the whole buffer,
		 * put it back on the list.
		 */
		if (tprof_reader_offset <
		    buf->b_used * sizeof(tprof_sample_t)) {
			mutex_enter(&tprof_lock);
			STAILQ_INSERT_HEAD(&tprof_list, buf, b_list);
			tprof_nbuf_on_list++;
			cv_broadcast(&tprof_reader_cv);
			mutex_exit(&tprof_lock);
		} else {
			tprof_buf_free(buf);
			tprof_reader_offset = 0;
		}
	}
	mutex_exit(&tprof_reader_lock);

	return error;
}

static int
tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
{
	const tprof_param_t *param;
	int error = 0;

	KASSERT(minor(dev) == 0);

	switch (cmd) {
	case TPROF_IOC_GETINFO:
		mutex_enter(&tprof_startstop_lock);
		tprof_getinfo(data);
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_START:
		param = data;
		mutex_enter(&tprof_startstop_lock);
		error = tprof_start(param);
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_STOP:
		mutex_enter(&tprof_startstop_lock);
		tprof_stop();
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_GETSTAT:
		mutex_enter(&tprof_lock);
		memcpy(data, &tprof_stat, sizeof(tprof_stat));
		mutex_exit(&tprof_lock);
		break;
	default:
		error = EINVAL;
		break;
	}

	return error;
}

const struct cdevsw tprof_cdevsw = {
	.d_open = tprof_open,
	.d_close = tprof_close,
	.d_read = tprof_read,
	.d_write = nowrite,
	.d_ioctl = tprof_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};
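
/*
 * Illustrative sketch (not part of this file): userland drives this device
 * roughly as follows.  The device path and the tprof_param_t setup are
 * backend- and installation-specific and are only assumed here; the real
 * consumer is tprof(8).
 *
 *	int fd = open("/dev/tprof", O_RDONLY);
 *	struct tprof_info info;
 *	ioctl(fd, TPROF_IOC_GETINFO, &info);
 *	tprof_param_t param;
 *	memset(&param, 0, sizeof(param));
 *	... fill in backend-specific event selection ...
 *	ioctl(fd, TPROF_IOC_START, &param);
 *	... read(fd, ...) returns arrays of tprof_sample_t ...
 *	ioctl(fd, TPROF_IOC_STOP, NULL);
 *	struct tprof_stat st;
 *	ioctl(fd, TPROF_IOC_GETSTAT, &st);
 *	close(fd);
 */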

void
tprofattach(int nunits)
{

	/* Nothing */
}

MODULE(MODULE_CLASS_DRIVER, tprof, NULL);

static void
tprof_driver_init(void)
{
	unsigned int i;

	mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&tprof_cv, "tprof");
	cv_init(&tprof_reader_cv, "tprof_rd");
	STAILQ_INIT(&tprof_list);
	for (i = 0; i < __arraycount(tprof_cpus); i++) {
		tprof_cpu_t * const c = &tprof_cpus[i];

		c->c_buf = NULL;
		c->c_cpuid = i;
	}
}

static void
tprof_driver_fini(void)
{

	mutex_destroy(&tprof_lock);
	mutex_destroy(&tprof_reader_lock);
	mutex_destroy(&tprof_startstop_lock);
	cv_destroy(&tprof_cv);
	cv_destroy(&tprof_reader_cv);
}

static int
tprof_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		tprof_driver_init();
#if defined(_MODULE)
		{
			devmajor_t bmajor = NODEVMAJOR;
			devmajor_t cmajor = NODEVMAJOR;
			int error;

			error = devsw_attach("tprof", NULL, &bmajor,
			    &tprof_cdevsw, &cmajor);
			if (error) {
				tprof_driver_fini();
				return error;
			}
		}
#endif /* defined(_MODULE) */
		return 0;

	case MODULE_CMD_FINI:
#if defined(_MODULE)
		{
			int error;
			error = devsw_detach(NULL, &tprof_cdevsw);
			if (error) {
				return error;
			}
		}
#endif /* defined(_MODULE) */
		tprof_driver_fini();
		return 0;

	default:
		return ENOTTY;
	}
}