/*	$NetBSD: tprof.c,v 1.19 2022/12/01 00:32:52 ryo Exp $	*/

/*-
 * Copyright (c)2008,2009,2010 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.19 2022/12/01 00:32:52 ryo Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/callout.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/workqueue.h>
#include <sys/xcall.h>

#include <dev/tprof/tprof.h>
#include <dev/tprof/tprof_ioctl.h>

#include "ioconf.h"

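/*
 * TPROF_HZ is the default target sampling rate, in samples per second
 * per profiled counter on each CPU; counter reload values are derived
 * from it in tprof_configure_event().
 */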
#ifndef TPROF_HZ
#define TPROF_HZ	10000
#endif

/*
 * locking order:
 *	tprof_reader_lock -> tprof_lock
 *	tprof_startstop_lock -> tprof_lock
 */

/*
 * protected by:
 *	L: tprof_lock
 *	R: tprof_reader_lock
 *	S: tprof_startstop_lock
 *	s: writer should hold tprof_startstop_lock and tprof_lock
 *	   reader should hold tprof_startstop_lock or tprof_lock
 */

typedef struct tprof_buf {
        u_int b_used;
        u_int b_size;
        u_int b_overflow;
        u_int b_unused;
        STAILQ_ENTRY(tprof_buf) b_list;
        tprof_sample_t b_data[];
} tprof_buf_t;
#define	TPROF_BUF_BYTESIZE(sz) \
        (sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
#define	TPROF_MAX_SAMPLES_PER_BUF	(TPROF_HZ * 2)

#define	TPROF_MAX_BUF			100
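
/*
 * Each per-CPU buffer holds up to TPROF_MAX_SAMPLES_PER_BUF samples
 * (about two seconds' worth at the target rate), and at most
 * TPROF_MAX_BUF completed buffers are kept on the global list before
 * further buffers are dropped by the worker.
 */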

typedef struct {
        tprof_buf_t *c_buf;
        uint32_t c_cpuid;
        struct work c_work;
        callout_t c_callout;
} __aligned(CACHE_LINE_SIZE) tprof_cpu_t;

typedef struct tprof_backend {
        /*
         * tprof_backend_softc_t should be passed as an argument to the
         * interrupt handler, but this is difficult to arrange on armv7/v8,
         * so tprof_backend is exposed instead.  The softc must therefore
         * be placed at the beginning of struct tprof_backend.
         */
        tprof_backend_softc_t tb_softc;

        const char *tb_name;
        const tprof_backend_ops_t *tb_ops;
        LIST_ENTRY(tprof_backend) tb_list;
} tprof_backend_t;

static kmutex_t tprof_lock;
static u_int tprof_nworker;		/* L: # of running worker LWPs */
static lwp_t *tprof_owner;
static STAILQ_HEAD(, tprof_buf) tprof_list;	/* L: global buffer list */
static u_int tprof_nbuf_on_list;	/* L: # of buffers on tprof_list */
static struct workqueue *tprof_wq;
static struct percpu *tprof_cpus __read_mostly;	/* tprof_cpu_t * */
static u_int tprof_samples_per_buf;

tprof_backend_t *tprof_backend;		/* S: */
static LIST_HEAD(, tprof_backend) tprof_backends =
    LIST_HEAD_INITIALIZER(tprof_backend);	/* S: */

static kmutex_t tprof_reader_lock;
static kcondvar_t tprof_reader_cv;	/* L: */
static off_t tprof_reader_offset;	/* R: */

static kmutex_t tprof_startstop_lock;
static kcondvar_t tprof_cv;		/* L: */

static struct tprof_stat tprof_stat;	/* L: */

static tprof_cpu_t *
tprof_cpu_direct(struct cpu_info *ci)
{
        tprof_cpu_t **cp;

        cp = percpu_getptr_remote(tprof_cpus, ci);
        return *cp;
}

static tprof_cpu_t *
tprof_cpu(struct cpu_info *ci)
{
        tprof_cpu_t *c;

        /*
         * As long as xcalls are blocked -- e.g., by kpreempt_disable
         * -- the percpu object will not be swapped and destroyed.  We
         * can't write to it, because the data may have already been
         * moved to a new buffer, but we can safely read from it.
         */
        kpreempt_disable();
        c = tprof_cpu_direct(ci);
        kpreempt_enable();

        return c;
}

static tprof_cpu_t *
tprof_curcpu(void)
{

        return tprof_cpu(curcpu());
}

static tprof_buf_t *
tprof_buf_alloc(void)
{
        tprof_buf_t *new;
        u_int size = tprof_samples_per_buf;

        new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP);
        new->b_used = 0;
        new->b_size = size;
        new->b_overflow = 0;
        return new;
}

static void
tprof_buf_free(tprof_buf_t *buf)
{

        kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size));
}

static tprof_buf_t *
tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new)
{
        tprof_buf_t *old;

        old = c->c_buf;
        c->c_buf = new;
        return old;
}

static tprof_buf_t *
tprof_buf_refresh(void)
{
        tprof_cpu_t * const c = tprof_curcpu();
        tprof_buf_t *new;

        new = tprof_buf_alloc();
        return tprof_buf_switch(c, new);
}

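/*
 * tprof_worker: per-CPU worker, kicked roughly once a second by the
 * callout.  Swaps in a fresh buffer and either queues the old one on
 * tprof_list for read(2), or drops it if the list is full.  Once the
 * backend has stopped, the final invocation tears the worker down
 * instead of rescheduling the callout.
 */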
static void
tprof_worker(struct work *wk, void *dummy)
{
        tprof_cpu_t * const c = tprof_curcpu();
        tprof_buf_t *buf;
        tprof_backend_t *tb;
        bool shouldstop;

        KASSERT(wk == &c->c_work);
        KASSERT(dummy == NULL);

        /*
         * get a per cpu buffer.
         */
        buf = tprof_buf_refresh();

        /*
         * and put it on the global list for read(2).
         */
        mutex_enter(&tprof_lock);
        tb = tprof_backend;
        shouldstop = (tb == NULL || tb->tb_softc.sc_ctr_running_mask == 0);
        if (shouldstop) {
                KASSERT(tprof_nworker > 0);
                tprof_nworker--;
                cv_broadcast(&tprof_cv);
                cv_broadcast(&tprof_reader_cv);
        }
        if (buf->b_used == 0) {
                tprof_stat.ts_emptybuf++;
        } else if (tprof_nbuf_on_list < TPROF_MAX_BUF) {
                tprof_stat.ts_sample += buf->b_used;
                tprof_stat.ts_overflow += buf->b_overflow;
                tprof_stat.ts_buf++;
                STAILQ_INSERT_TAIL(&tprof_list, buf, b_list);
                tprof_nbuf_on_list++;
                buf = NULL;
                cv_broadcast(&tprof_reader_cv);
        } else {
                tprof_stat.ts_dropbuf_sample += buf->b_used;
                tprof_stat.ts_dropbuf++;
        }
        mutex_exit(&tprof_lock);
        if (buf) {
                tprof_buf_free(buf);
        }
        if (!shouldstop) {
                callout_schedule(&c->c_callout, hz);
        }
}

static void
tprof_kick(void *vp)
{
        struct cpu_info * const ci = vp;
        tprof_cpu_t * const c = tprof_cpu(ci);

        workqueue_enqueue(tprof_wq, &c->c_work, ci);
}

static void
tprof_stop1(void)
{
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;

        KASSERT(mutex_owned(&tprof_startstop_lock));
        KASSERT(tprof_nworker == 0);

        for (CPU_INFO_FOREACH(cii, ci)) {
                tprof_cpu_t * const c = tprof_cpu(ci);
                tprof_buf_t *old;

                old = tprof_buf_switch(c, NULL);
                if (old != NULL) {
                        tprof_buf_free(old);
                }
                callout_destroy(&c->c_callout);
        }
        workqueue_destroy(tprof_wq);
}

static void
tprof_getinfo(struct tprof_info *info)
{
        tprof_backend_t *tb;

        KASSERT(mutex_owned(&tprof_startstop_lock));

        memset(info, 0, sizeof(*info));
        info->ti_version = TPROF_VERSION;
        if ((tb = tprof_backend) != NULL) {
                info->ti_ident = tb->tb_ops->tbo_ident();
        }
}

static int
tprof_getncounters(u_int *ncounters)
{
        tprof_backend_t *tb;

        tb = tprof_backend;
        if (tb == NULL)
                return ENOENT;

        *ncounters = tb->tb_ops->tbo_ncounters();
        return 0;
}

static void
tprof_start_cpu(void *arg1, void *arg2)
{
        tprof_backend_t *tb = arg1;
        tprof_countermask_t runmask = (uintptr_t)arg2;

        tb->tb_ops->tbo_start(runmask);
}

static void
tprof_stop_cpu(void *arg1, void *arg2)
{
        tprof_backend_t *tb = arg1;
        tprof_countermask_t stopmask = (uintptr_t)arg2;

        tb->tb_ops->tbo_stop(stopmask);
}

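/*
 * tprof_start: start the counters in runmask on all CPUs.  On the
 * first run this also creates the workqueue, the per-CPU buffers and
 * callouts, and kicks one worker per CPU.
 */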
static int
tprof_start(tprof_countermask_t runmask)
{
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;
        tprof_backend_t *tb;
        uint64_t xc;
        int error;
        bool firstrun;

        KASSERT(mutex_owned(&tprof_startstop_lock));

        tb = tprof_backend;
        if (tb == NULL) {
                error = ENOENT;
                goto done;
        }

        runmask &= ~tb->tb_softc.sc_ctr_running_mask;
        runmask &= tb->tb_softc.sc_ctr_configured_mask;
        if (runmask == 0) {
                /*
                 * targets are already running.
                 * unconfigured counters are ignored.
                 */
                error = 0;
                goto done;
        }

        firstrun = (tb->tb_softc.sc_ctr_running_mask == 0);
        if (firstrun) {
                if (tb->tb_ops->tbo_establish != NULL) {
                        error = tb->tb_ops->tbo_establish(&tb->tb_softc);
                        if (error != 0)
                                goto done;
                }

                tprof_samples_per_buf = TPROF_MAX_SAMPLES_PER_BUF;
                error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker,
                    NULL, PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
                if (error != 0) {
                        if (tb->tb_ops->tbo_disestablish != NULL)
                                tb->tb_ops->tbo_disestablish(&tb->tb_softc);
                        goto done;
                }

                for (CPU_INFO_FOREACH(cii, ci)) {
                        tprof_cpu_t * const c = tprof_cpu(ci);
                        tprof_buf_t *new;
                        tprof_buf_t *old;

                        new = tprof_buf_alloc();
                        old = tprof_buf_switch(c, new);
                        if (old != NULL) {
                                tprof_buf_free(old);
                        }
                        callout_init(&c->c_callout, CALLOUT_MPSAFE);
                        callout_setfunc(&c->c_callout, tprof_kick, ci);
                }
        }

        runmask &= tb->tb_softc.sc_ctr_configured_mask;
        xc = xc_broadcast(0, tprof_start_cpu, tb, (void *)(uintptr_t)runmask);
        xc_wait(xc);
        mutex_enter(&tprof_lock);
        tb->tb_softc.sc_ctr_running_mask |= runmask;
        mutex_exit(&tprof_lock);

        if (firstrun) {
                for (CPU_INFO_FOREACH(cii, ci)) {
                        tprof_cpu_t * const c = tprof_cpu(ci);

                        mutex_enter(&tprof_lock);
                        tprof_nworker++;
                        mutex_exit(&tprof_lock);
                        workqueue_enqueue(tprof_wq, &c->c_work, ci);
                }
        }
done:
        return error;
}

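/*
 * tprof_stop: stop the counters in stopmask on all CPUs.  When the
 * last running counter stops, wait for the per-CPU workers to exit,
 * then release the buffers, callouts and workqueue.
 */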
static void
tprof_stop(tprof_countermask_t stopmask)
{
        tprof_backend_t *tb;
        uint64_t xc;

        tb = tprof_backend;
        if (tb == NULL)
                return;

        KASSERT(mutex_owned(&tprof_startstop_lock));
        stopmask &= tb->tb_softc.sc_ctr_running_mask;
        if (stopmask == 0) {
                /* targets are not running */
                goto done;
        }

        xc = xc_broadcast(0, tprof_stop_cpu, tb, (void *)(uintptr_t)stopmask);
        xc_wait(xc);
        mutex_enter(&tprof_lock);
        tb->tb_softc.sc_ctr_running_mask &= ~stopmask;
        mutex_exit(&tprof_lock);

        /* all counters have stopped? */
        if (tb->tb_softc.sc_ctr_running_mask == 0) {
                mutex_enter(&tprof_lock);
                cv_broadcast(&tprof_reader_cv);
                while (tprof_nworker > 0) {
                        cv_wait(&tprof_cv, &tprof_lock);
                }
                mutex_exit(&tprof_lock);

                tprof_stop1();
                if (tb->tb_ops->tbo_disestablish != NULL)
                        tb->tb_ops->tbo_disestablish(&tb->tb_softc);
        }
done:
        ;
}

static void
tprof_init_percpu_counters_offset(void *vp, void *vp2, struct cpu_info *ci)
{
        uint64_t *counters_offset = vp;
        u_int counter = (uintptr_t)vp2;

        tprof_backend_t *tb = tprof_backend;
        tprof_param_t *param = &tb->tb_softc.sc_count[counter].ctr_param;
        counters_offset[counter] = param->p_value;
}

static void
tprof_configure_event_cpu(void *arg1, void *arg2)
{
        tprof_backend_t *tb = arg1;
        u_int counter = (uintptr_t)arg2;
        tprof_param_t *param = &tb->tb_softc.sc_count[counter].ctr_param;

        tb->tb_ops->tbo_configure_event(counter, param);
}

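/*
 * tprof_configure_event: validate and save the event parameters for
 * one counter, compute its reload value (for profiling counters),
 * seed the per-CPU counter offsets with the initial p_value, and
 * program the event on every CPU via xcall.
 */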
static int
tprof_configure_event(const tprof_param_t *param)
{
        tprof_backend_t *tb;
        tprof_backend_softc_t *sc;
        tprof_param_t *sc_param;
        uint64_t xc;
        int c, error;

        if ((param->p_flags & (TPROF_PARAM_USER | TPROF_PARAM_KERN)) == 0) {
                error = EINVAL;
                goto done;
        }

        tb = tprof_backend;
        if (tb == NULL) {
                error = ENOENT;
                goto done;
        }
        sc = &tb->tb_softc;

        c = param->p_counter;
        if (c >= tb->tb_softc.sc_ncounters) {
                error = EINVAL;
                goto done;
        }

        if (tb->tb_ops->tbo_valid_event != NULL) {
                error = tb->tb_ops->tbo_valid_event(param->p_counter, param);
                if (error != 0)
                        goto done;
        }

        /* if already running, stop the counter */
        if (ISSET(c, tb->tb_softc.sc_ctr_running_mask))
                tprof_stop(__BIT(c));

        sc->sc_count[c].ctr_bitwidth =
            tb->tb_ops->tbo_counter_bitwidth(param->p_counter);

        sc_param = &sc->sc_count[c].ctr_param;
        memcpy(sc_param, param, sizeof(*sc_param));	/* save copy of param */

        if (ISSET(param->p_flags, TPROF_PARAM_PROFILE)) {
                uint64_t freq, inum, dnum;

                freq = tb->tb_ops->tbo_counter_estimate_freq(c);
                sc->sc_count[c].ctr_counter_val = freq / TPROF_HZ;
                if (sc->sc_count[c].ctr_counter_val == 0) {
                        printf("%s: counter#%d frequency (%"PRIu64") is"
                            " very low relative to TPROF_HZ (%u)\n", __func__,
                            c, freq, TPROF_HZ);
                        sc->sc_count[c].ctr_counter_val =
                            4000000000ULL / TPROF_HZ;
                }

                switch (param->p_flags & TPROF_PARAM_VALUE2_MASK) {
                case TPROF_PARAM_VALUE2_SCALE:
                        if (sc_param->p_value2 == 0)
                                break;
                        /*
                         * p_value2 is 64-bit fixed-point
                         * upper 32 bits are the integer part
                         * lower 32 bits are the decimal part
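                         *
                         * e.g., p_value2 == 0x180000000 represents 1.5:
                         * the reload value becomes
                         * val * 1 + ((val * 0x80000000) >> 32) == val * 3 / 2.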
                         */
                        inum = sc_param->p_value2 >> 32;
                        dnum = sc_param->p_value2 & __BITS(31, 0);
                        sc->sc_count[c].ctr_counter_val =
                            sc->sc_count[c].ctr_counter_val * inum +
                            (sc->sc_count[c].ctr_counter_val * dnum >> 32);
                        if (sc->sc_count[c].ctr_counter_val == 0)
                                sc->sc_count[c].ctr_counter_val = 1;
                        break;
                case TPROF_PARAM_VALUE2_TRIGGERCOUNT:
                        if (sc_param->p_value2 == 0)
                                sc_param->p_value2 = 1;
                        if (sc_param->p_value2 >
                            __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0)) {
                                sc_param->p_value2 =
                                    __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0);
                        }
                        sc->sc_count[c].ctr_counter_val = sc_param->p_value2;
                        break;
                default:
                        break;
                }
                sc->sc_count[c].ctr_counter_reset_val =
                    -sc->sc_count[c].ctr_counter_val;
                sc->sc_count[c].ctr_counter_reset_val &=
                    __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0);
        } else {
                sc->sc_count[c].ctr_counter_val = 0;
                sc->sc_count[c].ctr_counter_reset_val = 0;
        }

        /* At this point, p_value is used as an initial value */
        percpu_foreach(tb->tb_softc.sc_ctr_offset_percpu,
            tprof_init_percpu_counters_offset, (void *)(uintptr_t)c);
        /* On the backend side, p_value is used as the reset value */
        sc_param->p_value = tb->tb_softc.sc_count[c].ctr_counter_reset_val;

        xc = xc_broadcast(0, tprof_configure_event_cpu,
            tb, (void *)(uintptr_t)c);
        xc_wait(xc);

        mutex_enter(&tprof_lock);
        /* update counters bitmasks */
        SET(tb->tb_softc.sc_ctr_configured_mask, __BIT(c));
        CLR(tb->tb_softc.sc_ctr_prof_mask, __BIT(c));
        CLR(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c));
        /* profiled counter requires overflow handling */
        if (ISSET(param->p_flags, TPROF_PARAM_PROFILE)) {
                SET(tb->tb_softc.sc_ctr_prof_mask, __BIT(c));
                SET(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c));
        }
        /* counters with less than 64 bits also require overflow handling */
        if (sc->sc_count[c].ctr_bitwidth != 64)
                SET(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c));
        mutex_exit(&tprof_lock);

        error = 0;

done:
        return error;
}

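/*
 * tprof_getcounts_cpu: xcall handler; compute the 64-bit value of each
 * configured counter on the local CPU as the accumulated per-CPU
 * offset plus the current distance of the hardware counter from its
 * reset value, masked to the counter width.
 */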
static void
tprof_getcounts_cpu(void *arg1, void *arg2)
{
        tprof_backend_t *tb = arg1;
        tprof_backend_softc_t *sc = &tb->tb_softc;
        uint64_t *counters = arg2;
        uint64_t *counters_offset;
        unsigned int c;

        tprof_countermask_t configmask = sc->sc_ctr_configured_mask;
        counters_offset = percpu_getref(sc->sc_ctr_offset_percpu);
        for (c = 0; c < sc->sc_ncounters; c++) {
                if (ISSET(configmask, __BIT(c))) {
                        uint64_t ctr = tb->tb_ops->tbo_counter_read(c);
                        counters[c] = counters_offset[c] +
                            ((ctr - sc->sc_count[c].ctr_counter_reset_val) &
                            __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0));
                } else {
                        counters[c] = 0;
                }
        }
        percpu_putref(sc->sc_ctr_offset_percpu);
}

static int
tprof_getcounts(tprof_counts_t *counts)
{
        struct cpu_info *ci;
        tprof_backend_t *tb;
        uint64_t xc;

        tb = tprof_backend;
        if (tb == NULL)
                return ENOENT;

        if (counts->c_cpu >= ncpu)
                return ESRCH;
        ci = cpu_lookup(counts->c_cpu);
        if (ci == NULL)
                return ESRCH;

        xc = xc_unicast(0, tprof_getcounts_cpu, tb, counts->c_count, ci);
        xc_wait(xc);

        counts->c_ncounters = tb->tb_softc.sc_ncounters;
        counts->c_runningmask = tb->tb_softc.sc_ctr_running_mask;
        return 0;
}

/*
 * tprof_clear: drain unread samples.
 */

static void
tprof_clear(void)
{
        tprof_buf_t *buf;

        mutex_enter(&tprof_reader_lock);
        mutex_enter(&tprof_lock);
        while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) {
                STAILQ_REMOVE_HEAD(&tprof_list, b_list);
                KASSERT(tprof_nbuf_on_list > 0);
                tprof_nbuf_on_list--;
                mutex_exit(&tprof_lock);
                tprof_buf_free(buf);
                mutex_enter(&tprof_lock);
        }
        KASSERT(tprof_nbuf_on_list == 0);
        mutex_exit(&tprof_lock);
        tprof_reader_offset = 0;
        mutex_exit(&tprof_reader_lock);

        memset(&tprof_stat, 0, sizeof(tprof_stat));
}

static tprof_backend_t *
tprof_backend_lookup(const char *name)
{
        tprof_backend_t *tb;

        KASSERT(mutex_owned(&tprof_startstop_lock));

        LIST_FOREACH(tb, &tprof_backends, tb_list) {
                if (!strcmp(tb->tb_name, name)) {
                        return tb;
                }
        }
        return NULL;
}

/* -------------------- backend interfaces */

/*
 * tprof_sample: record a sample in the per-CPU buffer.
 *
 * Be careful; this can be called in NMI context.
 * We bluntly assume that the following are safe to use there:
 *	curcpu()
 *	curlwp->l_lid
 *	curlwp->l_proc->p_pid
 */

void
tprof_sample(void *unused, const tprof_frame_info_t *tfi)
{
        tprof_cpu_t * const c = tprof_cpu_direct(curcpu());
        tprof_buf_t * const buf = c->c_buf;
        tprof_sample_t *sp;
        const uintptr_t pc = tfi->tfi_pc;
        const lwp_t * const l = curlwp;
        u_int idx;

        idx = buf->b_used;
        if (__predict_false(idx >= buf->b_size)) {
                buf->b_overflow++;
                return;
        }
        sp = &buf->b_data[idx];
        sp->s_pid = l->l_proc->p_pid;
        sp->s_lwpid = l->l_lid;
        sp->s_cpuid = c->c_cpuid;
        sp->s_flags = ((tfi->tfi_inkernel) ? TPROF_SAMPLE_INKERNEL : 0) |
            __SHIFTIN(tfi->tfi_counter, TPROF_SAMPLE_COUNTER_MASK);
        sp->s_pc = pc;
        buf->b_used = idx + 1;
}

/*
 * tprof_backend_register: register a profiling backend and initialize
 * its softc (number of counters and per-CPU counter offsets).
 */

int
tprof_backend_register(const char *name, const tprof_backend_ops_t *ops,
    int vers)
{
        tprof_backend_t *tb;

        if (vers != TPROF_BACKEND_VERSION) {
                return EINVAL;
        }

        mutex_enter(&tprof_startstop_lock);
        tb = tprof_backend_lookup(name);
        if (tb != NULL) {
                mutex_exit(&tprof_startstop_lock);
                return EEXIST;
        }
#if 1 /* XXX for now */
        if (!LIST_EMPTY(&tprof_backends)) {
                mutex_exit(&tprof_startstop_lock);
                return ENOTSUP;
        }
#endif
        tb = kmem_zalloc(sizeof(*tb), KM_SLEEP);
        tb->tb_name = name;
        tb->tb_ops = ops;
        LIST_INSERT_HEAD(&tprof_backends, tb, tb_list);
#if 1 /* XXX for now */
        if (tprof_backend == NULL) {
                tprof_backend = tb;
        }
#endif
        mutex_exit(&tprof_startstop_lock);

        /* init backend softc */
        tb->tb_softc.sc_ncounters = tb->tb_ops->tbo_ncounters();
        tb->tb_softc.sc_ctr_offset_percpu_size =
            sizeof(uint64_t) * tb->tb_softc.sc_ncounters;
        tb->tb_softc.sc_ctr_offset_percpu =
            percpu_alloc(tb->tb_softc.sc_ctr_offset_percpu_size);

        return 0;
}

/*
 * tprof_backend_unregister: unregister a backend; fails with EBUSY
 * while any of its counters are still running.
 */

int
tprof_backend_unregister(const char *name)
{
        tprof_backend_t *tb;

        mutex_enter(&tprof_startstop_lock);
        tb = tprof_backend_lookup(name);
#if defined(DIAGNOSTIC)
        if (tb == NULL) {
                mutex_exit(&tprof_startstop_lock);
                panic("%s: not found '%s'", __func__, name);
        }
#endif /* defined(DIAGNOSTIC) */
        if (tb->tb_softc.sc_ctr_running_mask != 0) {
                mutex_exit(&tprof_startstop_lock);
                return EBUSY;
        }
#if 1 /* XXX for now */
        if (tprof_backend == tb) {
                tprof_backend = NULL;
        }
#endif
        LIST_REMOVE(tb, tb_list);
        mutex_exit(&tprof_startstop_lock);

        /* fini backend softc */
        percpu_free(tb->tb_softc.sc_ctr_offset_percpu,
            tb->tb_softc.sc_ctr_offset_percpu_size);

        /* free backend */
        kmem_free(tb, sizeof(*tb));

        return 0;
}

/* -------------------- cdevsw interfaces */

static int
tprof_open(dev_t dev, int flags, int type, struct lwp *l)
{

        if (minor(dev) != 0) {
                return EXDEV;
        }
        mutex_enter(&tprof_lock);
        if (tprof_owner != NULL) {
                mutex_exit(&tprof_lock);
                return EBUSY;
        }
        tprof_owner = curlwp;
        mutex_exit(&tprof_lock);

        return 0;
}

static int
tprof_close(dev_t dev, int flags, int type, struct lwp *l)
{

        KASSERT(minor(dev) == 0);

        mutex_enter(&tprof_startstop_lock);
        mutex_enter(&tprof_lock);
        tprof_owner = NULL;
        mutex_exit(&tprof_lock);
        tprof_stop(TPROF_COUNTERMASK_ALL);
        tprof_clear();

        tprof_backend_t *tb = tprof_backend;
        if (tb != NULL) {
                KASSERT(tb->tb_softc.sc_ctr_running_mask == 0);
                tb->tb_softc.sc_ctr_configured_mask = 0;
                tb->tb_softc.sc_ctr_prof_mask = 0;
                tb->tb_softc.sc_ctr_ovf_mask = 0;
        }

        mutex_exit(&tprof_startstop_lock);

        return 0;
}

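/*
 * tprof_read: hand completed buffers to userland as a stream of
 * tprof_sample_t records.  Blocks until samples are available or the
 * last worker has exited.
 */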
static int
tprof_read(dev_t dev, struct uio *uio, int flags)
{
        tprof_buf_t *buf;
        size_t bytes;
        size_t resid;
        size_t done;
        int error = 0;

        KASSERT(minor(dev) == 0);
        mutex_enter(&tprof_reader_lock);
        while (uio->uio_resid > 0 && error == 0) {
                /*
                 * take the first buffer from the list.
                 */
                mutex_enter(&tprof_lock);
                buf = STAILQ_FIRST(&tprof_list);
                if (buf == NULL) {
                        if (tprof_nworker == 0) {
                                mutex_exit(&tprof_lock);
                                error = 0;
                                break;
                        }
                        mutex_exit(&tprof_reader_lock);
                        error = cv_wait_sig(&tprof_reader_cv, &tprof_lock);
                        mutex_exit(&tprof_lock);
                        mutex_enter(&tprof_reader_lock);
                        continue;
                }
                STAILQ_REMOVE_HEAD(&tprof_list, b_list);
                KASSERT(tprof_nbuf_on_list > 0);
                tprof_nbuf_on_list--;
                mutex_exit(&tprof_lock);

                /*
                 * copy it out.
                 */
                bytes = MIN(buf->b_used * sizeof(tprof_sample_t) -
                    tprof_reader_offset, uio->uio_resid);
                resid = uio->uio_resid;
                error = uiomove((char *)buf->b_data + tprof_reader_offset,
                    bytes, uio);
                done = resid - uio->uio_resid;
                tprof_reader_offset += done;

                /*
                 * if we didn't consume the whole buffer,
                 * put it back to the list.
                 */
                if (tprof_reader_offset <
                    buf->b_used * sizeof(tprof_sample_t)) {
                        mutex_enter(&tprof_lock);
                        STAILQ_INSERT_HEAD(&tprof_list, buf, b_list);
                        tprof_nbuf_on_list++;
                        cv_broadcast(&tprof_reader_cv);
                        mutex_exit(&tprof_lock);
                } else {
                        tprof_buf_free(buf);
                        tprof_reader_offset = 0;
                }
        }
        mutex_exit(&tprof_reader_lock);

        return error;
}

static int
tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
{
        const tprof_param_t *param;
        tprof_counts_t *counts;
        int error = 0;

        KASSERT(minor(dev) == 0);

        switch (cmd) {
        case TPROF_IOC_GETINFO:
                mutex_enter(&tprof_startstop_lock);
                tprof_getinfo(data);
                mutex_exit(&tprof_startstop_lock);
                break;
        case TPROF_IOC_GETNCOUNTERS:
                mutex_enter(&tprof_lock);
                error = tprof_getncounters((u_int *)data);
                mutex_exit(&tprof_lock);
                break;
        case TPROF_IOC_START:
                mutex_enter(&tprof_startstop_lock);
                error = tprof_start(*(tprof_countermask_t *)data);
                mutex_exit(&tprof_startstop_lock);
                break;
        case TPROF_IOC_STOP:
                mutex_enter(&tprof_startstop_lock);
                tprof_stop(*(tprof_countermask_t *)data);
                mutex_exit(&tprof_startstop_lock);
                break;
        case TPROF_IOC_GETSTAT:
                mutex_enter(&tprof_lock);
                memcpy(data, &tprof_stat, sizeof(tprof_stat));
                mutex_exit(&tprof_lock);
                break;
        case TPROF_IOC_CONFIGURE_EVENT:
                param = data;
                mutex_enter(&tprof_startstop_lock);
                error = tprof_configure_event(param);
                mutex_exit(&tprof_startstop_lock);
                break;
        case TPROF_IOC_GETCOUNTS:
                counts = data;
                mutex_enter(&tprof_startstop_lock);
                error = tprof_getcounts(counts);
                mutex_exit(&tprof_startstop_lock);
                break;
        default:
                error = EINVAL;
                break;
        }

        return error;
}

const struct cdevsw tprof_cdevsw = {
        .d_open = tprof_open,
        .d_close = tprof_close,
        .d_read = tprof_read,
        .d_write = nowrite,
        .d_ioctl = tprof_ioctl,
        .d_stop = nostop,
        .d_tty = notty,
        .d_poll = nopoll,
        .d_mmap = nommap,
        .d_kqfilter = nokqfilter,
        .d_discard = nodiscard,
        .d_flag = D_OTHER | D_MPSAFE
};
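
/*
 * Rough userland usage sketch (illustrative only; the real consumer is
 * the tprof(8) utility).  The device node name and the backend-specific
 * event selection are assumed here, and error handling is omitted:
 *
 *	int fd = open("/dev/tprof", O_RDWR);
 *	tprof_param_t p = {
 *		.p_counter = 0,
 *		.p_flags = TPROF_PARAM_KERN | TPROF_PARAM_PROFILE,
 *		(backend-specific event selection omitted)
 *	};
 *	ioctl(fd, TPROF_IOC_CONFIGURE_EVENT, &p);
 *	tprof_countermask_t mask = TPROF_COUNTERMASK_ALL;
 *	ioctl(fd, TPROF_IOC_START, &mask);
 *	... read(fd, buf, len) returns tprof_sample_t records ...
 *	ioctl(fd, TPROF_IOC_STOP, &mask);
 *	close(fd);
 */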

void
tprofattach(int nunits)
{

        /* nothing */
}

MODULE(MODULE_CLASS_DRIVER, tprof, NULL);

static void
tprof_cpu_init(void *vcp, void *vcookie, struct cpu_info *ci)
{
        tprof_cpu_t **cp = vcp, *c;

        c = kmem_zalloc(sizeof(*c), KM_SLEEP);
        c->c_buf = NULL;
        c->c_cpuid = cpu_index(ci);
        *cp = c;
}

static void
tprof_cpu_fini(void *vcp, void *vcookie, struct cpu_info *ci)
{
        tprof_cpu_t **cp = vcp, *c;

        c = *cp;
        KASSERT(c->c_cpuid == cpu_index(ci));
        KASSERT(c->c_buf == NULL);
        kmem_free(c, sizeof(*c));
        *cp = NULL;
}

static void
tprof_driver_init(void)
{

        mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE);
        mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE);
        mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE);
        cv_init(&tprof_cv, "tprof");
        cv_init(&tprof_reader_cv, "tprof_rd");
        STAILQ_INIT(&tprof_list);
        tprof_cpus = percpu_create(sizeof(tprof_cpu_t *),
            tprof_cpu_init, tprof_cpu_fini, NULL);
}

static void
tprof_driver_fini(void)
{

        percpu_free(tprof_cpus, sizeof(tprof_cpu_t *));
        mutex_destroy(&tprof_lock);
        mutex_destroy(&tprof_reader_lock);
        mutex_destroy(&tprof_startstop_lock);
        cv_destroy(&tprof_cv);
        cv_destroy(&tprof_reader_cv);
}

static int
tprof_modcmd(modcmd_t cmd, void *arg)
{

        switch (cmd) {
        case MODULE_CMD_INIT:
                tprof_driver_init();
#if defined(_MODULE)
                {
                        devmajor_t bmajor = NODEVMAJOR;
                        devmajor_t cmajor = NODEVMAJOR;
                        int error;

                        error = devsw_attach("tprof", NULL, &bmajor,
                            &tprof_cdevsw, &cmajor);
                        if (error) {
                                tprof_driver_fini();
                                return error;
                        }
                }
#endif /* defined(_MODULE) */
                return 0;

        case MODULE_CMD_FINI:
#if defined(_MODULE)
                devsw_detach(NULL, &tprof_cdevsw);
#endif /* defined(_MODULE) */
                tprof_driver_fini();
                return 0;

        default:
                return ENOTTY;
        }
}