Home | History | Annotate | Line # | Download | only in tprof
tprof.c revision 1.14.14.1
      1 /*	$NetBSD: tprof.c,v 1.14.14.1 2020/12/14 14:38:09 thorpej Exp $	*/
      2 
      3 /*-
      4  * Copyright (c)2008,2009,2010 YAMAMOTO Takashi,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.14.14.1 2020/12/14 14:38:09 thorpej Exp $");
     31 
     32 #include <sys/param.h>
     33 #include <sys/systm.h>
     34 #include <sys/kernel.h>
     35 
     36 #include <sys/callout.h>
     37 #include <sys/conf.h>
     38 #include <sys/cpu.h>
     39 #include <sys/kmem.h>
     40 #include <sys/module.h>
     41 #include <sys/percpu.h>
     42 #include <sys/proc.h>
     43 #include <sys/queue.h>
     44 #include <sys/workqueue.h>
     45 
     46 #include <dev/tprof/tprof.h>
     47 #include <dev/tprof/tprof_ioctl.h>
     48 
     49 #include "ioconf.h"
     50 
     51 /*
     52  * locking order:
     53  *	tprof_reader_lock -> tprof_lock
     54  *	tprof_startstop_lock -> tprof_lock
     55  */
     56 
     57 /*
     58  * protected by:
     59  *	L: tprof_lock
     60  *	R: tprof_reader_lock
     61  *	S: tprof_startstop_lock
     62  *	s: writer should hold tprof_startstop_lock and tprof_lock
     63  *	   reader should hold tprof_startstop_lock or tprof_lock
     64  */
     65 
/*
 * tprof_buf_t: a buffer of samples.
 *
 * One buffer at a time is attached to each CPU (tprof_cpu_t::c_buf) and
 * filled by tprof_sample(); tprof_worker() then moves it to the global
 * tprof_list, where tprof_read() consumes it.
 */
typedef struct tprof_buf {
	u_int b_used;		/* # of entries of b_data already filled */
	u_int b_size;		/* capacity of b_data, in samples */
	u_int b_overflow;	/* # of samples rejected because it was full */
	u_int b_unused;		/* not referenced anywhere in this file */
	STAILQ_ENTRY(tprof_buf) b_list;	/* linkage on tprof_list */
	tprof_sample_t b_data[];	/* the samples; b_size entries */
} tprof_buf_t;
/* allocation size of a tprof_buf_t which can hold sz samples. */
#define	TPROF_BUF_BYTESIZE(sz) \
	(sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
/* upper bound of tprof_samples_per_buf; applied by tprof_start(). */
#define	TPROF_MAX_SAMPLES_PER_BUF	10000

/* max # of buffers kept on tprof_list; tprof_worker() drops the excess. */
#define	TPROF_MAX_BUF			100
     79 
/*
 * tprof_cpu_t: per-CPU state.
 *
 * Allocated by tprof_cpu_init() and reached through the tprof_cpus
 * percpu object, which only holds a pointer to it.
 */
typedef struct {
	tprof_buf_t *c_buf;	/* buffer being filled; NULL when stopped */
	uint32_t c_cpuid;	/* cpu_index() of the owning CPU */
	struct work c_work;	/* work item queued on tprof_wq */
	callout_t c_callout;	/* calls tprof_kick() to re-queue c_work */
} __aligned(CACHE_LINE_SIZE) tprof_cpu_t;
     86 
/*
 * tprof_backend_t: a backend registered by tprof_backend_register().
 */
typedef struct tprof_backend {
	const char *tb_name;	/* caller's string; the pointer is kept as is */
	const tprof_backend_ops_t *tb_ops;	/* backend entry points */
	LIST_ENTRY(tprof_backend) tb_list;	/* linkage on tprof_backends */
	int tb_usecount;	/* S: */
} tprof_backend_t;
     93 
/*
 * driver-wide state.  the letter in each trailing comment names the lock
 * which protects the variable; see the "protected by" legend above.
 */
static kmutex_t tprof_lock;
static bool tprof_running;		/* s: */
static u_int tprof_nworker;		/* L: # of running worker LWPs */
/* set by tprof_open() and cleared by tprof_close(), under tprof_lock */
static lwp_t *tprof_owner;
static STAILQ_HEAD(, tprof_buf) tprof_list; /* L: global buffer list */
static u_int tprof_nbuf_on_list;	/* L: # of buffers on tprof_list */
/* exists only between tprof_start() and tprof_stop1() */
static struct workqueue *tprof_wq;
static struct percpu *tprof_cpus __read_mostly;	/* tprof_cpu_t * */
/* b_size of newly allocated buffers; computed by tprof_start() */
static u_int tprof_samples_per_buf;

/* the backend used by tprof_start(); the first one registered */
static tprof_backend_t *tprof_backend;	/* S: */
/*
 * NOTE(review): the initializer is given tprof_backend, not
 * tprof_backends.  presumably harmless if the macro ignores its
 * argument -- confirm against <sys/queue.h>.
 */
static LIST_HEAD(, tprof_backend) tprof_backends =
    LIST_HEAD_INITIALIZER(tprof_backend); /* S: */

static kmutex_t tprof_reader_lock;
/* signalled when a buffer is queued and when workers are stopping */
static kcondvar_t tprof_reader_cv;	/* L: */
/* byte offset of the next unread data in the first buffer on tprof_list */
static off_t tprof_reader_offset;	/* R: */

static kmutex_t tprof_startstop_lock;
/* signalled by a worker when it retires; tprof_stop() waits on it */
static kcondvar_t tprof_cv;		/* L: */

/* counters returned by TPROF_IOC_GETSTAT */
static struct tprof_stat tprof_stat;	/* L: */
    116 
    117 static tprof_cpu_t *
    118 tprof_cpu(struct cpu_info *ci)
    119 {
    120 	tprof_cpu_t **cp, *c;
    121 
    122 	/*
    123 	 * As long as xcalls are blocked -- e.g., by kpreempt_disable
    124 	 * -- the percpu object will not be swapped and destroyed.  We
    125 	 * can't write to it, because the data may have already been
    126 	 * moved to a new buffer, but we can safely read from it.
    127 	 */
    128 	kpreempt_disable();
    129 	cp = percpu_getptr_remote(tprof_cpus, ci);
    130 	c = *cp;
    131 	kpreempt_enable();
    132 
    133 	return c;
    134 }
    135 
    136 static tprof_cpu_t *
    137 tprof_curcpu(void)
    138 {
    139 
    140 	return tprof_cpu(curcpu());
    141 }
    142 
    143 static tprof_buf_t *
    144 tprof_buf_alloc(void)
    145 {
    146 	tprof_buf_t *new;
    147 	u_int size = tprof_samples_per_buf;
    148 
    149 	new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP);
    150 	new->b_used = 0;
    151 	new->b_size = size;
    152 	new->b_overflow = 0;
    153 	return new;
    154 }
    155 
    156 static void
    157 tprof_buf_free(tprof_buf_t *buf)
    158 {
    159 
    160 	kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size));
    161 }
    162 
    163 static tprof_buf_t *
    164 tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new)
    165 {
    166 	tprof_buf_t *old;
    167 
    168 	old = c->c_buf;
    169 	c->c_buf = new;
    170 	return old;
    171 }
    172 
    173 static tprof_buf_t *
    174 tprof_buf_refresh(void)
    175 {
    176 	tprof_cpu_t * const c = tprof_curcpu();
    177 	tprof_buf_t *new;
    178 
    179 	new = tprof_buf_alloc();
    180 	return tprof_buf_switch(c, new);
    181 }
    182 
    183 static void
    184 tprof_worker(struct work *wk, void *dummy)
    185 {
    186 	tprof_cpu_t * const c = tprof_curcpu();
    187 	tprof_buf_t *buf;
    188 	bool shouldstop;
    189 
    190 	KASSERT(wk == &c->c_work);
    191 	KASSERT(dummy == NULL);
    192 
    193 	/*
    194 	 * get a per cpu buffer.
    195 	 */
    196 	buf = tprof_buf_refresh();
    197 
    198 	/*
    199 	 * and put it on the global list for read(2).
    200 	 */
    201 	mutex_enter(&tprof_lock);
    202 	shouldstop = !tprof_running;
    203 	if (shouldstop) {
    204 		KASSERT(tprof_nworker > 0);
    205 		tprof_nworker--;
    206 		cv_broadcast(&tprof_cv);
    207 		cv_broadcast(&tprof_reader_cv);
    208 	}
    209 	if (buf->b_used == 0) {
    210 		tprof_stat.ts_emptybuf++;
    211 	} else if (tprof_nbuf_on_list < TPROF_MAX_BUF) {
    212 		tprof_stat.ts_sample += buf->b_used;
    213 		tprof_stat.ts_overflow += buf->b_overflow;
    214 		tprof_stat.ts_buf++;
    215 		STAILQ_INSERT_TAIL(&tprof_list, buf, b_list);
    216 		tprof_nbuf_on_list++;
    217 		buf = NULL;
    218 		cv_broadcast(&tprof_reader_cv);
    219 	} else {
    220 		tprof_stat.ts_dropbuf_sample += buf->b_used;
    221 		tprof_stat.ts_dropbuf++;
    222 	}
    223 	mutex_exit(&tprof_lock);
    224 	if (buf) {
    225 		tprof_buf_free(buf);
    226 	}
    227 	if (!shouldstop) {
    228 		callout_schedule(&c->c_callout, hz);
    229 	}
    230 }
    231 
    232 static void
    233 tprof_kick(void *vp)
    234 {
    235 	struct cpu_info * const ci = vp;
    236 	tprof_cpu_t * const c = tprof_cpu(ci);
    237 
    238 	workqueue_enqueue(tprof_wq, &c->c_work, ci);
    239 }
    240 
    241 static void
    242 tprof_stop1(void)
    243 {
    244 	CPU_INFO_ITERATOR cii;
    245 	struct cpu_info *ci;
    246 
    247 	KASSERT(mutex_owned(&tprof_startstop_lock));
    248 	KASSERT(tprof_nworker == 0);
    249 
    250 	for (CPU_INFO_FOREACH(cii, ci)) {
    251 		tprof_cpu_t * const c = tprof_cpu(ci);
    252 		tprof_buf_t *old;
    253 
    254 		old = tprof_buf_switch(c, NULL);
    255 		if (old != NULL) {
    256 			tprof_buf_free(old);
    257 		}
    258 		callout_destroy(&c->c_callout);
    259 	}
    260 	workqueue_destroy(tprof_wq);
    261 }
    262 
    263 static void
    264 tprof_getinfo(struct tprof_info *info)
    265 {
    266 	tprof_backend_t *tb;
    267 
    268 	KASSERT(mutex_owned(&tprof_startstop_lock));
    269 
    270 	memset(info, 0, sizeof(*info));
    271 	info->ti_version = TPROF_VERSION;
    272 	if ((tb = tprof_backend) != NULL) {
    273 		info->ti_ident = tb->tb_ops->tbo_ident();
    274 	}
    275 }
    276 
    277 static int
    278 tprof_start(const tprof_param_t *param)
    279 {
    280 	CPU_INFO_ITERATOR cii;
    281 	struct cpu_info *ci;
    282 	int error;
    283 	uint64_t freq;
    284 	tprof_backend_t *tb;
    285 
    286 	KASSERT(mutex_owned(&tprof_startstop_lock));
    287 	if (tprof_running) {
    288 		error = EBUSY;
    289 		goto done;
    290 	}
    291 
    292 	tb = tprof_backend;
    293 	if (tb == NULL) {
    294 		error = ENOENT;
    295 		goto done;
    296 	}
    297 	if (tb->tb_usecount > 0) {
    298 		error = EBUSY;
    299 		goto done;
    300 	}
    301 
    302 	tb->tb_usecount++;
    303 	freq = tb->tb_ops->tbo_estimate_freq();
    304 	tprof_samples_per_buf = MIN(freq * 2, TPROF_MAX_SAMPLES_PER_BUF);
    305 
    306 	error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, NULL,
    307 	    PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
    308 	if (error != 0) {
    309 		goto done;
    310 	}
    311 
    312 	for (CPU_INFO_FOREACH(cii, ci)) {
    313 		tprof_cpu_t * const c = tprof_cpu(ci);
    314 		tprof_buf_t *new;
    315 		tprof_buf_t *old;
    316 
    317 		new = tprof_buf_alloc();
    318 		old = tprof_buf_switch(c, new);
    319 		if (old != NULL) {
    320 			tprof_buf_free(old);
    321 		}
    322 		callout_init(&c->c_callout, CALLOUT_MPSAFE);
    323 		callout_setfunc(&c->c_callout, tprof_kick, ci);
    324 	}
    325 
    326 	error = tb->tb_ops->tbo_start(param);
    327 	if (error != 0) {
    328 		KASSERT(tb->tb_usecount > 0);
    329 		tb->tb_usecount--;
    330 		tprof_stop1();
    331 		goto done;
    332 	}
    333 
    334 	mutex_enter(&tprof_lock);
    335 	tprof_running = true;
    336 	mutex_exit(&tprof_lock);
    337 	for (CPU_INFO_FOREACH(cii, ci)) {
    338 		tprof_cpu_t * const c = tprof_cpu(ci);
    339 
    340 		mutex_enter(&tprof_lock);
    341 		tprof_nworker++;
    342 		mutex_exit(&tprof_lock);
    343 		workqueue_enqueue(tprof_wq, &c->c_work, ci);
    344 	}
    345 done:
    346 	return error;
    347 }
    348 
    349 static void
    350 tprof_stop(void)
    351 {
    352 	tprof_backend_t *tb;
    353 
    354 	KASSERT(mutex_owned(&tprof_startstop_lock));
    355 	if (!tprof_running) {
    356 		goto done;
    357 	}
    358 
    359 	tb = tprof_backend;
    360 	KASSERT(tb->tb_usecount > 0);
    361 	tb->tb_ops->tbo_stop(NULL);
    362 	tb->tb_usecount--;
    363 
    364 	mutex_enter(&tprof_lock);
    365 	tprof_running = false;
    366 	cv_broadcast(&tprof_reader_cv);
    367 	while (tprof_nworker > 0) {
    368 		cv_wait(&tprof_cv, &tprof_lock);
    369 	}
    370 	mutex_exit(&tprof_lock);
    371 
    372 	tprof_stop1();
    373 done:
    374 	;
    375 }
    376 
    377 /*
    378  * tprof_clear: drain unread samples.
    379  */
    380 
    381 static void
    382 tprof_clear(void)
    383 {
    384 	tprof_buf_t *buf;
    385 
    386 	mutex_enter(&tprof_reader_lock);
    387 	mutex_enter(&tprof_lock);
    388 	while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) {
    389 		if (buf != NULL) {
    390 			STAILQ_REMOVE_HEAD(&tprof_list, b_list);
    391 			KASSERT(tprof_nbuf_on_list > 0);
    392 			tprof_nbuf_on_list--;
    393 			mutex_exit(&tprof_lock);
    394 			tprof_buf_free(buf);
    395 			mutex_enter(&tprof_lock);
    396 		}
    397 	}
    398 	KASSERT(tprof_nbuf_on_list == 0);
    399 	mutex_exit(&tprof_lock);
    400 	tprof_reader_offset = 0;
    401 	mutex_exit(&tprof_reader_lock);
    402 
    403 	memset(&tprof_stat, 0, sizeof(tprof_stat));
    404 }
    405 
    406 static tprof_backend_t *
    407 tprof_backend_lookup(const char *name)
    408 {
    409 	tprof_backend_t *tb;
    410 
    411 	KASSERT(mutex_owned(&tprof_startstop_lock));
    412 
    413 	LIST_FOREACH(tb, &tprof_backends, tb_list) {
    414 		if (!strcmp(tb->tb_name, name)) {
    415 			return tb;
    416 		}
    417 	}
    418 	return NULL;
    419 }
    420 
    421 /* -------------------- backend interfaces */
    422 
    423 /*
    424  * tprof_sample: record a sample on the per-cpu buffer.
    425  *
    426  * be careful; can be called in NMI context.
    427  * we are bluntly assuming the followings are safe.
    428  *	curcpu()
    429  *	curlwp->l_lid
    430  *	curlwp->l_proc->p_pid
    431  */
    432 
    433 void
    434 tprof_sample(void *unused, const tprof_frame_info_t *tfi)
    435 {
    436 	tprof_cpu_t * const c = tprof_curcpu();
    437 	tprof_buf_t * const buf = c->c_buf;
    438 	tprof_sample_t *sp;
    439 	const uintptr_t pc = tfi->tfi_pc;
    440 	const lwp_t * const l = curlwp;
    441 	u_int idx;
    442 
    443 	idx = buf->b_used;
    444 	if (__predict_false(idx >= buf->b_size)) {
    445 		buf->b_overflow++;
    446 		return;
    447 	}
    448 	sp = &buf->b_data[idx];
    449 	sp->s_pid = l->l_proc->p_pid;
    450 	sp->s_lwpid = l->l_lid;
    451 	sp->s_cpuid = c->c_cpuid;
    452 	sp->s_flags = (tfi->tfi_inkernel) ? TPROF_SAMPLE_INKERNEL : 0;
    453 	sp->s_pc = pc;
    454 	buf->b_used = idx + 1;
    455 }
    456 
    457 /*
    458  * tprof_backend_register:
    459  */
    460 
    461 int
    462 tprof_backend_register(const char *name, const tprof_backend_ops_t *ops,
    463     int vers)
    464 {
    465 	tprof_backend_t *tb;
    466 
    467 	if (vers != TPROF_BACKEND_VERSION) {
    468 		return EINVAL;
    469 	}
    470 
    471 	mutex_enter(&tprof_startstop_lock);
    472 	tb = tprof_backend_lookup(name);
    473 	if (tb != NULL) {
    474 		mutex_exit(&tprof_startstop_lock);
    475 		return EEXIST;
    476 	}
    477 #if 1 /* XXX for now */
    478 	if (!LIST_EMPTY(&tprof_backends)) {
    479 		mutex_exit(&tprof_startstop_lock);
    480 		return ENOTSUP;
    481 	}
    482 #endif
    483 	tb = kmem_alloc(sizeof(*tb), KM_SLEEP);
    484 	tb->tb_name = name;
    485 	tb->tb_ops = ops;
    486 	tb->tb_usecount = 0;
    487 	LIST_INSERT_HEAD(&tprof_backends, tb, tb_list);
    488 #if 1 /* XXX for now */
    489 	if (tprof_backend == NULL) {
    490 		tprof_backend = tb;
    491 	}
    492 #endif
    493 	mutex_exit(&tprof_startstop_lock);
    494 
    495 	return 0;
    496 }
    497 
    498 /*
    499  * tprof_backend_unregister:
    500  */
    501 
    502 int
    503 tprof_backend_unregister(const char *name)
    504 {
    505 	tprof_backend_t *tb;
    506 
    507 	mutex_enter(&tprof_startstop_lock);
    508 	tb = tprof_backend_lookup(name);
    509 #if defined(DIAGNOSTIC)
    510 	if (tb == NULL) {
    511 		mutex_exit(&tprof_startstop_lock);
    512 		panic("%s: not found '%s'", __func__, name);
    513 	}
    514 #endif /* defined(DIAGNOSTIC) */
    515 	if (tb->tb_usecount > 0) {
    516 		mutex_exit(&tprof_startstop_lock);
    517 		return EBUSY;
    518 	}
    519 #if 1 /* XXX for now */
    520 	if (tprof_backend == tb) {
    521 		tprof_backend = NULL;
    522 	}
    523 #endif
    524 	LIST_REMOVE(tb, tb_list);
    525 	mutex_exit(&tprof_startstop_lock);
    526 
    527 	kmem_free(tb, sizeof(*tb));
    528 
    529 	return 0;
    530 }
    531 
    532 /* -------------------- cdevsw interfaces */
    533 
    534 static int
    535 tprof_open(dev_t dev, int flags, int type, struct lwp *l)
    536 {
    537 
    538 	if (minor(dev) != 0) {
    539 		return EXDEV;
    540 	}
    541 	mutex_enter(&tprof_lock);
    542 	if (tprof_owner != NULL) {
    543 		mutex_exit(&tprof_lock);
    544 		return  EBUSY;
    545 	}
    546 	tprof_owner = curlwp;
    547 	mutex_exit(&tprof_lock);
    548 
    549 	return 0;
    550 }
    551 
    552 static int
    553 tprof_close(dev_t dev, int flags, int type, struct lwp *l)
    554 {
    555 
    556 	KASSERT(minor(dev) == 0);
    557 
    558 	mutex_enter(&tprof_startstop_lock);
    559 	mutex_enter(&tprof_lock);
    560 	tprof_owner = NULL;
    561 	mutex_exit(&tprof_lock);
    562 	tprof_stop();
    563 	tprof_clear();
    564 	mutex_exit(&tprof_startstop_lock);
    565 
    566 	return 0;
    567 }
    568 
    569 static int
    570 tprof_read(dev_t dev, struct uio *uio, int flags)
    571 {
    572 	tprof_buf_t *buf;
    573 	size_t bytes;
    574 	size_t resid;
    575 	size_t done;
    576 	int error = 0;
    577 
    578 	KASSERT(minor(dev) == 0);
    579 	mutex_enter(&tprof_reader_lock);
    580 	while (uio->uio_resid > 0 && error == 0) {
    581 		/*
    582 		 * take the first buffer from the list.
    583 		 */
    584 		mutex_enter(&tprof_lock);
    585 		buf = STAILQ_FIRST(&tprof_list);
    586 		if (buf == NULL) {
    587 			if (tprof_nworker == 0) {
    588 				mutex_exit(&tprof_lock);
    589 				error = 0;
    590 				break;
    591 			}
    592 			mutex_exit(&tprof_reader_lock);
    593 			error = cv_wait_sig(&tprof_reader_cv, &tprof_lock);
    594 			mutex_exit(&tprof_lock);
    595 			mutex_enter(&tprof_reader_lock);
    596 			continue;
    597 		}
    598 		STAILQ_REMOVE_HEAD(&tprof_list, b_list);
    599 		KASSERT(tprof_nbuf_on_list > 0);
    600 		tprof_nbuf_on_list--;
    601 		mutex_exit(&tprof_lock);
    602 
    603 		/*
    604 		 * copy it out.
    605 		 */
    606 		bytes = MIN(buf->b_used * sizeof(tprof_sample_t) -
    607 		    tprof_reader_offset, uio->uio_resid);
    608 		resid = uio->uio_resid;
    609 		error = uiomove((char *)buf->b_data + tprof_reader_offset,
    610 		    bytes, uio);
    611 		done = resid - uio->uio_resid;
    612 		tprof_reader_offset += done;
    613 
    614 		/*
    615 		 * if we didn't consume the whole buffer,
    616 		 * put it back to the list.
    617 		 */
    618 		if (tprof_reader_offset <
    619 		    buf->b_used * sizeof(tprof_sample_t)) {
    620 			mutex_enter(&tprof_lock);
    621 			STAILQ_INSERT_HEAD(&tprof_list, buf, b_list);
    622 			tprof_nbuf_on_list++;
    623 			cv_broadcast(&tprof_reader_cv);
    624 			mutex_exit(&tprof_lock);
    625 		} else {
    626 			tprof_buf_free(buf);
    627 			tprof_reader_offset = 0;
    628 		}
    629 	}
    630 	mutex_exit(&tprof_reader_lock);
    631 
    632 	return error;
    633 }
    634 
    635 static int
    636 tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
    637 {
    638 	const tprof_param_t *param;
    639 	int error = 0;
    640 
    641 	KASSERT(minor(dev) == 0);
    642 
    643 	switch (cmd) {
    644 	case TPROF_IOC_GETINFO:
    645 		mutex_enter(&tprof_startstop_lock);
    646 		tprof_getinfo(data);
    647 		mutex_exit(&tprof_startstop_lock);
    648 		break;
    649 	case TPROF_IOC_START:
    650 		param = data;
    651 		mutex_enter(&tprof_startstop_lock);
    652 		error = tprof_start(param);
    653 		mutex_exit(&tprof_startstop_lock);
    654 		break;
    655 	case TPROF_IOC_STOP:
    656 		mutex_enter(&tprof_startstop_lock);
    657 		tprof_stop();
    658 		mutex_exit(&tprof_startstop_lock);
    659 		break;
    660 	case TPROF_IOC_GETSTAT:
    661 		mutex_enter(&tprof_lock);
    662 		memcpy(data, &tprof_stat, sizeof(tprof_stat));
    663 		mutex_exit(&tprof_lock);
    664 		break;
    665 	default:
    666 		error = EINVAL;
    667 		break;
    668 	}
    669 
    670 	return error;
    671 }
    672 
/*
 * character device switch of tprof.  open, close, read and ioctl are
 * implemented in this file; every other entry is one of the no*
 * placeholders.
 */
const struct cdevsw tprof_cdevsw = {
	.d_open = tprof_open,
	.d_close = tprof_close,
	.d_read = tprof_read,
	.d_write = nowrite,
	.d_ioctl = tprof_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};
    687 
/*
 * tprofattach: attach routine; the driver is set up by tprof_modcmd()
 * instead, so nunits is ignored.
 */
void
tprofattach(int nunits)
{

	/* nothing to do here */
}
    694 
/*
 * the "tprof" module, of the driver class; its commands are handled by
 * tprof_modcmd().
 * NOTE(review): the NULL is presumably the list of required modules --
 * confirm against module(9).
 */
MODULE(MODULE_CLASS_DRIVER, tprof, NULL);
    696 
    697 static void
    698 tprof_cpu_init(void *vcp, void *vcookie, struct cpu_info *ci)
    699 {
    700 	tprof_cpu_t **cp = vcp, *c;
    701 
    702 	c = kmem_zalloc(sizeof(*c), KM_SLEEP);
    703 	c->c_buf = NULL;
    704 	c->c_cpuid = cpu_index(ci);
    705 	*cp = c;
    706 }
    707 
    708 static void
    709 tprof_cpu_fini(void *vcp, void *vcookie, struct cpu_info *ci)
    710 {
    711 	tprof_cpu_t **cp = vcp, *c;
    712 
    713 	c = *cp;
    714 	KASSERT(c->c_cpuid == cpu_index(ci));
    715 	KASSERT(c->c_buf == NULL);
    716 	kmem_free(c, sizeof(*c));
    717 	*cp = NULL;
    718 }
    719 
    720 static void
    721 tprof_driver_init(void)
    722 {
    723 
    724 	mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE);
    725 	mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE);
    726 	mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE);
    727 	cv_init(&tprof_cv, "tprof");
    728 	cv_init(&tprof_reader_cv, "tprof_rd");
    729 	STAILQ_INIT(&tprof_list);
    730 	tprof_cpus = percpu_create(sizeof(tprof_cpu_t *),
    731 	    tprof_cpu_init, tprof_cpu_fini, NULL);
    732 }
    733 
    734 static void
    735 tprof_driver_fini(void)
    736 {
    737 
    738 	percpu_free(tprof_cpus, sizeof(tprof_cpu_t *));
    739 	mutex_destroy(&tprof_lock);
    740 	mutex_destroy(&tprof_reader_lock);
    741 	mutex_destroy(&tprof_startstop_lock);
    742 	cv_destroy(&tprof_cv);
    743 	cv_destroy(&tprof_reader_cv);
    744 }
    745 
    746 static int
    747 tprof_modcmd(modcmd_t cmd, void *arg)
    748 {
    749 
    750 	switch (cmd) {
    751 	case MODULE_CMD_INIT:
    752 		tprof_driver_init();
    753 #if defined(_MODULE)
    754 		{
    755 			devmajor_t bmajor = NODEVMAJOR;
    756 			devmajor_t cmajor = NODEVMAJOR;
    757 			int error;
    758 
    759 			error = devsw_attach("tprof", NULL, &bmajor,
    760 			    &tprof_cdevsw, &cmajor);
    761 			if (error) {
    762 				tprof_driver_fini();
    763 				return error;
    764 			}
    765 		}
    766 #endif /* defined(_MODULE) */
    767 		return 0;
    768 
    769 	case MODULE_CMD_FINI:
    770 #if defined(_MODULE)
    771 		{
    772 			int error;
    773 			error = devsw_detach(NULL, &tprof_cdevsw);
    774 			if (error) {
    775 				return error;
    776 			}
    777 		}
    778 #endif /* defined(_MODULE) */
    779 		tprof_driver_fini();
    780 		return 0;
    781 
    782 	default:
    783 		return ENOTTY;
    784 	}
    785 }
    786