Home | History | Annotate | Line # | Download | only in tprof
tprof.c revision 1.2.8.1
      1 /*	$NetBSD: tprof.c,v 1.2.8.1 2009/03/03 18:31:52 skrll Exp $	*/
      2 
      3 /*-
      4  * Copyright (c)2008 YAMAMOTO Takashi,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.2.8.1 2009/03/03 18:31:52 skrll Exp $");
     31 
     32 #include <sys/param.h>
     33 #include <sys/systm.h>
     34 #include <sys/kernel.h>
     35 
     36 #include <sys/cpu.h>
     37 #include <sys/conf.h>
     38 #include <sys/callout.h>
     39 #include <sys/kmem.h>
     40 #include <sys/workqueue.h>
     41 #include <sys/queue.h>
     42 
     43 #include <dev/tprof/tprof.h>
     44 #include <dev/tprof/tprof_ioctl.h>
     45 
     46 #include <machine/db_machdep.h> /* PC_REGS */
     47 
/*
 * tprof_sample_t: one recorded sample.
 *
 * this is the element type of tprof_buf_t::b_data.
 */
typedef struct {
	uintptr_t s_pc;		/* sampled program counter */
} tprof_sample_t;
     51 
     52 typedef struct tprof_buf {
     53 	u_int b_used;
     54 	u_int b_size;
     55 	u_int b_overflow;
     56 	u_int b_unused;
     57 	STAILQ_ENTRY(tprof_buf) b_list;
     58 	tprof_sample_t b_data[];
     59 } tprof_buf_t;
     60 #define	TPROF_BUF_BYTESIZE(sz) \
     61 	(sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
     62 #define	TPROF_MAX_SAMPLES_PER_BUF	10000
     63 
     64 #define	TPROF_MAX_BUF			100
     65 
     66 typedef struct {
     67 	tprof_buf_t *c_buf;
     68 	struct work c_work;
     69 	callout_t c_callout;
     70 } __aligned(CACHE_LINE_SIZE) tprof_cpu_t;
     71 
     72 /*
     73  * locking order:
     74  *	tprof_reader_lock -> tprof_lock
     75  *	tprof_startstop_lock -> tprof_lock
     76  */
     77 
     78 static kmutex_t tprof_lock;
     79 static bool tprof_running;
     80 static u_int tprof_nworker;
     81 static lwp_t *tprof_owner;
     82 static STAILQ_HEAD(, tprof_buf) tprof_list;
     83 static u_int tprof_nbuf_on_list;
     84 static struct workqueue *tprof_wq;
     85 static tprof_cpu_t tprof_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE);
     86 static u_int tprof_samples_per_buf;
     87 
     88 static kmutex_t tprof_reader_lock;
     89 static kcondvar_t tprof_reader_cv;
     90 static off_t tprof_reader_offset;
     91 
     92 static kmutex_t tprof_startstop_lock;
     93 static kcondvar_t tprof_cv;
     94 
     95 static struct tprof_stat tprof_stat;
     96 
     97 static tprof_cpu_t *
     98 tprof_cpu(struct cpu_info *ci)
     99 {
    100 
    101 	return &tprof_cpus[cpu_index(ci)];
    102 }
    103 
    104 static tprof_cpu_t *
    105 tprof_curcpu(void)
    106 {
    107 
    108 	return tprof_cpu(curcpu());
    109 }
    110 
    111 static tprof_buf_t *
    112 tprof_buf_alloc(void)
    113 {
    114 	tprof_buf_t *new;
    115 	u_int size = tprof_samples_per_buf;
    116 
    117 	new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP);
    118 	new->b_used = 0;
    119 	new->b_size = size;
    120 	new->b_overflow = 0;
    121 	return new;
    122 }
    123 
    124 static void
    125 tprof_buf_free(tprof_buf_t *buf)
    126 {
    127 
    128 	kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size));
    129 }
    130 
    131 static tprof_buf_t *
    132 tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new)
    133 {
    134 	tprof_buf_t *old;
    135 
    136 	old = c->c_buf;
    137 	c->c_buf = new;
    138 	return old;
    139 }
    140 
    141 static tprof_buf_t *
    142 tprof_buf_refresh(void)
    143 {
    144 	tprof_cpu_t * const c = tprof_curcpu();
    145 	tprof_buf_t *new;
    146 
    147 	new = tprof_buf_alloc();
    148 	return tprof_buf_switch(c, new);
    149 }
    150 
    151 static void
    152 tprof_worker(struct work *wk, void *dummy)
    153 {
    154 	tprof_cpu_t * const c = tprof_curcpu();
    155 	tprof_buf_t *buf;
    156 	bool shouldstop;
    157 
    158 	KASSERT(wk == &c->c_work);
    159 	KASSERT(dummy == NULL);
    160 
    161 	/*
    162 	 * get a per cpu buffer.
    163 	 */
    164 	buf = tprof_buf_refresh();
    165 
    166 	/*
    167 	 * and put it on the global list for read(2).
    168 	 */
    169 	mutex_enter(&tprof_lock);
    170 	shouldstop = !tprof_running;
    171 	if (shouldstop) {
    172 		KASSERT(tprof_nworker > 0);
    173 		tprof_nworker--;
    174 		cv_broadcast(&tprof_cv);
    175 		cv_broadcast(&tprof_reader_cv);
    176 	}
    177 	if (buf->b_used == 0) {
    178 		tprof_stat.ts_emptybuf++;
    179 	} else if (tprof_nbuf_on_list < TPROF_MAX_BUF) {
    180 		tprof_stat.ts_sample += buf->b_used;
    181 		tprof_stat.ts_overflow += buf->b_overflow;
    182 		tprof_stat.ts_buf++;
    183 		STAILQ_INSERT_TAIL(&tprof_list, buf, b_list);
    184 		tprof_nbuf_on_list++;
    185 		buf = NULL;
    186 		cv_broadcast(&tprof_reader_cv);
    187 	} else {
    188 		tprof_stat.ts_dropbuf_sample += buf->b_used;
    189 		tprof_stat.ts_dropbuf++;
    190 	}
    191 	mutex_exit(&tprof_lock);
    192 	if (buf) {
    193 		tprof_buf_free(buf);
    194 	}
    195 	if (!shouldstop) {
    196 		callout_schedule(&c->c_callout, hz);
    197 	}
    198 }
    199 
    200 static void
    201 tprof_kick(void *vp)
    202 {
    203 	struct cpu_info * const ci = vp;
    204 	tprof_cpu_t * const c = tprof_cpu(ci);
    205 
    206 	workqueue_enqueue(tprof_wq, &c->c_work, ci);
    207 }
    208 
    209 static void
    210 tprof_stop1(void)
    211 {
    212 	CPU_INFO_ITERATOR cii;
    213 	struct cpu_info *ci;
    214 
    215 	KASSERT(mutex_owned(&tprof_startstop_lock));
    216 
    217 	for (CPU_INFO_FOREACH(cii, ci)) {
    218 		tprof_cpu_t * const c = tprof_cpu(ci);
    219 		tprof_buf_t *old;
    220 
    221 		old = tprof_buf_switch(c, NULL);
    222 		if (old != NULL) {
    223 			tprof_buf_free(old);
    224 		}
    225 		callout_destroy(&c->c_callout);
    226 	}
    227 	workqueue_destroy(tprof_wq);
    228 }
    229 
    230 static int
    231 tprof_start(const struct tprof_param *param)
    232 {
    233 	CPU_INFO_ITERATOR cii;
    234 	struct cpu_info *ci;
    235 	int error;
    236 	uint64_t freq;
    237 
    238 	KASSERT(mutex_owned(&tprof_startstop_lock));
    239 	if (tprof_running) {
    240 		error = EBUSY;
    241 		goto done;
    242 	}
    243 
    244 	freq = tprof_backend_estimate_freq();
    245 	tprof_samples_per_buf = MIN(freq * 2, TPROF_MAX_SAMPLES_PER_BUF);
    246 
    247 	error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, NULL,
    248 	    PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
    249 	if (error != 0) {
    250 		goto done;
    251 	}
    252 
    253 	for (CPU_INFO_FOREACH(cii, ci)) {
    254 		tprof_cpu_t * const c = tprof_cpu(ci);
    255 		tprof_buf_t *new;
    256 		tprof_buf_t *old;
    257 
    258 		new = tprof_buf_alloc();
    259 		old = tprof_buf_switch(c, new);
    260 		if (old != NULL) {
    261 			tprof_buf_free(old);
    262 		}
    263 		callout_init(&c->c_callout, CALLOUT_MPSAFE);
    264 		callout_setfunc(&c->c_callout, tprof_kick, ci);
    265 	}
    266 
    267 	error = tprof_backend_start();
    268 	if (error != 0) {
    269 		tprof_stop1();
    270 		goto done;
    271 	}
    272 
    273 	mutex_enter(&tprof_lock);
    274 	tprof_running = true;
    275 	mutex_exit(&tprof_lock);
    276 	for (CPU_INFO_FOREACH(cii, ci)) {
    277 		tprof_cpu_t * const c = tprof_cpu(ci);
    278 
    279 		mutex_enter(&tprof_lock);
    280 		tprof_nworker++;
    281 		mutex_exit(&tprof_lock);
    282 		workqueue_enqueue(tprof_wq, &c->c_work, ci);
    283 	}
    284 done:
    285 	return error;
    286 }
    287 
    288 static void
    289 tprof_stop(void)
    290 {
    291 	CPU_INFO_ITERATOR cii;
    292 	struct cpu_info *ci;
    293 
    294 	KASSERT(mutex_owned(&tprof_startstop_lock));
    295 	if (!tprof_running) {
    296 		goto done;
    297 	}
    298 
    299 	tprof_backend_stop();
    300 
    301 	mutex_enter(&tprof_lock);
    302 	tprof_running = false;
    303 	cv_broadcast(&tprof_reader_cv);
    304 	mutex_exit(&tprof_lock);
    305 
    306 	for (CPU_INFO_FOREACH(cii, ci)) {
    307 		mutex_enter(&tprof_lock);
    308 		while (tprof_nworker > 0) {
    309 			cv_wait(&tprof_cv, &tprof_lock);
    310 		}
    311 		mutex_exit(&tprof_lock);
    312 	}
    313 
    314 	tprof_stop1();
    315 done:
    316 	;
    317 }
    318 
    319 static void
    320 tprof_clear(void)
    321 {
    322 	tprof_buf_t *buf;
    323 
    324 	mutex_enter(&tprof_reader_lock);
    325 	mutex_enter(&tprof_lock);
    326 	while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) {
    327 		if (buf != NULL) {
    328 			STAILQ_REMOVE_HEAD(&tprof_list, b_list);
    329 			KASSERT(tprof_nbuf_on_list > 0);
    330 			tprof_nbuf_on_list--;
    331 			mutex_exit(&tprof_lock);
    332 			tprof_buf_free(buf);
    333 			mutex_enter(&tprof_lock);
    334 		}
    335 	}
    336 	KASSERT(tprof_nbuf_on_list == 0);
    337 	mutex_exit(&tprof_lock);
    338 	tprof_reader_offset = 0;
    339 	mutex_exit(&tprof_reader_lock);
    340 
    341 	memset(&tprof_stat, 0, sizeof(tprof_stat));
    342 }
    343 
    344 /* -------------------- backend interfaces */
    345 
    346 /*
    347  * tprof_sample: record a sample on the per-cpu buffer.
    348  *
    349  * be careful; can be called in NMI context.
    350  * we are assuming that curcpu() is safe.
    351  */
    352 
    353 void
    354 tprof_sample(const struct trapframe *tf)
    355 {
    356 	tprof_cpu_t * const c = tprof_curcpu();
    357 	tprof_buf_t * const buf = c->c_buf;
    358 	const uintptr_t pc = PC_REGS(tf);
    359 	u_int idx;
    360 
    361 	idx = buf->b_used;
    362 	if (__predict_false(idx >= buf->b_size)) {
    363 		buf->b_overflow++;
    364 		return;
    365 	}
    366 	buf->b_data[idx].s_pc = pc;
    367 	buf->b_used = idx + 1;
    368 }
    369 
    370 /* -------------------- cdevsw interfaces */
    371 
    372 void tprofattach(int);
    373 
    374 static int
    375 tprof_open(dev_t dev, int flags, int type, struct lwp *l)
    376 {
    377 
    378 	if (minor(dev) != 0) {
    379 		return EXDEV;
    380 	}
    381 	mutex_enter(&tprof_lock);
    382 	if (tprof_owner != NULL) {
    383 		mutex_exit(&tprof_lock);
    384 		return  EBUSY;
    385 	}
    386 	tprof_owner = curlwp;
    387 	mutex_exit(&tprof_lock);
    388 
    389 	return 0;
    390 }
    391 
    392 static int
    393 tprof_close(dev_t dev, int flags, int type, struct lwp *l)
    394 {
    395 
    396 	KASSERT(minor(dev) == 0);
    397 
    398 	mutex_enter(&tprof_startstop_lock);
    399 	mutex_enter(&tprof_lock);
    400 	tprof_owner = NULL;
    401 	mutex_exit(&tprof_lock);
    402 	tprof_stop();
    403 	tprof_clear();
    404 	mutex_exit(&tprof_startstop_lock);
    405 
    406 	return 0;
    407 }
    408 
    409 static int
    410 tprof_read(dev_t dev, struct uio *uio, int flags)
    411 {
    412 	tprof_buf_t *buf;
    413 	size_t bytes;
    414 	size_t resid;
    415 	size_t done;
    416 	int error = 0;
    417 
    418 	KASSERT(minor(dev) == 0);
    419 	mutex_enter(&tprof_reader_lock);
    420 	while (uio->uio_resid > 0 && error == 0) {
    421 		/*
    422 		 * take the first buffer from the list.
    423 		 */
    424 		mutex_enter(&tprof_lock);
    425 		buf = STAILQ_FIRST(&tprof_list);
    426 		if (buf == NULL) {
    427 			if (tprof_nworker == 0) {
    428 				mutex_exit(&tprof_lock);
    429 				error = 0;
    430 				break;
    431 			}
    432 			mutex_exit(&tprof_reader_lock);
    433 			error = cv_wait_sig(&tprof_reader_cv, &tprof_lock);
    434 			mutex_exit(&tprof_lock);
    435 			mutex_enter(&tprof_reader_lock);
    436 			continue;
    437 		}
    438 		STAILQ_REMOVE_HEAD(&tprof_list, b_list);
    439 		KASSERT(tprof_nbuf_on_list > 0);
    440 		tprof_nbuf_on_list--;
    441 		mutex_exit(&tprof_lock);
    442 
    443 		/*
    444 		 * copy it out.
    445 		 */
    446 		bytes = MIN(buf->b_used * sizeof(tprof_sample_t) -
    447 		    tprof_reader_offset, uio->uio_resid);
    448 		resid = uio->uio_resid;
    449 		error = uiomove((char *)buf->b_data + tprof_reader_offset,
    450 		    bytes, uio);
    451 		done = resid - uio->uio_resid;
    452 		tprof_reader_offset += done;
    453 
    454 		/*
    455 		 * if we didn't consume the whole buffer,
    456 		 * put it back to the list.
    457 		 */
    458 		if (tprof_reader_offset <
    459 		    buf->b_used * sizeof(tprof_sample_t)) {
    460 			mutex_enter(&tprof_lock);
    461 			STAILQ_INSERT_HEAD(&tprof_list, buf, b_list);
    462 			tprof_nbuf_on_list++;
    463 			cv_broadcast(&tprof_reader_cv);
    464 			mutex_exit(&tprof_lock);
    465 		} else {
    466 			tprof_buf_free(buf);
    467 			tprof_reader_offset = 0;
    468 		}
    469 	}
    470 	mutex_exit(&tprof_reader_lock);
    471 
    472 	return error;
    473 }
    474 
    475 static int
    476 tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
    477 {
    478 	const struct tprof_param *param;
    479 	int error = 0;
    480 
    481 	KASSERT(minor(dev) == 0);
    482 
    483 	switch (cmd) {
    484 	case TPROF_IOC_GETVERSION:
    485 		*(int *)data = TPROF_VERSION;
    486 		break;
    487 	case TPROF_IOC_START:
    488 		param = data;
    489 		mutex_enter(&tprof_startstop_lock);
    490 		error = tprof_start(param);
    491 		mutex_exit(&tprof_startstop_lock);
    492 		break;
    493 	case TPROF_IOC_STOP:
    494 		mutex_enter(&tprof_startstop_lock);
    495 		tprof_stop();
    496 		mutex_exit(&tprof_startstop_lock);
    497 		break;
    498 	case TPROF_IOC_GETSTAT:
    499 		mutex_enter(&tprof_lock);
    500 		memcpy(data, &tprof_stat, sizeof(tprof_stat));
    501 		mutex_exit(&tprof_lock);
    502 		break;
    503 	default:
    504 		error = EINVAL;
    505 		break;
    506 	}
    507 
    508 	return error;
    509 }
    510 
    511 const struct cdevsw tprof_cdevsw = {
    512 	.d_open = tprof_open,
    513 	.d_close = tprof_close,
    514 	.d_read = tprof_read,
    515 	.d_write = nowrite,
    516 	.d_ioctl = tprof_ioctl,
    517 	.d_stop = nostop,
    518 	.d_tty = notty,
    519 	.d_poll = nopoll,
    520 	.d_mmap = nommap,
    521 	.d_kqfilter = nokqfilter,
    522 	.d_flag = D_OTHER | D_MPSAFE,
    523 };
    524 
    525 void
    526 tprofattach(int nunits)
    527 {
    528 
    529 	mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE);
    530 	mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE);
    531 	mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE);
    532 	cv_init(&tprof_cv, "tprof");
    533 	cv_init(&tprof_reader_cv, "tprofread");
    534 	STAILQ_INIT(&tprof_list);
    535 }
    536