Home | History | Annotate | Line # | Download | only in dev
lockstat.c revision 1.17
      1 /*	$NetBSD: lockstat.c,v 1.17 2014/03/16 05:20:26 dholland Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Lock statistics driver, providing kernel support for the lockstat(8)
     34  * command.
     35  *
     36  * We use a global lock word (lockstat_lock) to track device opens.
     37  * Only one thread can hold the device at a time, providing a global lock.
     38  *
     39  * XXX Timings for contention on sleep locks are currently incorrect.
     40  */
     41 
     42 #include <sys/cdefs.h>
     43 __KERNEL_RCSID(0, "$NetBSD: lockstat.c,v 1.17 2014/03/16 05:20:26 dholland Exp $");
     44 
     45 #include <sys/types.h>
     46 #include <sys/param.h>
     47 #include <sys/proc.h>
     48 #include <sys/resourcevar.h>
     49 #include <sys/systm.h>
     50 #include <sys/kernel.h>
     51 #include <sys/kmem.h>
     52 #include <sys/conf.h>
     53 #include <sys/syslog.h>
     54 #include <sys/atomic.h>
     55 
     56 #include <dev/lockstat.h>
     57 
     58 #include <machine/lock.h>
     59 
     60 #ifndef __HAVE_CPU_COUNTER
     61 #error CPU counters not available
     62 #endif
     63 
     64 #if LONG_BIT == 64
     65 #define	LOCKSTAT_HASH_SHIFT	3
     66 #elif LONG_BIT == 32
     67 #define	LOCKSTAT_HASH_SHIFT	2
     68 #endif
     69 
/*
 * Bounds on the number of event buffers a tracing session may request
 * (enforced in lockstat_ioctl()), and the count used when the caller
 * passes zero.
 */
#define	LOCKSTAT_MINBUFS	1000
#define	LOCKSTAT_DEFBUFS	10000
#define	LOCKSTAT_MAXBUFS	50000

/*
 * Geometry of the per-CPU hash table.  LOCKSTAT_HASH() maps a key to a
 * bucket index in the range [0, LOCKSTAT_HASH_SIZE).
 *
 * The argument is parenthesised so that an expression such as
 * `lock ^ callsite' is shifted as a whole; without the parentheses the
 * shift would bind to the right-hand operand only.
 */
#define	LOCKSTAT_HASH_SIZE	128
#define	LOCKSTAT_HASH_MASK	(LOCKSTAT_HASH_SIZE - 1)
#define	LOCKSTAT_HASH(key)	\
	(((key) >> LOCKSTAT_HASH_SHIFT) & LOCKSTAT_HASH_MASK)
     78 
     79 typedef struct lscpu {
     80 	SLIST_HEAD(, lsbuf)	lc_free;
     81 	u_int			lc_overflow;
     82 	LIST_HEAD(lslist, lsbuf) lc_hash[LOCKSTAT_HASH_SIZE];
     83 } lscpu_t;
     84 
     85 typedef struct lslist lslist_t;
     86 
     87 void	lockstatattach(int);
     88 void	lockstat_start(lsenable_t *);
     89 int	lockstat_alloc(lsenable_t *);
     90 void	lockstat_init_tables(lsenable_t *);
     91 int	lockstat_stop(lsdisable_t *);
     92 void	lockstat_free(void);
     93 
     94 dev_type_open(lockstat_open);
     95 dev_type_close(lockstat_close);
     96 dev_type_read(lockstat_read);
     97 dev_type_ioctl(lockstat_ioctl);
     98 
     99 volatile u_int	lockstat_enabled;
    100 uintptr_t	lockstat_csstart;
    101 uintptr_t	lockstat_csend;
    102 uintptr_t	lockstat_csmask;
    103 uintptr_t	lockstat_lamask;
    104 uintptr_t	lockstat_lockstart;
    105 uintptr_t	lockstat_lockend;
    106 __cpu_simple_lock_t lockstat_lock;
    107 lwp_t		*lockstat_lwp;
    108 lsbuf_t		*lockstat_baseb;
    109 size_t		lockstat_sizeb;
    110 int		lockstat_busy;
    111 struct timespec	lockstat_stime;
    112 
    113 const struct cdevsw lockstat_cdevsw = {
    114 	.d_open = lockstat_open,
    115 	.d_close = lockstat_close,
    116 	.d_read = lockstat_read,
    117 	.d_write = nowrite,
    118 	.d_ioctl = lockstat_ioctl,
    119 	.d_stop = nostop,
    120 	.d_tty = notty,
    121 	.d_poll = nopoll,
    122 	.d_mmap = nommap,
    123 	.d_kqfilter = nokqfilter,
    124 	.d_flag = D_OTHER | D_MPSAFE
    125 };
    126 
    127 /*
    128  * Called when the pseudo-driver is attached.
    129  */
    130 void
    131 lockstatattach(int nunits)
    132 {
    133 
    134 	(void)nunits;
    135 
    136 	__cpu_simple_lock_init(&lockstat_lock);
    137 }
    138 
    139 /*
    140  * Prepare the per-CPU tables for use, or clear down tables when tracing is
    141  * stopped.
    142  */
    143 void
    144 lockstat_init_tables(lsenable_t *le)
    145 {
    146 	int i, per, slop, cpuno;
    147 	CPU_INFO_ITERATOR cii;
    148 	struct cpu_info *ci;
    149 	lscpu_t *lc;
    150 	lsbuf_t *lb;
    151 
    152 	KASSERT(!lockstat_enabled);
    153 
    154 	for (CPU_INFO_FOREACH(cii, ci)) {
    155 		if (ci->ci_lockstat != NULL) {
    156 			kmem_free(ci->ci_lockstat, sizeof(lscpu_t));
    157 			ci->ci_lockstat = NULL;
    158 		}
    159 	}
    160 
    161 	if (le == NULL)
    162 		return;
    163 
    164 	lb = lockstat_baseb;
    165 	per = le->le_nbufs / ncpu;
    166 	slop = le->le_nbufs - (per * ncpu);
    167 	cpuno = 0;
    168 	for (CPU_INFO_FOREACH(cii, ci)) {
    169 		lc = kmem_alloc(sizeof(*lc), KM_SLEEP);
    170 		lc->lc_overflow = 0;
    171 		ci->ci_lockstat = lc;
    172 
    173 		SLIST_INIT(&lc->lc_free);
    174 		for (i = 0; i < LOCKSTAT_HASH_SIZE; i++)
    175 			LIST_INIT(&lc->lc_hash[i]);
    176 
    177 		for (i = per; i != 0; i--, lb++) {
    178 			lb->lb_cpu = (uint16_t)cpuno;
    179 			SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
    180 		}
    181 		if (--slop > 0) {
    182 			lb->lb_cpu = (uint16_t)cpuno;
    183 			SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
    184 			lb++;
    185 		}
    186 		cpuno++;
    187 	}
    188 }
    189 
    190 /*
    191  * Start collecting lock statistics.
    192  */
    193 void
    194 lockstat_start(lsenable_t *le)
    195 {
    196 
    197 	KASSERT(!lockstat_enabled);
    198 
    199 	lockstat_init_tables(le);
    200 
    201 	if ((le->le_flags & LE_CALLSITE) != 0)
    202 		lockstat_csmask = (uintptr_t)-1LL;
    203 	else
    204 		lockstat_csmask = 0;
    205 
    206 	if ((le->le_flags & LE_LOCK) != 0)
    207 		lockstat_lamask = (uintptr_t)-1LL;
    208 	else
    209 		lockstat_lamask = 0;
    210 
    211 	lockstat_csstart = le->le_csstart;
    212 	lockstat_csend = le->le_csend;
    213 	lockstat_lockstart = le->le_lockstart;
    214 	lockstat_lockstart = le->le_lockstart;
    215 	lockstat_lockend = le->le_lockend;
    216 	membar_sync();
    217 	getnanotime(&lockstat_stime);
    218 	lockstat_enabled = le->le_mask;
    219 	membar_producer();
    220 }
    221 
    222 /*
    223  * Stop collecting lock statistics.
    224  */
    225 int
    226 lockstat_stop(lsdisable_t *ld)
    227 {
    228 	CPU_INFO_ITERATOR cii;
    229 	struct cpu_info *ci;
    230 	u_int cpuno, overflow;
    231 	struct timespec ts;
    232 	int error;
    233 	lwp_t *l;
    234 
    235 	KASSERT(lockstat_enabled);
    236 
    237 	/*
    238 	 * Set enabled false, force a write barrier, and wait for other CPUs
    239 	 * to exit lockstat_event().
    240 	 */
    241 	lockstat_enabled = 0;
    242 	membar_producer();
    243 	getnanotime(&ts);
    244 	tsleep(&lockstat_stop, PPAUSE, "lockstat", mstohz(10));
    245 
    246 	/*
    247 	 * Did we run out of buffers while tracing?
    248 	 */
    249 	overflow = 0;
    250 	for (CPU_INFO_FOREACH(cii, ci))
    251 		overflow += ((lscpu_t *)ci->ci_lockstat)->lc_overflow;
    252 
    253 	if (overflow != 0) {
    254 		error = EOVERFLOW;
    255 		log(LOG_NOTICE, "lockstat: %d buffer allocations failed\n",
    256 		    overflow);
    257 	} else
    258 		error = 0;
    259 
    260 	lockstat_init_tables(NULL);
    261 
    262 	/* Run through all LWPs and clear the slate for the next run. */
    263 	mutex_enter(proc_lock);
    264 	LIST_FOREACH(l, &alllwp, l_list) {
    265 		l->l_pfailaddr = 0;
    266 		l->l_pfailtime = 0;
    267 		l->l_pfaillock = 0;
    268 	}
    269 	mutex_exit(proc_lock);
    270 
    271 	if (ld == NULL)
    272 		return error;
    273 
    274 	/*
    275 	 * Fill out the disable struct for the caller.
    276 	 */
    277 	timespecsub(&ts, &lockstat_stime, &ld->ld_time);
    278 	ld->ld_size = lockstat_sizeb;
    279 
    280 	cpuno = 0;
    281 	for (CPU_INFO_FOREACH(cii, ci)) {
    282 		if (cpuno >= sizeof(ld->ld_freq) / sizeof(ld->ld_freq[0])) {
    283 			log(LOG_WARNING, "lockstat: too many CPUs\n");
    284 			break;
    285 		}
    286 		ld->ld_freq[cpuno++] = cpu_frequency(ci);
    287 	}
    288 
    289 	return error;
    290 }
    291 
    292 /*
    293  * Allocate buffers for lockstat_start().
    294  */
    295 int
    296 lockstat_alloc(lsenable_t *le)
    297 {
    298 	lsbuf_t *lb;
    299 	size_t sz;
    300 
    301 	KASSERT(!lockstat_enabled);
    302 	lockstat_free();
    303 
    304 	sz = sizeof(*lb) * le->le_nbufs;
    305 
    306 	lb = kmem_zalloc(sz, KM_SLEEP);
    307 	if (lb == NULL)
    308 		return (ENOMEM);
    309 
    310 	KASSERT(!lockstat_enabled);
    311 	KASSERT(lockstat_baseb == NULL);
    312 	lockstat_sizeb = sz;
    313 	lockstat_baseb = lb;
    314 
    315 	return (0);
    316 }
    317 
    318 /*
    319  * Free allocated buffers after tracing has stopped.
    320  */
    321 void
    322 lockstat_free(void)
    323 {
    324 
    325 	KASSERT(!lockstat_enabled);
    326 
    327 	if (lockstat_baseb != NULL) {
    328 		kmem_free(lockstat_baseb, lockstat_sizeb);
    329 		lockstat_baseb = NULL;
    330 	}
    331 }
    332 
    333 /*
    334  * Main entry point from lock primatives.
    335  */
    336 void
    337 lockstat_event(uintptr_t lock, uintptr_t callsite, u_int flags, u_int count,
    338 	       uint64_t cycles)
    339 {
    340 	lslist_t *ll;
    341 	lscpu_t *lc;
    342 	lsbuf_t *lb;
    343 	u_int event;
    344 	int s;
    345 
    346 	if ((flags & lockstat_enabled) != flags || count == 0)
    347 		return;
    348 	if (lock < lockstat_lockstart || lock > lockstat_lockend)
    349 		return;
    350 	if (callsite < lockstat_csstart || callsite > lockstat_csend)
    351 		return;
    352 
    353 	callsite &= lockstat_csmask;
    354 	lock &= lockstat_lamask;
    355 
    356 	/*
    357 	 * Find the table for this lock+callsite pair, and try to locate a
    358 	 * buffer with the same key.
    359 	 */
    360 	s = splhigh();
    361 	lc = curcpu()->ci_lockstat;
    362 	ll = &lc->lc_hash[LOCKSTAT_HASH(lock ^ callsite)];
    363 	event = (flags & LB_EVENT_MASK) - 1;
    364 
    365 	LIST_FOREACH(lb, ll, lb_chain.list) {
    366 		if (lb->lb_lock == lock && lb->lb_callsite == callsite)
    367 			break;
    368 	}
    369 
    370 	if (lb != NULL) {
    371 		/*
    372 		 * We found a record.  Move it to the front of the list, as
    373 		 * we're likely to hit it again soon.
    374 		 */
    375 		if (lb != LIST_FIRST(ll)) {
    376 			LIST_REMOVE(lb, lb_chain.list);
    377 			LIST_INSERT_HEAD(ll, lb, lb_chain.list);
    378 		}
    379 		lb->lb_counts[event] += count;
    380 		lb->lb_times[event] += cycles;
    381 	} else if ((lb = SLIST_FIRST(&lc->lc_free)) != NULL) {
    382 		/*
    383 		 * Pinch a new buffer and fill it out.
    384 		 */
    385 		SLIST_REMOVE_HEAD(&lc->lc_free, lb_chain.slist);
    386 		LIST_INSERT_HEAD(ll, lb, lb_chain.list);
    387 		lb->lb_flags = (uint16_t)flags;
    388 		lb->lb_lock = lock;
    389 		lb->lb_callsite = callsite;
    390 		lb->lb_counts[event] = count;
    391 		lb->lb_times[event] = cycles;
    392 	} else {
    393 		/*
    394 		 * We didn't find a buffer and there were none free.
    395 		 * lockstat_stop() will notice later on and report the
    396 		 * error.
    397 		 */
    398 		 lc->lc_overflow++;
    399 	}
    400 
    401 	splx(s);
    402 }
    403 
    404 /*
    405  * Accept an open() on /dev/lockstat.
    406  */
    407 int
    408 lockstat_open(dev_t dev, int flag, int mode, lwp_t *l)
    409 {
    410 
    411 	if (!__cpu_simple_lock_try(&lockstat_lock))
    412 		return EBUSY;
    413 	lockstat_lwp = curlwp;
    414 	return 0;
    415 }
    416 
    417 /*
    418  * Accept the last close() on /dev/lockstat.
    419  */
    420 int
    421 lockstat_close(dev_t dev, int flag, int mode, lwp_t *l)
    422 {
    423 
    424 	lockstat_lwp = NULL;
    425 	__cpu_simple_unlock(&lockstat_lock);
    426 	return 0;
    427 }
    428 
    429 /*
    430  * Handle control operations.
    431  */
    432 int
    433 lockstat_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
    434 {
    435 	lsenable_t *le;
    436 	int error;
    437 
    438 	if (lockstat_lwp != curlwp)
    439 		return EBUSY;
    440 
    441 	switch (cmd) {
    442 	case IOC_LOCKSTAT_GVERSION:
    443 		*(int *)data = LS_VERSION;
    444 		error = 0;
    445 		break;
    446 
    447 	case IOC_LOCKSTAT_ENABLE:
    448 		le = (lsenable_t *)data;
    449 
    450 		if (!cpu_hascounter()) {
    451 			error = ENODEV;
    452 			break;
    453 		}
    454 		if (lockstat_enabled) {
    455 			error = EBUSY;
    456 			break;
    457 		}
    458 
    459 		/*
    460 		 * Sanitize the arguments passed in and set up filtering.
    461 		 */
    462 		if (le->le_nbufs == 0)
    463 			le->le_nbufs = LOCKSTAT_DEFBUFS;
    464 		else if (le->le_nbufs > LOCKSTAT_MAXBUFS ||
    465 		    le->le_nbufs < LOCKSTAT_MINBUFS) {
    466 			error = EINVAL;
    467 			break;
    468 		}
    469 		if ((le->le_flags & LE_ONE_CALLSITE) == 0) {
    470 			le->le_csstart = 0;
    471 			le->le_csend = le->le_csstart - 1;
    472 		}
    473 		if ((le->le_flags & LE_ONE_LOCK) == 0) {
    474 			le->le_lockstart = 0;
    475 			le->le_lockend = le->le_lockstart - 1;
    476 		}
    477 		if ((le->le_mask & LB_EVENT_MASK) == 0)
    478 			return EINVAL;
    479 		if ((le->le_mask & LB_LOCK_MASK) == 0)
    480 			return EINVAL;
    481 
    482 		/*
    483 		 * Start tracing.
    484 		 */
    485 		if ((error = lockstat_alloc(le)) == 0)
    486 			lockstat_start(le);
    487 		break;
    488 
    489 	case IOC_LOCKSTAT_DISABLE:
    490 		if (!lockstat_enabled)
    491 			error = EINVAL;
    492 		else
    493 			error = lockstat_stop((lsdisable_t *)data);
    494 		break;
    495 
    496 	default:
    497 		error = ENOTTY;
    498 		break;
    499 	}
    500 
    501 	return error;
    502 }
    503 
    504 /*
    505  * Copy buffers out to user-space.
    506  */
    507 int
    508 lockstat_read(dev_t dev, struct uio *uio, int flag)
    509 {
    510 
    511 	if (curlwp != lockstat_lwp || lockstat_enabled)
    512 		return EBUSY;
    513 	return uiomove(lockstat_baseb, lockstat_sizeb, uio);
    514 }
    515