/*	$NetBSD: lockstat.c,v 1.10 2007/07/14 13:30:44 ad Exp $	*/

/*-
 * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Lock statistics driver, providing kernel support for the lockstat(8)
 * command.
 *
 * XXX Timings for contention on sleep locks are currently incorrect.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lockstat.c,v 1.10 2007/07/14 13:30:44 ad Exp $");

#include <sys/types.h>
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/syslog.h>

#include <dev/lockstat.h>

#ifndef __HAVE_CPU_COUNTER
#error CPU counters not available
#endif

#if LONG_BIT == 64
#define	LOCKSTAT_HASH_SHIFT	3
#elif LONG_BIT == 32
#define	LOCKSTAT_HASH_SHIFT	2
#endif

#define	LOCKSTAT_MINBUFS	1000
#define	LOCKSTAT_DEFBUFS	10000
#define	LOCKSTAT_MAXBUFS	50000

#define	LOCKSTAT_HASH_SIZE	64
#define	LOCKSTAT_HASH_MASK	(LOCKSTAT_HASH_SIZE - 1)
#define	LOCKSTAT_HASH(key)	\
	((key >> LOCKSTAT_HASH_SHIFT) & LOCKSTAT_HASH_MASK)
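
/*
 * Example of the hash above (illustrative values only): keys are lock
 * and call site addresses, whose low LOCKSTAT_HASH_SHIFT bits are
 * constant due to alignment, so they are shifted out before masking.
 * On a 64-bit machine a key of 0xffffffff80501240 gives
 * ((key >> 3) & 63) == 8, i.e. hash bucket 8.
 */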

typedef struct lscpu {
	SLIST_HEAD(, lsbuf)	lc_free;
	u_int			lc_overflow;
	LIST_HEAD(lslist, lsbuf) lc_hash[LOCKSTAT_HASH_SIZE];
} lscpu_t;

typedef struct lslist lslist_t;

void	lockstatattach(int);
void	lockstat_start(lsenable_t *);
int	lockstat_alloc(lsenable_t *);
void	lockstat_init_tables(lsenable_t *);
int	lockstat_stop(lsdisable_t *);
void	lockstat_free(void);

dev_type_open(lockstat_open);
dev_type_close(lockstat_close);
dev_type_read(lockstat_read);
dev_type_ioctl(lockstat_ioctl);

/* Protected against write by lockstat_lock().  Used by lockstat_event(). */
volatile u_int	lockstat_enabled;
uintptr_t	lockstat_csstart;
uintptr_t	lockstat_csend;
uintptr_t	lockstat_csmask;
uintptr_t	lockstat_lamask;
uintptr_t	lockstat_lockstart;
uintptr_t	lockstat_lockend;

/* Protected by lockstat_lock(). */
struct simplelock lockstat_slock;
lsbuf_t		*lockstat_baseb;
size_t		lockstat_sizeb;
int		lockstat_busy;
int		lockstat_devopen;
struct timespec	lockstat_stime;

const struct cdevsw lockstat_cdevsw = {
	lockstat_open, lockstat_close, lockstat_read, nowrite, lockstat_ioctl,
	nostop, notty, nopoll, nommap, nokqfilter, 0
};

MALLOC_DEFINE(M_LOCKSTAT, "lockstat", "lockstat event buffers");

/*
 * Called when the pseudo-driver is attached.
 */
void
lockstatattach(int nunits)
{

	(void)nunits;

	__cpu_simple_lock_init(&lockstat_slock.lock_data);
}

/*
 * Grab the global lock.  If busy is set, we want to block out operations on
 * the control device.
 */
static inline int
lockstat_lock(int busy)
{

	if (!__cpu_simple_lock_try(&lockstat_slock.lock_data))
		return (EBUSY);
	if (busy) {
		if (lockstat_busy) {
			__cpu_simple_unlock(&lockstat_slock.lock_data);
			return (EBUSY);
		}
		lockstat_busy = 1;
	}
	KASSERT(lockstat_busy);

	return 0;
}

/*
 * Release the global lock.  If unbusy is set, we want to allow new
 * operations on the control device.
 */
static inline void
lockstat_unlock(int unbusy)
{

	KASSERT(lockstat_busy);
	if (unbusy)
		lockstat_busy = 0;
	__cpu_simple_unlock(&lockstat_slock.lock_data);
}
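
/*
 * Usage sketch for the pair above: a control operation holds the busy
 * flag for its whole duration, and may drop the spin lock while sleeping:
 *
 *	if ((error = lockstat_lock(1)) != 0)	(lock, mark busy)
 *		return error;
 *	...
 *	lockstat_unlock(0);			(drop lock, stay busy)
 *	... sleep, e.g. in malloc(M_WAITOK) ...
 *	(void)lockstat_lock(0);			(retake lock)
 *	...
 *	lockstat_unlock(1);			(unlock, clear busy)
 */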

/*
 * Prepare the per-CPU tables for use, or clear down tables when tracing is
 * stopped.
 */
void
lockstat_init_tables(lsenable_t *le)
{
	int i, per, slop, cpuno;
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	lscpu_t *lc;
	lsbuf_t *lb;

	KASSERT(!lockstat_enabled);

	for (CPU_INFO_FOREACH(cii, ci)) {
		if (ci->ci_lockstat != NULL) {
			free(ci->ci_lockstat, M_LOCKSTAT);
			ci->ci_lockstat = NULL;
		}
	}

	if (le == NULL)
		return;

	lb = lockstat_baseb;
	per = le->le_nbufs / ncpu;
	slop = le->le_nbufs - (per * ncpu);
	cpuno = 0;
	for (CPU_INFO_FOREACH(cii, ci)) {
		lc = malloc(sizeof(*lc), M_LOCKSTAT, M_WAITOK);
		lc->lc_overflow = 0;
		ci->ci_lockstat = lc;

		SLIST_INIT(&lc->lc_free);
		for (i = 0; i < LOCKSTAT_HASH_SIZE; i++)
			LIST_INIT(&lc->lc_hash[i]);

		for (i = per; i != 0; i--, lb++) {
			lb->lb_cpu = (uint16_t)cpuno;
			SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
		}
		if (slop-- > 0) {
			/* Hand one leftover buffer to this CPU. */
			lb->lb_cpu = (uint16_t)cpuno;
			SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
			lb++;
		}
		cpuno++;
	}
}
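
/*
 * Worked example of the split above (illustrative): with le_nbufs = 10000
 * and ncpu = 3, per = 3333 and slop = 1, so the first CPU's free list gets
 * 3334 buffers and the other two get 3333 each.
 */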

/*
 * Start collecting lock statistics.
 */
void
lockstat_start(lsenable_t *le)
{

	KASSERT(!lockstat_enabled);

	lockstat_init_tables(le);

	if ((le->le_flags & LE_CALLSITE) != 0)
		lockstat_csmask = (uintptr_t)-1LL;
	else
		lockstat_csmask = 0;

	if ((le->le_flags & LE_LOCK) != 0)
		lockstat_lamask = (uintptr_t)-1LL;
	else
		lockstat_lamask = 0;

	lockstat_csstart = le->le_csstart;
	lockstat_csend = le->le_csend;
	lockstat_lockstart = le->le_lockstart;
	lockstat_lockend = le->le_lockend;
	mb_memory();
	getnanotime(&lockstat_stime);

	/* Enable last, so lockstat_event() never sees a half-set-up state. */
	lockstat_enabled = le->le_mask;
	mb_write();
}

/*
 * Stop collecting lock statistics.
 */
int
lockstat_stop(lsdisable_t *ld)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	u_int cpuno, overflow;
	struct timespec ts;
	int error;

	KASSERT(lockstat_enabled);

	/*
	 * Set enabled false, force a write barrier, and wait for other CPUs
	 * to exit lockstat_event().
	 */
	lockstat_enabled = 0;
	lockstat_unlock(0);
	getnanotime(&ts);
	tsleep(&lockstat_stop, PPAUSE, "lockstat", mstohz(10));
	(void)lockstat_lock(0);

	/*
	 * Did we run out of buffers while tracing?
	 */
	overflow = 0;
	for (CPU_INFO_FOREACH(cii, ci))
		overflow += ((lscpu_t *)ci->ci_lockstat)->lc_overflow;

	if (overflow != 0) {
		error = EOVERFLOW;
		log(LOG_NOTICE, "lockstat: %u buffer allocations failed\n",
		    overflow);
	} else
		error = 0;

	lockstat_init_tables(NULL);

	if (ld == NULL)
		return (error);

	/*
	 * Fill out the disable struct for the caller.
	 */
	timespecsub(&ts, &lockstat_stime, &ld->ld_time);
	ld->ld_size = lockstat_sizeb;

	cpuno = 0;
	for (CPU_INFO_FOREACH(cii, ci)) {
		if (cpuno >= sizeof(ld->ld_freq) / sizeof(ld->ld_freq[0])) {
			log(LOG_WARNING, "lockstat: too many CPUs\n");
			break;
		}
		ld->ld_freq[cpuno++] = cpu_frequency(ci);
	}

	return (error);
}

/*
 * Allocate buffers for lockstat_start().
 */
int
lockstat_alloc(lsenable_t *le)
{
	lsbuf_t *lb;
	size_t sz;

	KASSERT(!lockstat_enabled);
	lockstat_free();

	sz = sizeof(*lb) * le->le_nbufs;

	lockstat_unlock(0);
	lb = malloc(sz, M_LOCKSTAT, M_WAITOK | M_ZERO);
	(void)lockstat_lock(0);

	if (lb == NULL)
		return (ENOMEM);

	KASSERT(!lockstat_enabled);
	KASSERT(lockstat_baseb == NULL);
	lockstat_sizeb = sz;
	lockstat_baseb = lb;

	return (0);
}

/*
 * Free allocated buffers after tracing has stopped.
 */
void
lockstat_free(void)
{

	KASSERT(!lockstat_enabled);

	if (lockstat_baseb != NULL) {
		free(lockstat_baseb, M_LOCKSTAT);
		lockstat_baseb = NULL;
	}
}

/*
 * Main entry point from lock primitives.
 */
void
lockstat_event(uintptr_t lock, uintptr_t callsite, u_int flags, u_int count,
	       uint64_t cycles)
{
	lslist_t *ll;
	lscpu_t *lc;
	lsbuf_t *lb;
	u_int event;
	int s;

	if ((flags & lockstat_enabled) != flags || count == 0)
		return;
	if (lock < lockstat_lockstart || lock > lockstat_lockend)
		return;
	if (callsite < lockstat_csstart || callsite > lockstat_csend)
		return;

	callsite &= lockstat_csmask;
	lock &= lockstat_lamask;

	/*
	 * Find the table for this lock+callsite pair, and try to locate a
	 * buffer with the same key.
	 */
	lc = curcpu()->ci_lockstat;
	ll = &lc->lc_hash[LOCKSTAT_HASH(lock ^ callsite)];
	event = (flags & LB_EVENT_MASK) - 1;
	s = splhigh();

	LIST_FOREACH(lb, ll, lb_chain.list) {
		if (lb->lb_lock == lock && lb->lb_callsite == callsite)
			break;
	}

	if (lb != NULL) {
		/*
		 * We found a record.  Move it to the front of the list, as
		 * we're likely to hit it again soon.
		 */
		if (lb != LIST_FIRST(ll)) {
			LIST_REMOVE(lb, lb_chain.list);
			LIST_INSERT_HEAD(ll, lb, lb_chain.list);
		}
		lb->lb_counts[event] += count;
		lb->lb_times[event] += cycles;
	} else if ((lb = SLIST_FIRST(&lc->lc_free)) != NULL) {
		/*
		 * Pinch a new buffer and fill it out.
		 */
		SLIST_REMOVE_HEAD(&lc->lc_free, lb_chain.slist);
		LIST_INSERT_HEAD(ll, lb, lb_chain.list);
		lb->lb_flags = (uint16_t)flags;
		lb->lb_lock = lock;
		lb->lb_callsite = callsite;
		lb->lb_counts[event] = count;
		lb->lb_times[event] = cycles;
	} else {
		/*
		 * We didn't find a buffer and there were none free.
		 * lockstat_stop() will notice later on and report the
		 * error.
		 */
		lc->lc_overflow++;
	}

	splx(s);
}
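
/*
 * Sketch of a call site in a lock primitive (illustrative only; real
 * callers go through the LOCKSTAT_* wrapper macros in <dev/lockstat.h>,
 * and the flag values below are assumed):
 *
 *	uint64_t t = cpu_counter();
 *	... spin until the lock is acquired ...
 *	lockstat_event((uintptr_t)lk,
 *	    (uintptr_t)__builtin_return_address(0),
 *	    LB_SPIN_MUTEX | LB_SPIN, 1, cpu_counter() - t);
 */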

/*
 * Accept an open() on /dev/lockstat.
 */
int
lockstat_open(dev_t dev, int flag, int mode, struct lwp *l)
{
	int error;

	if ((error = lockstat_lock(1)) != 0)
		return error;

	if (lockstat_devopen)
		error = EBUSY;
	else {
		lockstat_devopen = 1;
		error = 0;
	}

	lockstat_unlock(1);

	return error;
}

/*
 * Accept the last close() on /dev/lockstat.
 */
int
lockstat_close(dev_t dev, int flag, int mode, struct lwp *l)
{
	int error;

	if ((error = lockstat_lock(1)) == 0) {
		if (lockstat_enabled)
			(void)lockstat_stop(NULL);
		lockstat_free();
		lockstat_devopen = 0;
		lockstat_unlock(1);
	}

	return error;
}

/*
 * Handle control operations.
 */
int
lockstat_ioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	lsenable_t *le;
	int error;

	if ((error = lockstat_lock(1)) != 0)
		return error;

	switch (cmd) {
	case IOC_LOCKSTAT_GVERSION:
		*(int *)data = LS_VERSION;
		error = 0;
		break;

	case IOC_LOCKSTAT_ENABLE:
		le = (lsenable_t *)data;

		if (!cpu_hascounter()) {
			error = ENODEV;
			break;
		}
		if (lockstat_enabled) {
			error = EBUSY;
			break;
		}

		/*
		 * Sanitize the arguments passed in and set up filtering.
		 */
		if (le->le_nbufs == 0)
			le->le_nbufs = LOCKSTAT_DEFBUFS;
		else if (le->le_nbufs > LOCKSTAT_MAXBUFS ||
		    le->le_nbufs < LOCKSTAT_MINBUFS) {
			error = EINVAL;
			break;
		}
		if ((le->le_flags & LE_ONE_CALLSITE) == 0) {
			le->le_csstart = 0;
			le->le_csend = le->le_csstart - 1;
		}
		if ((le->le_flags & LE_ONE_LOCK) == 0) {
			le->le_lockstart = 0;
			le->le_lockend = le->le_lockstart - 1;
		}
		if ((le->le_mask & LB_EVENT_MASK) == 0 ||
		    (le->le_mask & LB_LOCK_MASK) == 0) {
			/* Break, don't return: the busy flag is held. */
			error = EINVAL;
			break;
		}

		/*
		 * Start tracing.
		 */
		if ((error = lockstat_alloc(le)) == 0)
			lockstat_start(le);
		break;

	case IOC_LOCKSTAT_DISABLE:
		if (!lockstat_enabled)
			error = EINVAL;
		else
			error = lockstat_stop((lsdisable_t *)data);
		break;

	default:
		error = ENOTTY;
		break;
	}

	lockstat_unlock(1);
	return error;
}
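
/*
 * Example consumer of the ioctls above (a sketch, not part of the driver;
 * error handling is omitted and the mask values are assumed.  lockstat(8)
 * is the real consumer):
 *
 *	int fd = open("/dev/lockstat", O_RDONLY);
 *	lsenable_t le;
 *	memset(&le, 0, sizeof(le));
 *	le.le_mask = LB_ADAPTIVE_MUTEX | LB_SPIN;
 *	ioctl(fd, IOC_LOCKSTAT_ENABLE, &le);
 *	... run the workload of interest ...
 *	lsdisable_t ld;
 *	ioctl(fd, IOC_LOCKSTAT_DISABLE, &ld);
 *	... then read(2) the event buffers; ld.ld_size is the total size ...
 */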

/*
 * Copy buffers out to user-space.
 */
int
lockstat_read(dev_t dev, struct uio *uio, int flag)
{
	int error;

	if ((error = lockstat_lock(1)) != 0)
		return (error);

	if (lockstat_enabled) {
		lockstat_unlock(1);
		return (EBUSY);
	}

	lockstat_unlock(0);
	error = uiomove(lockstat_baseb, lockstat_sizeb, uio);
	(void)lockstat_lock(0);

	lockstat_unlock(1);

	return (error);
}
    577