lockstat.c revision 1.8.2.3 1 1.8.2.3 ad /* $NetBSD: lockstat.c,v 1.8.2.3 2007/10/29 00:22:43 ad Exp $ */
2 1.1 ad
3 1.1 ad /*-
4 1.8.2.1 ad * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
5 1.1 ad * All rights reserved.
6 1.1 ad *
7 1.1 ad * This code is derived from software contributed to The NetBSD Foundation
8 1.1 ad * by Andrew Doran.
9 1.1 ad *
10 1.1 ad * Redistribution and use in source and binary forms, with or without
11 1.1 ad * modification, are permitted provided that the following conditions
12 1.1 ad * are met:
13 1.1 ad * 1. Redistributions of source code must retain the above copyright
14 1.1 ad * notice, this list of conditions and the following disclaimer.
15 1.1 ad * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 ad * notice, this list of conditions and the following disclaimer in the
17 1.1 ad * documentation and/or other materials provided with the distribution.
18 1.1 ad * 3. All advertising materials mentioning features or use of this software
19 1.1 ad * must display the following acknowledgement:
20 1.1 ad * This product includes software developed by the NetBSD
21 1.1 ad * Foundation, Inc. and its contributors.
22 1.1 ad * 4. Neither the name of The NetBSD Foundation nor the names of its
23 1.1 ad * contributors may be used to endorse or promote products derived
24 1.1 ad * from this software without specific prior written permission.
25 1.1 ad *
26 1.1 ad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 1.1 ad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 1.1 ad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 1.1 ad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 1.1 ad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 1.1 ad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 1.1 ad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 1.1 ad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 1.1 ad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 1.1 ad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 1.1 ad * POSSIBILITY OF SUCH DAMAGE.
37 1.1 ad */
38 1.1 ad
39 1.1 ad /*
40 1.1 ad * Lock statistics driver, providing kernel support for the lockstat(8)
41 1.1 ad * command.
42 1.5 ad *
43 1.8.2.2 ad * We use a global lock word (lockstat_lock) to track device opens.
44 1.8.2.2 ad * Only one thread can hold the device at a time, providing a global lock.
45 1.8.2.2 ad *
46 1.5 ad * XXX Timings for contention on sleep locks are currently incorrect.
47 1.1 ad */
48 1.1 ad
49 1.1 ad #include <sys/cdefs.h>
50 1.8.2.3 ad __KERNEL_RCSID(0, "$NetBSD: lockstat.c,v 1.8.2.3 2007/10/29 00:22:43 ad Exp $");
51 1.1 ad
52 1.1 ad #include <sys/types.h>
53 1.1 ad #include <sys/param.h>
54 1.1 ad #include <sys/lock.h>
55 1.1 ad #include <sys/proc.h>
56 1.1 ad #include <sys/resourcevar.h>
57 1.1 ad #include <sys/systm.h>
58 1.1 ad #include <sys/kernel.h>
59 1.8.2.2 ad #include <sys/kmem.h>
60 1.1 ad #include <sys/conf.h>
61 1.1 ad #include <sys/syslog.h>
62 1.1 ad
63 1.1 ad #include <dev/lockstat.h>
64 1.1 ad
65 1.1 ad #ifndef __HAVE_CPU_COUNTER
66 1.1 ad #error CPU counters not available
67 1.1 ad #endif
68 1.1 ad
69 1.1 ad #if LONG_BIT == 64
70 1.1 ad #define LOCKSTAT_HASH_SHIFT 3
71 1.1 ad #elif LONG_BIT == 32
72 1.1 ad #define LOCKSTAT_HASH_SHIFT 2
73 1.1 ad #endif
74 1.1 ad
75 1.8.2.1 ad #define LOCKSTAT_MINBUFS 1000
76 1.8.2.1 ad #define LOCKSTAT_DEFBUFS 10000
77 1.8.2.1 ad #define LOCKSTAT_MAXBUFS 50000
78 1.1 ad
/*
 * Per-CPU hash table geometry.  LOCKSTAT_HASH_SIZE must be a power of two
 * so that LOCKSTAT_HASH_MASK selects a valid bucket index.
 *
 * LOCKSTAT_HASH() discards the low LOCKSTAT_HASH_SHIFT bits of the key and
 * folds the remainder into a bucket index.  The argument is parenthesized
 * so that an expression such as (lock ^ callsite) is shifted as a whole;
 * without the parentheses ">>" would bind tighter than "^" and only the
 * right-hand operand would be shifted.
 */
#define	LOCKSTAT_HASH_SIZE	128
#define	LOCKSTAT_HASH_MASK	(LOCKSTAT_HASH_SIZE - 1)
#define	LOCKSTAT_HASH(key)	\
	(((key) >> LOCKSTAT_HASH_SHIFT) & LOCKSTAT_HASH_MASK)
83 1.1 ad
84 1.1 ad typedef struct lscpu {
85 1.1 ad SLIST_HEAD(, lsbuf) lc_free;
86 1.1 ad u_int lc_overflow;
87 1.1 ad LIST_HEAD(lslist, lsbuf) lc_hash[LOCKSTAT_HASH_SIZE];
88 1.1 ad } lscpu_t;
89 1.1 ad
90 1.1 ad typedef struct lslist lslist_t;
91 1.1 ad
92 1.1 ad void lockstatattach(int);
93 1.1 ad void lockstat_start(lsenable_t *);
94 1.1 ad int lockstat_alloc(lsenable_t *);
95 1.1 ad void lockstat_init_tables(lsenable_t *);
96 1.1 ad int lockstat_stop(lsdisable_t *);
97 1.1 ad void lockstat_free(void);
98 1.1 ad
99 1.1 ad dev_type_open(lockstat_open);
100 1.1 ad dev_type_close(lockstat_close);
101 1.1 ad dev_type_read(lockstat_read);
102 1.1 ad dev_type_ioctl(lockstat_ioctl);
103 1.1 ad
104 1.1 ad volatile u_int lockstat_enabled;
105 1.1 ad uintptr_t lockstat_csstart;
106 1.1 ad uintptr_t lockstat_csend;
107 1.1 ad uintptr_t lockstat_csmask;
108 1.8.2.1 ad uintptr_t lockstat_lamask;
109 1.5 ad uintptr_t lockstat_lockstart;
110 1.5 ad uintptr_t lockstat_lockend;
111 1.8.2.2 ad __cpu_simple_lock_t lockstat_lock;
112 1.1 ad lsbuf_t *lockstat_baseb;
113 1.1 ad size_t lockstat_sizeb;
114 1.1 ad int lockstat_busy;
115 1.1 ad struct timespec lockstat_stime;
116 1.1 ad
117 1.1 ad const struct cdevsw lockstat_cdevsw = {
118 1.1 ad lockstat_open, lockstat_close, lockstat_read, nowrite, lockstat_ioctl,
119 1.8.2.2 ad nostop, notty, nopoll, nommap, nokqfilter, D_OTHER | D_MPSAFE
120 1.1 ad };
121 1.1 ad
122 1.1 ad /*
123 1.1 ad * Called when the pseudo-driver is attached.
124 1.1 ad */
125 1.1 ad void
126 1.1 ad lockstatattach(int nunits)
127 1.1 ad {
128 1.1 ad
129 1.1 ad (void)nunits;
130 1.1 ad
131 1.8.2.2 ad __cpu_simple_lock_init(&lockstat_lock);
132 1.1 ad }
133 1.1 ad
134 1.1 ad /*
135 1.1 ad * Prepare the per-CPU tables for use, or clear down tables when tracing is
136 1.1 ad * stopped.
137 1.1 ad */
138 1.1 ad void
139 1.1 ad lockstat_init_tables(lsenable_t *le)
140 1.1 ad {
141 1.7 ad int i, per, slop, cpuno;
142 1.1 ad CPU_INFO_ITERATOR cii;
143 1.1 ad struct cpu_info *ci;
144 1.1 ad lscpu_t *lc;
145 1.1 ad lsbuf_t *lb;
146 1.1 ad
147 1.1 ad KASSERT(!lockstat_enabled);
148 1.1 ad
149 1.1 ad for (CPU_INFO_FOREACH(cii, ci)) {
150 1.1 ad if (ci->ci_lockstat != NULL) {
151 1.8.2.2 ad kmem_free(ci->ci_lockstat, sizeof(lscpu_t));
152 1.1 ad ci->ci_lockstat = NULL;
153 1.1 ad }
154 1.1 ad }
155 1.1 ad
156 1.1 ad if (le == NULL)
157 1.1 ad return;
158 1.1 ad
159 1.1 ad lb = lockstat_baseb;
160 1.1 ad per = le->le_nbufs / ncpu;
161 1.1 ad slop = le->le_nbufs - (per * ncpu);
162 1.1 ad cpuno = 0;
163 1.1 ad for (CPU_INFO_FOREACH(cii, ci)) {
164 1.8.2.2 ad lc = kmem_alloc(sizeof(*lc), KM_SLEEP);
165 1.1 ad lc->lc_overflow = 0;
166 1.1 ad ci->ci_lockstat = lc;
167 1.1 ad
168 1.1 ad SLIST_INIT(&lc->lc_free);
169 1.1 ad for (i = 0; i < LOCKSTAT_HASH_SIZE; i++)
170 1.1 ad LIST_INIT(&lc->lc_hash[i]);
171 1.1 ad
172 1.1 ad for (i = per; i != 0; i--, lb++) {
173 1.1 ad lb->lb_cpu = (uint16_t)cpuno;
174 1.1 ad SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
175 1.1 ad }
176 1.1 ad if (--slop > 0) {
177 1.1 ad lb->lb_cpu = (uint16_t)cpuno;
178 1.1 ad SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
179 1.1 ad lb++;
180 1.1 ad }
181 1.1 ad cpuno++;
182 1.1 ad }
183 1.1 ad }
184 1.1 ad
185 1.1 ad /*
186 1.1 ad * Start collecting lock statistics.
187 1.1 ad */
188 1.1 ad void
189 1.1 ad lockstat_start(lsenable_t *le)
190 1.1 ad {
191 1.1 ad
192 1.1 ad KASSERT(!lockstat_enabled);
193 1.1 ad
194 1.1 ad lockstat_init_tables(le);
195 1.1 ad
196 1.1 ad if ((le->le_flags & LE_CALLSITE) != 0)
197 1.1 ad lockstat_csmask = (uintptr_t)-1LL;
198 1.1 ad else
199 1.1 ad lockstat_csmask = 0;
200 1.1 ad
201 1.8.2.1 ad if ((le->le_flags & LE_LOCK) != 0)
202 1.8.2.1 ad lockstat_lamask = (uintptr_t)-1LL;
203 1.8.2.1 ad else
204 1.8.2.1 ad lockstat_lamask = 0;
205 1.8.2.1 ad
206 1.1 ad lockstat_csstart = le->le_csstart;
207 1.1 ad lockstat_csend = le->le_csend;
208 1.5 ad lockstat_lockstart = le->le_lockstart;
209 1.6 ad lockstat_lockstart = le->le_lockstart;
210 1.5 ad lockstat_lockend = le->le_lockend;
211 1.5 ad mb_memory();
212 1.1 ad getnanotime(&lockstat_stime);
213 1.1 ad lockstat_enabled = le->le_mask;
214 1.5 ad mb_write();
215 1.1 ad }
216 1.1 ad
217 1.1 ad /*
218 1.1 ad * Stop collecting lock statistics.
219 1.1 ad */
220 1.1 ad int
221 1.1 ad lockstat_stop(lsdisable_t *ld)
222 1.1 ad {
223 1.1 ad CPU_INFO_ITERATOR cii;
224 1.1 ad struct cpu_info *ci;
225 1.1 ad u_int cpuno, overflow;
226 1.1 ad struct timespec ts;
227 1.1 ad int error;
228 1.1 ad
229 1.1 ad KASSERT(lockstat_enabled);
230 1.1 ad
231 1.1 ad /*
232 1.1 ad * Set enabled false, force a write barrier, and wait for other CPUs
233 1.5 ad * to exit lockstat_event().
234 1.1 ad */
235 1.1 ad lockstat_enabled = 0;
236 1.8.2.2 ad mb_write();
237 1.1 ad getnanotime(&ts);
238 1.1 ad tsleep(&lockstat_stop, PPAUSE, "lockstat", mstohz(10));
239 1.1 ad
240 1.1 ad /*
241 1.1 ad * Did we run out of buffers while tracing?
242 1.1 ad */
243 1.1 ad overflow = 0;
244 1.1 ad for (CPU_INFO_FOREACH(cii, ci))
245 1.1 ad overflow += ((lscpu_t *)ci->ci_lockstat)->lc_overflow;
246 1.1 ad
247 1.1 ad if (overflow != 0) {
248 1.1 ad error = EOVERFLOW;
249 1.1 ad log(LOG_NOTICE, "lockstat: %d buffer allocations failed\n",
250 1.1 ad overflow);
251 1.1 ad } else
252 1.1 ad error = 0;
253 1.1 ad
254 1.1 ad lockstat_init_tables(NULL);
255 1.1 ad
256 1.1 ad if (ld == NULL)
257 1.8.2.2 ad return error;
258 1.1 ad
259 1.1 ad /*
260 1.1 ad * Fill out the disable struct for the caller.
261 1.1 ad */
262 1.1 ad timespecsub(&ts, &lockstat_stime, &ld->ld_time);
263 1.1 ad ld->ld_size = lockstat_sizeb;
264 1.1 ad
265 1.1 ad cpuno = 0;
266 1.1 ad for (CPU_INFO_FOREACH(cii, ci)) {
267 1.1 ad if (cpuno > sizeof(ld->ld_freq) / sizeof(ld->ld_freq[0])) {
268 1.1 ad log(LOG_WARNING, "lockstat: too many CPUs\n");
269 1.1 ad break;
270 1.1 ad }
271 1.1 ad ld->ld_freq[cpuno++] = cpu_frequency(ci);
272 1.1 ad }
273 1.1 ad
274 1.8.2.2 ad return error;
275 1.1 ad }
276 1.1 ad
277 1.1 ad /*
278 1.1 ad * Allocate buffers for lockstat_start().
279 1.1 ad */
280 1.1 ad int
281 1.1 ad lockstat_alloc(lsenable_t *le)
282 1.1 ad {
283 1.1 ad lsbuf_t *lb;
284 1.1 ad size_t sz;
285 1.1 ad
286 1.1 ad KASSERT(!lockstat_enabled);
287 1.1 ad lockstat_free();
288 1.1 ad
289 1.1 ad sz = sizeof(*lb) * le->le_nbufs;
290 1.1 ad
291 1.8.2.2 ad lb = kmem_zalloc(sz, KM_SLEEP);
292 1.1 ad if (lb == NULL)
293 1.1 ad return (ENOMEM);
294 1.1 ad
295 1.1 ad KASSERT(!lockstat_enabled);
296 1.1 ad KASSERT(lockstat_baseb == NULL);
297 1.1 ad lockstat_sizeb = sz;
298 1.1 ad lockstat_baseb = lb;
299 1.1 ad
300 1.1 ad return (0);
301 1.1 ad }
302 1.1 ad
303 1.1 ad /*
304 1.1 ad * Free allocated buffers after tracing has stopped.
305 1.1 ad */
306 1.1 ad void
307 1.1 ad lockstat_free(void)
308 1.1 ad {
309 1.1 ad
310 1.1 ad KASSERT(!lockstat_enabled);
311 1.1 ad
312 1.1 ad if (lockstat_baseb != NULL) {
313 1.8.2.2 ad kmem_free(lockstat_baseb, lockstat_sizeb);
314 1.1 ad lockstat_baseb = NULL;
315 1.1 ad }
316 1.1 ad }
317 1.1 ad
318 1.1 ad /*
319 1.1 ad * Main entry point from lock primatives.
320 1.1 ad */
321 1.1 ad void
322 1.1 ad lockstat_event(uintptr_t lock, uintptr_t callsite, u_int flags, u_int count,
323 1.6 ad uint64_t cycles)
324 1.1 ad {
325 1.1 ad lslist_t *ll;
326 1.1 ad lscpu_t *lc;
327 1.1 ad lsbuf_t *lb;
328 1.1 ad u_int event;
329 1.1 ad int s;
330 1.1 ad
331 1.1 ad if ((flags & lockstat_enabled) != flags || count == 0)
332 1.1 ad return;
333 1.5 ad if (lock < lockstat_lockstart || lock > lockstat_lockend)
334 1.1 ad return;
335 1.1 ad if (callsite < lockstat_csstart || callsite > lockstat_csend)
336 1.1 ad return;
337 1.1 ad
338 1.1 ad callsite &= lockstat_csmask;
339 1.8.2.1 ad lock &= lockstat_lamask;
340 1.1 ad
341 1.1 ad /*
342 1.1 ad * Find the table for this lock+callsite pair, and try to locate a
343 1.1 ad * buffer with the same key.
344 1.1 ad */
345 1.8.2.3 ad s = splhigh();
346 1.1 ad lc = curcpu()->ci_lockstat;
347 1.1 ad ll = &lc->lc_hash[LOCKSTAT_HASH(lock ^ callsite)];
348 1.1 ad event = (flags & LB_EVENT_MASK) - 1;
349 1.1 ad
350 1.1 ad LIST_FOREACH(lb, ll, lb_chain.list) {
351 1.1 ad if (lb->lb_lock == lock && lb->lb_callsite == callsite)
352 1.1 ad break;
353 1.1 ad }
354 1.1 ad
355 1.1 ad if (lb != NULL) {
356 1.1 ad /*
357 1.1 ad * We found a record. Move it to the front of the list, as
358 1.1 ad * we're likely to hit it again soon.
359 1.1 ad */
360 1.1 ad if (lb != LIST_FIRST(ll)) {
361 1.1 ad LIST_REMOVE(lb, lb_chain.list);
362 1.1 ad LIST_INSERT_HEAD(ll, lb, lb_chain.list);
363 1.1 ad }
364 1.1 ad lb->lb_counts[event] += count;
365 1.6 ad lb->lb_times[event] += cycles;
366 1.1 ad } else if ((lb = SLIST_FIRST(&lc->lc_free)) != NULL) {
367 1.1 ad /*
368 1.1 ad * Pinch a new buffer and fill it out.
369 1.1 ad */
370 1.1 ad SLIST_REMOVE_HEAD(&lc->lc_free, lb_chain.slist);
371 1.1 ad LIST_INSERT_HEAD(ll, lb, lb_chain.list);
372 1.1 ad lb->lb_flags = (uint16_t)flags;
373 1.1 ad lb->lb_lock = lock;
374 1.1 ad lb->lb_callsite = callsite;
375 1.1 ad lb->lb_counts[event] = count;
376 1.6 ad lb->lb_times[event] = cycles;
377 1.1 ad } else {
378 1.1 ad /*
379 1.1 ad * We didn't find a buffer and there were none free.
380 1.1 ad * lockstat_stop() will notice later on and report the
381 1.1 ad * error.
382 1.1 ad */
383 1.1 ad lc->lc_overflow++;
384 1.1 ad }
385 1.1 ad
386 1.1 ad splx(s);
387 1.1 ad }
388 1.1 ad
389 1.1 ad /*
390 1.1 ad * Accept an open() on /dev/lockstat.
391 1.1 ad */
392 1.1 ad int
393 1.8.2.2 ad lockstat_open(dev_t dev, int flag, int mode, lwp_t *l)
394 1.1 ad {
395 1.1 ad
396 1.8.2.2 ad if (!__cpu_simple_lock_try(&lockstat_lock))
397 1.8.2.2 ad return EBUSY;
398 1.8.2.2 ad return 0;
399 1.1 ad }
400 1.1 ad
401 1.1 ad /*
402 1.1 ad * Accept the last close() on /dev/lockstat.
403 1.1 ad */
404 1.1 ad int
405 1.8.2.2 ad lockstat_close(dev_t dev, int flag, int mode, lwp_t *l)
406 1.1 ad {
407 1.1 ad
408 1.8.2.2 ad __cpu_simple_unlock(&lockstat_lock);
409 1.8.2.2 ad return 0;
410 1.1 ad }
411 1.1 ad
412 1.1 ad /*
413 1.1 ad * Handle control operations.
414 1.1 ad */
415 1.1 ad int
416 1.8.2.2 ad lockstat_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
417 1.1 ad {
418 1.1 ad lsenable_t *le;
419 1.1 ad int error;
420 1.1 ad
421 1.1 ad switch (cmd) {
422 1.1 ad case IOC_LOCKSTAT_GVERSION:
423 1.1 ad *(int *)data = LS_VERSION;
424 1.1 ad error = 0;
425 1.1 ad break;
426 1.1 ad
427 1.1 ad case IOC_LOCKSTAT_ENABLE:
428 1.1 ad le = (lsenable_t *)data;
429 1.1 ad
430 1.1 ad if (!cpu_hascounter()) {
431 1.1 ad error = ENODEV;
432 1.1 ad break;
433 1.1 ad }
434 1.1 ad if (lockstat_enabled) {
435 1.1 ad error = EBUSY;
436 1.1 ad break;
437 1.1 ad }
438 1.1 ad
439 1.1 ad /*
440 1.1 ad * Sanitize the arguments passed in and set up filtering.
441 1.1 ad */
442 1.1 ad if (le->le_nbufs == 0)
443 1.1 ad le->le_nbufs = LOCKSTAT_DEFBUFS;
444 1.1 ad else if (le->le_nbufs > LOCKSTAT_MAXBUFS ||
445 1.1 ad le->le_nbufs < LOCKSTAT_MINBUFS) {
446 1.1 ad error = EINVAL;
447 1.1 ad break;
448 1.1 ad }
449 1.1 ad if ((le->le_flags & LE_ONE_CALLSITE) == 0) {
450 1.1 ad le->le_csstart = 0;
451 1.1 ad le->le_csend = le->le_csstart - 1;
452 1.1 ad }
453 1.5 ad if ((le->le_flags & LE_ONE_LOCK) == 0) {
454 1.5 ad le->le_lockstart = 0;
455 1.5 ad le->le_lockend = le->le_lockstart - 1;
456 1.5 ad }
457 1.1 ad if ((le->le_mask & LB_EVENT_MASK) == 0)
458 1.8.2.2 ad return EINVAL;
459 1.1 ad if ((le->le_mask & LB_LOCK_MASK) == 0)
460 1.8.2.2 ad return EINVAL;
461 1.1 ad
462 1.1 ad /*
463 1.1 ad * Start tracing.
464 1.1 ad */
465 1.1 ad if ((error = lockstat_alloc(le)) == 0)
466 1.1 ad lockstat_start(le);
467 1.1 ad break;
468 1.1 ad
469 1.1 ad case IOC_LOCKSTAT_DISABLE:
470 1.1 ad if (!lockstat_enabled)
471 1.1 ad error = EINVAL;
472 1.1 ad else
473 1.1 ad error = lockstat_stop((lsdisable_t *)data);
474 1.1 ad break;
475 1.1 ad
476 1.1 ad default:
477 1.1 ad error = ENOTTY;
478 1.1 ad break;
479 1.1 ad }
480 1.1 ad
481 1.1 ad return error;
482 1.1 ad }
483 1.1 ad
484 1.1 ad /*
485 1.1 ad * Copy buffers out to user-space.
486 1.1 ad */
487 1.1 ad int
488 1.4 christos lockstat_read(dev_t dev, struct uio *uio, int flag)
489 1.1 ad {
490 1.1 ad
491 1.8.2.2 ad if (lockstat_enabled)
492 1.8.2.2 ad return EBUSY;
493 1.8.2.2 ad return uiomove(lockstat_baseb, lockstat_sizeb, uio);
494 1.1 ad }
495