main.c revision 1.7 1 /* $NetBSD: main.c,v 1.7 2006/12/25 11:57:40 ad Exp $ */
2
3 /*-
4 * Copyright (c) 2006 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * TODO:
41 *
42 * - Tracking of times for sleep locks is broken.
43 * - Need better analysis and tracking of events.
44 * - Shouldn't have to parse the namelist here. We should use something like
45 * FreeBSD's libelf.
46 * - The way the namelist is searched sucks, is it worth doing something
47 * better?
48 */
49
50 #include <sys/cdefs.h>
51 #ifndef lint
52 __RCSID("$NetBSD: main.c,v 1.7 2006/12/25 11:57:40 ad Exp $");
53 #endif /* not lint */
54
55 #include <sys/types.h>
56 #include <sys/param.h>
57 #include <sys/time.h>
58 #include <sys/fcntl.h>
59 #include <sys/ioctl.h>
60 #include <sys/wait.h>
61 #include <sys/signal.h>
62 #include <sys/sysctl.h>
63
64 #include <dev/lockstat.h>
65
66 #include <stdio.h>
67 #include <stdlib.h>
68 #include <string.h>
69 #include <limits.h>
70 #include <unistd.h>
71 #include <err.h>
72 #include <paths.h>
73 #include <util.h>
74 #include <ctype.h>
75 #include <errno.h>
76
77 #include "extern.h"
78
79 #define _PATH_DEV_LOCKSTAT "/dev/lockstat"
80
81 #define MILLI 1000.0
82 #define MICRO 1000000.0
83 #define NANO 1000000000.0
84 #define PICO 1000000000000.0
85
86 TAILQ_HEAD(lock_head, lockstruct);
87 typedef struct lock_head locklist_t;
88 TAILQ_HEAD(buf_head, lsbuf);
89 typedef struct buf_head buflist_t;
90
91 typedef struct lockstruct {
92 TAILQ_ENTRY(lockstruct) chain;
93 buflist_t bufs;
94 buflist_t tosort;
95 uintptr_t lock;
96 double time;
97 uint32_t count;
98 u_int flags;
99 u_int nbufs;
100 char name[NAME_SIZE];
101 } lock_t;
102
103 typedef struct name {
104 const char *name;
105 int mask;
106 } name_t;
107
108 const name_t locknames[] = {
109 { "adaptive_mutex", LB_ADAPTIVE_MUTEX },
110 { "spin_mutex", LB_SPIN_MUTEX },
111 { "lockmgr", LB_LOCKMGR },
112 { "rwlock", LB_RWLOCK },
113 { "kernel_lock", LB_KERNEL_LOCK },
114 { NULL, 0 }
115 };
116
117 const name_t eventnames[] = {
118 { "spin", LB_SPIN },
119 { "sleep_exclusive", LB_SLEEP1 },
120 { "sleep_shared", LB_SLEEP2 },
121 { NULL, 0 },
122 };
123
124 const name_t alltypes[] = {
125 { "Adaptive mutex spin", LB_ADAPTIVE_MUTEX | LB_SPIN },
126 { "Adaptive mutex sleep", LB_ADAPTIVE_MUTEX | LB_SLEEP1 },
127 { "Spin mutex spin", LB_SPIN_MUTEX | LB_SPIN },
128 { "lockmgr sleep", LB_LOCKMGR | LB_SLEEP1 },
129 { "RW lock sleep (writer)", LB_RWLOCK | LB_SLEEP1 },
130 { "RW lock sleep (reader)", LB_RWLOCK | LB_SLEEP2 },
131 { "Kernel lock spin", LB_KERNEL_LOCK | LB_SPIN },
132 { NULL, 0 }
133 };
134
135 locklist_t locklist;
136 locklist_t freelist;
137 locklist_t sortlist;
138
139 lsbuf_t *bufs;
140 lsdisable_t ld;
141 int lflag;
142 int nbufs;
143 int cflag;
144 int lsfd;
145 int displayed;
146 int bin64;
147 double tscale;
148 double cscale;
149 double cpuscale[sizeof(ld.ld_freq) / sizeof(ld.ld_freq[0])];
150 FILE *outfp;
151
152 void findsym(findsym_t, char *, uintptr_t *, uintptr_t *);
153 void spawn(int, char **);
154 void display(int, const char *name);
155 void listnames(const name_t *);
156 int matchname(const name_t *, const char *);
157 void makelists(int, int);
158 void nullsig(int);
159 void usage(void);
160 int ncpu(void);
161 lock_t *morelocks(void);
162
163 int
164 main(int argc, char **argv)
165 {
166 int eventtype, locktype, ch, nlfd, sflag, fd, i, pflag;
167 const char *nlistf, *outf;
168 char *lockname, *funcname;
169 const name_t *name;
170 lsenable_t le;
171 double ms;
172 char *p;
173
174 nlistf = NULL;
175 outf = NULL;
176 lockname = NULL;
177 funcname = NULL;
178 eventtype = -1;
179 locktype = -1;
180 nbufs = 0;
181 sflag = 0;
182 pflag = 0;
183
184 while ((ch = getopt(argc, argv, "E:F:L:M:N:T:b:ceflo:pst")) != -1)
185 switch (ch) {
186 case 'E':
187 eventtype = matchname(eventnames, optarg);
188 break;
189 case 'F':
190 funcname = optarg;
191 break;
192 case 'L':
193 lockname = optarg;
194 break;
195 case 'N':
196 nlistf = optarg;
197 break;
198 case 'T':
199 locktype = matchname(locknames, optarg);
200 break;
201 case 'b':
202 nbufs = (int)strtol(optarg, &p, 0);
203 if (!isdigit((u_int)*optarg) || *p != '\0')
204 usage();
205 break;
206 case 'c':
207 cflag = 1;
208 break;
209 case 'e':
210 listnames(eventnames);
211 break;
212 case 'l':
213 lflag = 1;
214 break;
215 case 'o':
216 outf = optarg;
217 break;
218 case 'p':
219 pflag = 1;
220 break;
221 case 's':
222 sflag = 1;
223 break;
224 case 't':
225 listnames(locknames);
226 break;
227 default:
228 usage();
229 }
230 argc -= optind;
231 argv += optind;
232
233 if (*argv == NULL)
234 usage();
235
236 if (outf) {
237 fd = open(outf, O_WRONLY | O_CREAT | O_TRUNC, 0600);
238 if (fd == -1)
239 err(EXIT_FAILURE, "opening %s", outf);
240 outfp = fdopen(fd, "w");
241 } else
242 outfp = stdout;
243
244 /*
245 * Find the name list for resolving symbol names, and load it into
246 * memory.
247 */
248 if (nlistf == NULL) {
249 nlfd = open(_PATH_KSYMS, O_RDONLY);
250 nlistf = getbootfile();
251 } else
252 nlfd = -1;
253 if (nlfd == -1) {
254 if ((nlfd = open(nlistf, O_RDONLY)) < 0)
255 err(EXIT_FAILURE, "cannot open " _PATH_KSYMS " or %s",
256 nlistf);
257 }
258 if (loadsym32(nlfd) != 0) {
259 if (loadsym64(nlfd) != 0)
260 errx(EXIT_FAILURE, "unable to load symbol table");
261 bin64 = 1;
262 }
263 close(nlfd);
264
265 memset(&le, 0, sizeof(le));
266 le.le_nbufs = nbufs;
267
268 /*
269 * Set up initial filtering.
270 */
271 if (lockname != NULL) {
272 findsym(LOCK_BYNAME, lockname, &le.le_lockstart,
273 &le.le_lockend);
274 le.le_flags |= LE_ONE_LOCK;
275 }
276 if (!lflag)
277 le.le_flags |= LE_CALLSITE;
278 if (funcname != NULL) {
279 if (lflag)
280 usage();
281 findsym(FUNC_BYNAME, funcname, &le.le_csstart, &le.le_csend);
282 le.le_flags |= LE_ONE_CALLSITE;
283 }
284 le.le_mask = (eventtype & LB_EVENT_MASK) | (locktype & LB_LOCK_MASK);
285
286 /*
287 * Start tracing.
288 */
289 if ((lsfd = open(_PATH_DEV_LOCKSTAT, O_RDONLY)) < 0)
290 err(EXIT_FAILURE, "cannot open " _PATH_DEV_LOCKSTAT);
291 if (ioctl(lsfd, IOC_LOCKSTAT_GVERSION, &ch) < 0)
292 err(EXIT_FAILURE, "ioctl");
293 if (ch != LS_VERSION)
294 errx(EXIT_FAILURE,
295 "incompatible lockstat interface version (%d, kernel %d)",
296 LS_VERSION, ch);
297 if (ioctl(lsfd, IOC_LOCKSTAT_ENABLE, &le))
298 err(EXIT_FAILURE, "cannot enable tracing");
299
300 /*
301 * Execute the traced program.
302 */
303 spawn(argc, argv);
304
305 /*
306 * Stop tracing, and read the trace buffers from the kernel.
307 */
308 if (ioctl(lsfd, IOC_LOCKSTAT_DISABLE, &ld) == -1) {
309 if (errno == EOVERFLOW) {
310 warnx("overflowed available kernel trace buffers");
311 exit(EXIT_FAILURE);
312 }
313 err(EXIT_FAILURE, "cannot disable tracing");
314 }
315 if ((bufs = malloc(ld.ld_size)) == NULL)
316 err(EXIT_FAILURE, "cannot allocate memory for user buffers");
317 if (read(lsfd, bufs, ld.ld_size) != ld.ld_size)
318 err(EXIT_FAILURE, "reading from " _PATH_DEV_LOCKSTAT);
319 if (close(lsfd))
320 err(EXIT_FAILURE, "close(" _PATH_DEV_LOCKSTAT ")");
321
322 /*
323 * Figure out how to scale the results. For internal use we convert
324 * all times from CPU frequency based to picoseconds, and values are
325 * eventually displayed in ms.
326 */
327 for (i = 0; i < sizeof(ld.ld_freq) / sizeof(ld.ld_freq[0]); i++)
328 if (ld.ld_freq[i] != 0)
329 cpuscale[i] = PICO / ld.ld_freq[i];
330 ms = ld.ld_time.tv_sec * MILLI + ld.ld_time.tv_nsec / MICRO;
331 if (pflag)
332 cscale = 1.0 / ncpu();
333 else
334 cscale = 1.0;
335 cscale *= (sflag ? MILLI / ms : 1.0);
336 tscale = cscale / NANO;
337 nbufs = (int)(ld.ld_size / sizeof(lsbuf_t));
338
339 TAILQ_INIT(&locklist);
340 TAILQ_INIT(&sortlist);
341 TAILQ_INIT(&freelist);
342
343 /*
344 * Display the results.
345 */
346 fprintf(outfp, "Elapsed time: %.2f seconds.", ms / MILLI);
347 if (sflag || pflag) {
348 fprintf(outfp, " Displaying ");
349 if (pflag)
350 fprintf(outfp, "per-CPU ");
351 if (sflag)
352 fprintf(outfp, "per-second ");
353 fprintf(outfp, "averages.");
354 }
355 putc('\n', outfp);
356
357 for (name = alltypes; name->name != NULL; name++) {
358 if (eventtype != -1 &&
359 (name->mask & LB_EVENT_MASK) != eventtype)
360 continue;
361 if (locktype != -1 &&
362 (name->mask & LB_LOCK_MASK) != locktype)
363 continue;
364
365 display(name->mask, name->name);
366 }
367
368 if (displayed == 0)
369 fprintf(outfp, "None of the selected events were recorded.\n");
370 exit(EXIT_SUCCESS);
371 }
372
373 void
374 usage(void)
375 {
376
377 fprintf(stderr,
378 "%s: usage:\n"
379 "%s [options] <command>\n\n"
380 "-b nbuf\t\tset number of event buffers to allocate\n"
381 "-c\t\treport percentage of total events by count, not time\n"
382 "-E evt\t\tdisplay only one type of event\n"
383 "-e\t\tlist event types\n"
384 "-F func\t\tlimit trace to one function\n"
385 "-L lock\t\tlimit trace to one lock (name, or address)\n"
386 "-l\t\ttrace only by lock\n"
387 "-N nlist\tspecify name list file\n"
388 "-o file\t\tsend output to named file, not stdout\n"
389 "-p\t\tshow average count/time per CPU, not total\n"
390 "-s\t\tshow average count/time per second, not total\n"
391 "-T type\t\tdisplay only one type of lock\n"
392 "-t\t\tlist lock types\n",
393 getprogname(), getprogname());
394
395 exit(EXIT_FAILURE);
396 }
397
398 void
399 nullsig(int junk)
400 {
401
402 (void)junk;
403 }
404
405 void
406 listnames(const name_t *name)
407 {
408
409 for (; name->name != NULL; name++)
410 printf("%s\n", name->name);
411
412 exit(EXIT_SUCCESS);
413 }
414
415 int
416 matchname(const name_t *name, const char *string)
417 {
418
419 for (; name->name != NULL; name++)
420 if (strcasecmp(name->name, string) == 0)
421 return name->mask;
422
423 warnx("unknown type `%s'", string);
424 usage();
425 return 0;
426 }
427
428 /*
429 * Return the number of CPUs in the running system.
430 */
431 int
432 ncpu(void)
433 {
434 int rv, mib[2];
435 size_t varlen;
436
437 mib[0] = CTL_HW;
438 mib[1] = HW_NCPU;
439 varlen = sizeof(rv);
440 if (sysctl(mib, 2, &rv, &varlen, NULL, (size_t)0) < 0)
441 rv = 1;
442
443 return (rv);
444 }
445
446 /*
447 * Call into the ELF parser and look up a symbol by name or by address.
448 */
449 void
450 findsym(findsym_t find, char *name, uintptr_t *start, uintptr_t *end)
451 {
452 uintptr_t tend;
453 char *p;
454 int rv;
455
456 if (end == NULL)
457 end = &tend;
458
459 if (find == LOCK_BYNAME) {
460 if (isdigit((u_int)name[0])) {
461 *start = (uintptr_t)strtoul(name, &p, 0);
462 if (*p == '\0')
463 return;
464 }
465 }
466
467 if (bin64)
468 rv = findsym64(find, name, start, end);
469 else
470 rv = findsym32(find, name, start, end);
471
472 if (find == FUNC_BYNAME || find == LOCK_BYNAME) {
473 if (rv == -1)
474 errx(EXIT_FAILURE, "unable to find symbol `%s'", name);
475 return;
476 }
477
478 if (rv == -1)
479 snprintf(name, NAME_SIZE, "%016lx", (long)*start);
480 }
481
482 /*
483 * Fork off the child process and wait for it to complete. We trap SIGINT
484 * so that the caller can use Ctrl-C to stop tracing early and still get
485 * useful results.
486 */
487 void
488 spawn(int argc, char **argv)
489 {
490 pid_t pid;
491
492 switch (pid = fork()) {
493 case 0:
494 close(lsfd);
495 if (execvp(argv[0], argv) == -1)
496 err(EXIT_FAILURE, "cannot exec");
497 break;
498 case -1:
499 err(EXIT_FAILURE, "cannot fork to exec");
500 break;
501 default:
502 signal(SIGINT, nullsig);
503 wait(NULL);
504 signal(SIGINT, SIG_DFL);
505 break;
506 }
507 }
508
509 /*
510 * Allocate a new block of lock_t structures.
511 */
512 lock_t *
513 morelocks(void)
514 {
515 const static int batch = 32;
516 lock_t *l, *lp, *max;
517
518 l = (lock_t *)malloc(sizeof(*l) * batch);
519
520 for (lp = l, max = l + batch; lp < max; lp++)
521 TAILQ_INSERT_TAIL(&freelist, lp, chain);
522
523 return l;
524 }
525
526 /*
527 * From the kernel supplied data, construct two dimensional lists of locks
528 * and event buffers, indexed by lock type and sorted by event type.
529 */
530 void
531 makelists(int mask, int event)
532 {
533 lsbuf_t *lb, *lb2, *max;
534 lock_t *l, *l2;
535 int type;
536
537 /*
538 * Recycle lock_t structures from the last run.
539 */
540 while ((l = TAILQ_FIRST(&locklist)) != NULL) {
541 TAILQ_REMOVE(&locklist, l, chain);
542 TAILQ_INSERT_HEAD(&freelist, l, chain);
543 }
544
545 type = mask & LB_LOCK_MASK;
546
547 for (lb = bufs, max = bufs + nbufs; lb < max; lb++) {
548 if ((lb->lb_flags & LB_LOCK_MASK) != type)
549 continue;
550
551 /*
552 * Look for a record descibing this lock, and allocate a
553 * new one if needed.
554 */
555 TAILQ_FOREACH(l, &sortlist, chain) {
556 if (l->lock == lb->lb_lock)
557 break;
558 }
559 if (l == NULL) {
560 if ((l = TAILQ_FIRST(&freelist)) == NULL)
561 l = morelocks();
562 TAILQ_REMOVE(&freelist, l, chain);
563 l->flags = lb->lb_flags;
564 l->lock = lb->lb_lock;
565 l->nbufs = 0;
566 l->name[0] = '\0';
567 l->count = 0;
568 l->time = 0;
569 TAILQ_INIT(&l->tosort);
570 TAILQ_INIT(&l->bufs);
571 TAILQ_INSERT_TAIL(&sortlist, l, chain);
572 }
573
574 /*
575 * Scale the time values per buffer and summarise
576 * times+counts per lock.
577 */
578 lb->lb_times[event] *= cpuscale[lb->lb_cpu];
579 l->count += lb->lb_counts[event];
580 l->time += lb->lb_times[event];
581
582 /*
583 * Merge same lock+callsite pairs from multiple CPUs
584 * together.
585 */
586 TAILQ_FOREACH(lb2, &l->tosort, lb_chain.tailq) {
587 if (lb->lb_callsite == lb2->lb_callsite)
588 break;
589 }
590 if (lb2 != NULL) {
591 lb2->lb_counts[event] += lb->lb_counts[event];
592 lb2->lb_times[event] += lb->lb_times[event];
593 } else {
594 TAILQ_INSERT_HEAD(&l->tosort, lb, lb_chain.tailq);
595 l->nbufs++;
596 }
597 }
598
599 /*
600 * Now sort the lists.
601 */
602 while ((l = TAILQ_FIRST(&sortlist)) != NULL) {
603 TAILQ_REMOVE(&sortlist, l, chain);
604
605 /*
606 * Sort the buffers into the per-lock list.
607 */
608 while ((lb = TAILQ_FIRST(&l->tosort)) != NULL) {
609 TAILQ_REMOVE(&l->tosort, lb, lb_chain.tailq);
610
611 lb2 = TAILQ_FIRST(&l->bufs);
612 while (lb2 != NULL) {
613 if (cflag) {
614 if (lb->lb_counts[event] >
615 lb2->lb_counts[event])
616 break;
617 } else if (lb->lb_times[event] >
618 lb2->lb_times[event])
619 break;
620 lb2 = TAILQ_NEXT(lb2, lb_chain.tailq);
621 }
622 if (lb2 == NULL)
623 TAILQ_INSERT_TAIL(&l->bufs, lb,
624 lb_chain.tailq);
625 else
626 TAILQ_INSERT_BEFORE(lb2, lb, lb_chain.tailq);
627 }
628
629 /*
630 * Sort this lock into the per-type list, based on the
631 * totals per lock.
632 */
633 l2 = TAILQ_FIRST(&locklist);
634 while (l2 != NULL) {
635 if (cflag) {
636 if (l->count > l2->count)
637 break;
638 } else if (l->time > l2->time)
639 break;
640 l2 = TAILQ_NEXT(l2, chain);
641 }
642 if (l2 == NULL)
643 TAILQ_INSERT_TAIL(&locklist, l, chain);
644 else
645 TAILQ_INSERT_BEFORE(l2, l, chain);
646 }
647 }
648
649 /*
650 * Display a summary table for one lock type / event type pair.
651 */
652 void
653 display(int mask, const char *name)
654 {
655 lock_t *l;
656 lsbuf_t *lb;
657 double pcscale, metric;
658 char fname[NAME_SIZE];
659 int event;
660
661 event = (mask & LB_EVENT_MASK) - 1;
662 makelists(mask, event);
663
664 if (TAILQ_EMPTY(&locklist))
665 return;
666
667 fprintf(outfp, "\n-- %s\n\n"
668 "Total%% Count Time/ms Lock Caller\n"
669 "------ ------- --------- ---------------------- ------------------------------\n",
670 name);
671
672 /*
673 * Sum up all events for this type of lock + event.
674 */
675 pcscale = 0;
676 TAILQ_FOREACH(l, &locklist, chain) {
677 if (cflag)
678 pcscale += l->count;
679 else
680 pcscale += l->time;
681 displayed++;
682 }
683 if (pcscale == 0)
684 pcscale = 100;
685 else
686 pcscale = (100.0 / pcscale);
687
688 /*
689 * For each lock, print a summary total, followed by a breakdown by
690 * caller.
691 */
692 TAILQ_FOREACH(l, &locklist, chain) {
693 if (cflag)
694 metric = l->count;
695 else
696 metric = l->time;
697 metric *= pcscale;
698
699 if (l->name[0] == '\0')
700 findsym(LOCK_BYADDR, l->name, &l->lock, NULL);
701
702 if (lflag || l->nbufs > 1)
703 fprintf(outfp, "%6.2f %7d %9.2f %-22s <all>\n",
704 metric, (int)(l->count * cscale),
705 l->time * tscale, l->name);
706
707 if (lflag)
708 continue;
709
710 TAILQ_FOREACH(lb, &l->bufs, lb_chain.tailq) {
711 if (cflag)
712 metric = lb->lb_counts[event];
713 else
714 metric = lb->lb_times[event];
715 metric *= pcscale;
716
717 findsym(FUNC_BYADDR, fname, &lb->lb_callsite, NULL);
718 fprintf(outfp, "%6.2f %7d %9.2f %-22s %s\n",
719 metric, (int)(lb->lb_counts[event] * cscale),
720 lb->lb_times[event] * tscale, l->name, fname);
721 }
722 }
723 }
724