#!/usr/bin/ksh
#
# dexplorer - DTrace system explorer, runs a collection of scripts.
#             Written using DTrace (Solaris 10 3/05).
#
# This program automatically runs a collection of DTrace scripts to examine
# many areas of the system, and places the output in a meaningful directory
# structure that is tar'd and gzip'd.
#
# $Id: dexplorer,v 1.1.1.1 2015/09/30 22:01:07 christos Exp $
#
# USAGE:    dexplorer [-qyDT] [-d outputdir] [-i interval]
#
#           -q              # quiet mode
#           -y              # "yes", don't prompt for confirmation
#           -D              # don't delete output dir
#           -T              # don't create output tar.gz
#           -d outputdir    # output directory
#           -i interval     # interval for each sample
#    eg,
#           dexplorer           # default is 5 second samples
#           dexplorer -y -i30   # no prompting, with 30 second samples
#
# SEE ALSO: DTraceToolkit
#
# THANKS: David Visser, et al. for the idea and encouragement.
#
# COPYRIGHT: Copyright (c) 2005 Brendan Gregg.
#
# CDDL HEADER START
#
#  The contents of this file are subject to the terms of the
#  Common Development and Distribution License, Version 1.0 only
#  (the "License").  You may not use this file except in compliance
#  with the License.
#
#  You can obtain a copy of the license at Docs/cddl1.txt
#  or http://www.opensolaris.org/os/licensing.
#  See the License for the specific language governing permissions
#  and limitations under the License.
#
# CDDL HEADER END
#
# CODE:
#
#  This is currently a monolithic script, and while it contains only
#  a few dozen straightforward DTrace scripts I think it's desirable to
#  keep it that way. The scripts themselves have been designed to be very
#  generic (eg, switching on all sdt:::), and are aggregations to keep a
#  limit on the size of the output.
#
# Author: Brendan Gregg  [Sydney, Australia]
#
# 23-Jun-2005   Brendan Gregg   Created this.
# 28-Jun-2005      "      "     Last update.

#
# Default variables
#
interval=5                              # time of each sample
verbose=1                               # print screen output
prompt=1                                # prompt before run
tar=1                                   # create tar file
delete=1                                # delete output dirs
dtrace=/usr/sbin/dtrace                 # path to dtrace
root=.                                  # default output dir
PATH=/usr/bin:/usr/sbin                 # safe path
dir=de_`uname -n`_`date +%Y%m%d%H%M`    # OUTPUT FILENAME
samples=20                              # max number of tests
current=0                               # current sample

#
# usage - print the help text on stderr.
#
# A plain (non "<<-") heredoc is used on purpose so the text does not
# depend on the script being indented with literal tab characters.
#
function usage {
    cat <<END >&2
USAGE: dexplorer [-qyDT] [-d outputdir] [-i interval]

        -q               # quiet mode
        -y               # "yes", don't prompt for confirmation
        -D               # don't delete output dir
        -T               # don't create output tar.gz
        -d outputdir     # output directory
        -i interval      # interval for each sample
   eg,
       dexplorer         # default is 5 second samples
       dexplorer -y -i30 # no prompting, with 30 second samples
END
}

#
# Process options
#
while getopts d:hi:qyDT name
do
    case $name in
    d)      root=$OPTARG ;;
    i)      interval=$OPTARG ;;
    q)      verbose=0 ;;
    y)      prompt=0 ;;
    D)      delete=0 ;;
    T)      tar=0 ;;
    h|?)    usage
            exit 1
    esac
done
shift $(( OPTIND - 1 ))

#
# Confirm path
#
if [[ "$prompt" == "1" ]] ; then
    if [[ "$root" == "." ]]; then
        print "Output dir will be the current dir ($PWD)."
    else
        print "Output dir will be $root"
    fi
    print -n "Hit enter for yes, or type path: "
    read ans junk
    if [[ "$ans" == [yY] || "$ans" == [yY]es ]]; then
        print "WARNING: I didn't ask for \"$ans\"!"
        print "\tI was asking for the path or just enter."
        print "\tignoring \"$ans\"..."
        # Really ignore it: without this the answer "y"/"yes" fell through
        # to the test below and became the output directory.
        ans=""
    fi
    if [[ "$ans" != "" ]]; then
        root=$ans
        print "Output is now $root."
    fi
fi

#
# Sanity checks
#
# The interval is spliced verbatim into the D program text below, so it
# must be a plain positive decimal integer; anything else (letters, signs,
# fractions, punctuation) is rejected here.
if [[ "$interval" == *[!0-9]* ]]; then
    print "ERROR2: Invalid interval $interval.\n"
    print "Please use a number of seconds."
    exit 2
fi
# Empty, or made only of zeros: shorter than the one second minimum.
if [[ "$interval" == "" || "$interval" == +(0) ]]; then
    print "ERROR3: Length of interval $interval too short.\n"
    print "Minimum 1 second."
    exit 3
fi
# Drop leading zeros so the value cannot be read as octal in (( )) below.
interval=${interval##+(0)}
if [[ ! -d "$root" ]]; then
    print "ERROR4: Output directory \"$root\" does not exist.\n"
    print "Perhaps try a mkdir first?"
    print "or use an existing dir, eg \"/tmp\""
    exit 4
fi
if [[ ! -w "$root" ]]; then
    print "ERROR5: Can't write to output directory \"$root\".\n"
    print "Are you logged in as root?"
    print "Perhaps try another directory, eg \"/tmp\""
    exit 5
fi
# Trial run: a working dtrace prints its own pid and exits immediately.
if [[ `$dtrace -b1k -qn 'BEGIN { trace(pid); exit(0); }'` == "" ]]; then
    print "ERROR6: Unable to run dtrace!\n"
    print "Perhaps this is a permission problem? Try running as root."
    exit 6
fi

# calculate total time
(( total = interval * samples ))
if (( total > 180 )); then
    (( total = total / 60 ))
    total="$total minutes"
else
    total="$total seconds"
fi

#
# Common Functions
#

# decho - print the arguments unless running in quiet mode (-q).
function decho {
    if (( verbose )); then print "$*"; fi
}

# Output filter for every test: strips empty lines.
clean="sed /^\$/d"

# D program prefix shared by every test: prints a one line banner (time,
# uname fields, interval) and ends the test after $interval seconds.
header='dtrace:::BEGIN {
    printf("%Y, ", walltimestamp);
    printf("%s %s %s %s %s, ", `utsname.sysname, `utsname.nodename,
        `utsname.release, `utsname.version, `utsname.machine);
    printf("%d secs\n",'$interval');
}
profile:::tick-'$interval'sec { exit(0); }
'

# dstatus - print "NN% message" progress and advance the test counter.
# Does nothing in quiet mode; the counter is only used for this display.
# The message is passed as a %s argument, never as the format string.
function dstatus {
    if (( verbose )); then
        (( percent = current * 100 / samples ))
        printf "%3d%% %s\n" $percent "$*"
        (( current = current + 1 ))
    fi
}

########################################
#  START                               #
########################################

#
# Make dirs
#
# Paths are quoted so an output directory containing spaces works.
err=0
cd "$root"
(( err = err + $? ))
mkdir "$dir"
(( err = err + $? ))
cd "$dir"
(( err = err + $? ))
# Double check that we really are inside the new directory before
# anything gets written.
base1=${PWD##*/}
base2=${dir##*/}
if [[ "$base1" != "$base2" || "$err" != "0" ]]; then
    print "ERROR7: tried to mkdir $dir from $root, but something failed.\n"
    print "Check directories before rerunning."
    exit 7
fi
mkdir Cpu
mkdir Disk
mkdir Mem
mkdir Net
mkdir Proc
mkdir Info

#
# Create Log
#
decho "Starting dexplorer ver 0.76."
decho "Sample interval is $interval seconds. Total run is > $total."
( print "dexplorer ver 0.76\n------------------"
  print -n "System: "
  uname -a
  print -n "Start:  "
  date ) > log

#
# Capture Standard Info
#
args='pid,ppid,uid,gid,projid,zoneid,pset,pri,nice,'
args=$args'class,vsz,rss,time,pcpu,pmem,args'
uname -a > Info/uname-a         # System
psrinfo -v > Info/psrinfo-v     # CPU
prtconf > Info/prtconf          # Memory (+ devices)
df -k > Info/df-k               # Disk
ifconfig -a > Info/ifconfig-a   # Network
ps -eo $args > Info/ps-o        # Processes
uptime > Info/uptime            # Load

#
# Cpu Tests, DTrace
#

dstatus "Interrupts by CPU..."
$dtrace -qn "$header"'
    sdt:::interrupt-start { @num[cpu] = count(); }
    dtrace:::END
    {
        printf("%-16s %16s\n", "CPU", "INTERRUPTS");
        printa("%-16d %@16d\n", @num);
    }
' | $clean > Cpu/interrupt_by_cpu

dstatus "Interrupt times..."
$dtrace -qn "$header"'
    sdt:::interrupt-start { self->ts = vtimestamp; }
    sdt:::interrupt-complete
    /self->ts && arg0 != 0/
    {
        this->devi = (struct dev_info *)arg0;
        self->name = this->devi != 0 ?
            stringof(`devnamesp[this->devi->devi_major].dn_name) : "?";
        this->inst = this->devi != 0 ? this->devi->devi_instance : 0;
        @num[self->name, this->inst] = sum(vtimestamp - self->ts);
        self->name = 0;
    }
    sdt:::interrupt-complete { self->ts = 0; }
    dtrace:::END
    {
        printf("%11s %16s\n", "DEVICE", "TIME (ns)");
        printa("%10s%-3d %@16d\n", @num);
    }
' | $clean > Cpu/interrupt_time

dstatus "Dispatcher queue length by CPU..."
# Each test below keeps its D clauses in $dprog, appends them to the shared
# $header prefix, and writes the blank-line-filtered output to a file.

# Dispatcher queue length: sampled at 1000 Hz, one distribution per CPU.
dprog='
    profile:::profile-1000
    {
        this->num = curthread->t_cpu->cpu_disp->disp_nrunnable;
        @length[cpu] = lquantize(this->num, 0, 100, 1);
    }
    dtrace:::END { printa(" CPU %d%@d\n", @length); }
'
$dtrace -qn "$header$dprog" | $clean > Cpu/dispqlen_by_cpu

dstatus "Sdt counts..."
# Count of every sdt probe firing, keyed by probe function and name.
dprog='
    sdt:::{ @num[probefunc, probename] = count(); }
    dtrace:::END
    {
        printf("%-32s %-32s %10s\n", "FUNC", "NAME", "COUNT");
        printa("%-32s %-32s %@10d\n", @num);
    }
'
$dtrace -qn "$header$dprog" | $clean > Cpu/sdt_count

#
# Disk Tests, DTrace
#

dstatus "Pages paged in by process..."
# Sum of the vminfo pgpgin argument per pid and command name.
dprog='
    vminfo:::pgpgin { @pg[pid, execname] = sum(arg0); }
    dtrace:::END
    {
        printf("%6s %-16s %16s\n", "PID", "CMD", "PAGES");
        printa("%6d %-16s %@16d\n", @pg);
    }
'
$dtrace -qn "$header$dprog" | $clean > Disk/pgpgin_by_process

dstatus "Files opened successfully count..."
# The pathname is saved on entry and counted on return only when the
# return value is not -1; the thread-local state is cleared afterwards.
dprog='
    syscall::open*:entry { self->file = copyinstr(arg0); self->ok = 1; }
    syscall::open*:return /self->ok && arg0 != -1/
    {
        @num[self->file] = count();
    }
    syscall::open*:return /self->ok/ { self->file = 0; self->ok = 0; }
    dtrace:::END
    {
        printf("%-64s %8s\n", "FILE", "COUNT");
        printa("%-64s %@8d\n", @num);
    }
'
$dtrace -qn "$header$dprog" | $clean > Disk/fileopen_count

dstatus "Disk I/O size distribution by process..."
# Power-of-two distribution of b_bcount per pid and command name; there is
# no END clause, so dtrace prints the aggregation in its default format.
dprog='
    io:::start { @size[pid, execname] = quantize(args[0]->b_bcount); }
'
$dtrace -qn "$header$dprog" | $clean > Disk/sizedist_by_process

#
# Mem Tests, DTrace
#

dstatus "Minor faults by process..."
# Sum of the vminfo as_fault argument per pid and command name.
dprog='
    vminfo:::as_fault { @mem[pid, execname] = sum(arg0); }
    dtrace:::END
    {
        printf("%6s %-16s %16s\n", "PID", "CMD", "MINFAULTS");
        printa("%6d %-16s %@16d\n", @mem);
    }
'
$dtrace -qn "$header$dprog" | $clean > Mem/minf_by_process


dstatus "Vminfo data by process..."
# Each test below keeps its D clauses in $dprog, appends them to the shared
# $header prefix, and writes the blank-line-filtered output to a file.

# Every vminfo probe, summed by pid, command name and statistic name.
dprog='
    vminfo::: { @data[pid, execname, probename] = sum(arg0); }
    dtrace:::END
    {
        printf("%6s %-16s %-16s %16s\n",
            "PID", "CMD", "STATISTIC", "VALUE");
        printa("%6d %-16s %-16s %@16d\n", @data);
    }
'
$dtrace -qn "$header$dprog" | $clean > Mem/vminfo_by_process

#
# Net Tests, DTrace
#

dstatus "Mib data by mib statistic..."
# Every mib probe, summed by statistic name.
dprog='
    mib::: { @data[probename] = sum(arg0); }
    dtrace:::END
    {
        printf("%-32s %16s\n", "STATISTIC", "VALUE");
        printa("%-32s %@16d\n", @data);
    }
'
$dtrace -qn "$header$dprog" | $clean > Net/mib_data

dstatus "TCP write bytes by process..."
# msgdsize() of the second tcp_output argument, summed per pid and command.
dprog='
    fbt:ip:tcp_output:entry
    {
        this->size = msgdsize(args[1]);
        @size[pid, execname] = sum(this->size);
    }
    dtrace:::END
    {
        printf("%6s %-16s %12s\n", "PID", "CMD", "BYTES");
        printa("%6d %-16s %@12d\n", @size);
    }
'
$dtrace -qn "$header$dprog" | $clean > Net/tcpw_by_process

#
# Proc Tests, DTrace
#

dstatus "Sample process @ 1000 Hz..."
# Counts 1000 Hz profile samples by pid and process argument string.
dprog='
    profile:::profile-1000
    {
        @num[pid, curpsinfo->pr_psargs] = count();
    }
    dtrace:::END
    {
        printf("%6s %12s %s\n", "PID", "SAMPLES", "ARGS");
        printa("%6d %@12d %S\n", @num);
    }
'
$dtrace -qn "$header$dprog" | $clean > Proc/sample_process

dstatus "Syscall count by process..."
# System call entries counted by pid, command name and syscall name.
dprog='
    syscall:::entry { @num[pid, execname, probefunc] = count(); }
    dtrace:::END
    {
        printf("%6s %-24s %-24s %8s\n",
            "PID", "CMD", "SYSCALL", "COUNT");
        printa("%6d %-24s %-24s %@8d\n", @num);
    }
'
$dtrace -qn "$header$dprog" | $clean > Proc/syscall_by_process

dstatus "Syscall count by syscall..."
# System call entries counted by syscall name only.
dprog='
    syscall:::entry { @num[probefunc] = count(); }
    dtrace:::END
    {
        printf("%-32s %16s\n", "SYSCALL", "COUNT");
        printa("%-32s %@16d\n", @num);
    }
'
$dtrace -qn "$header$dprog" | $clean > Proc/syscall_count

dstatus "Read bytes by process..."
# Each test below keeps its D clauses in $dprog, appends them to the shared
# $header prefix, and writes the blank-line-filtered output to a file.

# Sum of the sysinfo readch argument per pid and command name.
dprog='
    sysinfo:::readch { @bytes[pid, execname] = sum(arg0); }
    dtrace:::END
    {
        printf("%6s %-16s %16s\n", "PID", "CMD", "BYTES");
        printa("%6d %-16s %@16d\n", @bytes);
    }
'
$dtrace -qn "$header$dprog" | $clean > Proc/readb_by_process

dstatus "Write bytes by process..."
# Sum of the sysinfo writech argument per pid and command name.
dprog='
    sysinfo:::writech { @bytes[pid, execname] = sum(arg0); }
    dtrace:::END
    {
        printf("%6s %-16s %16s\n", "PID", "CMD", "BYTES");
        printa("%6d %-16s %@16d\n", @bytes);
    }
'
$dtrace -qn "$header$dprog" | $clean > Proc/writeb_by_process

dstatus "Sysinfo counts by process..."
# Every sysinfo probe, summed by pid, command name and statistic name.
dprog='
    sysinfo::: { @num[pid, execname, probename] = sum(arg0); }
    dtrace:::END
    {
        printf("%6s %-16s %-16s %16s\n",
            "PID", "CMD", "STATISTIC", "COUNT");
        printa("%6d %-16s %-16s %@16d\n", @num);
    }
'
$dtrace -qn "$header$dprog" | $clean > Proc/sysinfo_by_process

dstatus "New process counts with arguments..."
# Successful exec events counted by pid, parent pid and argument string.
dprog='
    proc:::exec-success
    {
        @num[pid, ppid, curpsinfo->pr_psargs] = count();
    }
    dtrace:::END
    {
        printf("%6s %6s %8s %s\n", "PID", "PPID", "COUNT", "ARGS");
        printa("%6d %6d %@8d %S\n", @num);
    }
'
$dtrace -qn "$header$dprog" | $clean > Proc/newprocess_count

dstatus "Signal counts..."
# Signals sent, counted by sending command, signal number and the
# pr_fname of the receiving process.
dprog='
    proc:::signal-send {
        @num[execname,args[2],stringof(args[1]->pr_fname)] = count();
    }
    dtrace:::END
    {
        printf("%-16s %-8s %-16s %8s\n",
            "FROM", "SIG", "TO", "COUNT");
        printa("%-16s %-8d %-16s %@8d\n", @num);
    }
'
$dtrace -qn "$header$dprog" | $clean > Proc/signal_count

dstatus "Syscall error counts..."
# System calls that returned -1, counted by pid, command name, syscall
# name and errno.
$dtrace -qn "$header"'
    syscall:::return /(int)arg0 == -1/
    {
        @num[pid, execname, probefunc, errno] = count();
    }
    dtrace:::END
    {
        printf("%6s %-16s %-32s %-6s %8s\n",
            "PID", "CMD", "SYSCALL", "ERRNO", "COUNT");
        printa("%6d %-16s %-32s %-6d %@8d\n", @num);
    }
' | $clean > Proc/syscall_errors


###########
#  Done
#
( print -n "End:    "
  date ) >> log
decho "100% Done."

# archived is set only once both tar and gzip have succeeded, so a failed
# archive step can never lead to the collected output being deleted.
archived=0
if (( tar )); then
    cd ..
    if tar cf "$dir.tar" "$dir" && gzip "$dir.tar"; then
        archived=1
        decho "File is $dir.tar.gz"
    else
        print -u2 "WARNING: could not create $dir.tar.gz, keeping $dir"
    fi
fi

# The cd is part of the condition: if it fails, none of the relative rm
# commands below are run from the wrong directory.
if (( delete && archived )) && cd "$dir"; then
    # this could be all an "rm -r $dir", but since it will be run
    # as root on production servers - lets be deliberately cautious
    # and remove only the files this script created,
    rm Cpu/interrupt_by_cpu
    rm Cpu/interrupt_time
    rm Cpu/dispqlen_by_cpu
    rm Cpu/sdt_count
    rm Disk/pgpgin_by_process
    rm Disk/fileopen_count
    rm Disk/sizedist_by_process
    rm Mem/minf_by_process
    rm Mem/vminfo_by_process
    rm Net/mib_data
    rm Net/tcpw_by_process
    rm Proc/sample_process
    rm Proc/syscall_by_process
    rm Proc/syscall_count
    rm Proc/readb_by_process
    rm Proc/writeb_by_process
    rm Proc/sysinfo_by_process
    rm Proc/newprocess_count
    rm Proc/signal_count
    rm Proc/syscall_errors
    rmdir Cpu
    rmdir Disk
    rmdir Mem
    rmdir Net
    rmdir Proc
    rm Info/uname-a
    rm Info/psrinfo-v
    rm Info/prtconf
    rm Info/df-k
    rm Info/ifconfig-a
    rm Info/ps-o
    rm Info/uptime
    rmdir Info
    rm log
    cd ..
    rmdir "$dir"
else
    decho "Directory is $dir"
fi
