Home | History | Annotate | Line # | Download | only in Bin
      1 #!/usr/bin/ksh
      2 #
      3 # dexplorer - DTrace system explorer, runs a collection of scripts.
      4 #             Written using DTrace (Solaris 10 3/05).
      5 #
      6 # This program automatically runs a collection of DTrace scripts to examine
      7 # many areas of the system, and places the output in a meaningful directory
      8 # structure that is tar'd and gzip'd.
      9 #
     10 # $Id: dexplorer,v 1.1.1.1 2015/09/30 22:01:07 christos Exp $
     11 #
# USAGE:	dexplorer [-qyDT] [-d outputdir] [-i interval]
     13 #
     14 #                  -q              # quiet mode
     15 #                  -y              # "yes", don't prompt for confirmation
     16 #                  -D              # don't delete output dir
     17 #                  -T              # don't create output tar.gz
     18 #                  -d outputdir    # output directory
     19 #                  -i interval     # interval for each sample
     20 #    eg,
     21 #               dexplorer          # default is 5 second samples
     22 #               dexplorer -y -i30  # no prompting, with 30 second samples
     23 #
     24 # SEE ALSO:	DTraceToolkit
     25 #
# THANKS: David Visser, et al. for the idea and encouragement.
     27 #
     28 # COPYRIGHT: Copyright (c) 2005 Brendan Gregg.
     29 #
     30 # CDDL HEADER START
     31 #
     32 #  The contents of this file are subject to the terms of the
     33 #  Common Development and Distribution License, Version 1.0 only
     34 #  (the "License").  You may not use this file except in compliance
     35 #  with the License.
     36 #
     37 #  You can obtain a copy of the license at Docs/cddl1.txt
     38 #  or http://www.opensolaris.org/os/licensing.
     39 #  See the License for the specific language governing permissions
     40 #  and limitations under the License.
     41 #
     42 # CDDL HEADER END
     43 #
     44 # CODE:
     45 #
     46 #  This is currently a monolithic script, and while it contains only
#  a few dozen straightforward DTrace scripts I think it's desirable to
#  keep it that way. The scripts themselves have been designed to be very
     49 #  generic (eg, switching on all sdt:::), and are aggregations to keep a 
     50 #  limit on the size of the output.
     51 #
     52 # Author: Brendan Gregg  [Sydney, Australia]
     53 #
     54 # 23-Jun-2005	Brendan Gregg	Created this.
     55 # 28-Jun-2005	   "      "	Last update.
     56 
     57 #
     58 #  Default variables
     59 #
# Restrict the command search path before anything external is run, so the
# uname/date calls below (and everything later) resolve to system binaries.
PATH=/usr/bin:/usr/sbin

# Settings that the command line options can override.
interval=5		# -i: seconds spent on each sample
verbose=1		# -q clears: print progress to the screen
prompt=1		# -y clears: ask to confirm the output dir
tar=1			# -T clears: build a tar.gz of the results
delete=1		# -D clears: remove the output dir afterwards
root=.			# -d: directory the output is created under

# Fixed settings.
dtrace=/usr/sbin/dtrace	# dtrace binary used for every test
samples=20		# number of tests, used for the progress percentage
current=0		# index of the test currently running

# Name of the output directory (and of the archive): de_<host>_<timestamp>.
dir="de_$(uname -n)_$(date +%Y%m%d%H%M)"
     71 
     72 #
     73 #  Process options
     74 #
# Walk the command line. Flags simply clear a default; -d and -i carry a
# value. -h, or anything getopts rejects (reported as "?"), prints the usage
# text on stderr and exits with status 1.
while getopts d:hi:qyDT name
do
	case "$name" in
	q)	verbose=0 ;;
	y)	prompt=0 ;;
	D)	delete=0 ;;
	T)	tar=0 ;;
	d)	root="$OPTARG" ;;
	i)	interval="$OPTARG" ;;
	h|'?')
		cat >&2 <<-END
		USAGE: dexplorer [-qyDT] [-d outputdir] [-i interval]
		 
		        -q               # quiet mode
		        -y               # "yes", don't prompt for confirmation
		        -D               # don't delete output dir
		        -T               # don't create output tar.gz
		        -d outputdir     # output directory
		        -i interval      # interval for each sample
		   eg,
		       dexplorer         # default is 5 second samples
		       dexplorer -y -i30 # no prompting, with 30 second samples
		END
		exit 1 ;;
	esac
done
# Drop the options that were consumed, leaving any operands in "$@".
shift $(( OPTIND - 1 ))
    101 
    102 #
    103 #  Confirm path
    104 #
# Unless -y was given, show the output directory and let the user either
# accept it (plain enter) or type a replacement path.
if [[ "$prompt" == "1" ]] ; then
	if [[ "$root" == "." ]]; then
		print -r -- "Output dir will be the current dir ($PWD)."
	else
		print -r -- "Output dir will be $root"
	fi
	print -n "Hit enter for yes, or type path: "
	# -r keeps backslashes in a typed path literal; anything after the
	# first word lands in "junk" and is discarded.
	read -r ans junk
	if [[ "$ans" == [yY] || "$ans" == [yY]es ]]; then
		# A "y"/"yes" answer is a misreading of the prompt, not a path.
		# Warn and really ignore it (keep the current $root) rather than
		# going on to use it as the output directory.
		print "WARNING: I didn't ask for \"$ans\"!"
		print "\tI was asking for the path or just enter."
		print "\tignoring \"$ans\"..."
	elif [[ -n "$ans" ]]; then
		root=$ans
		print -r -- "Output is now $root."
	fi
fi
    123 
    124 #
    125 #  Sanity checks
    126 #
# Validate the sample interval. It is spliced into the D program text
# (tick-<interval>sec) and used in shell arithmetic, so it must be a plain
# positive whole number: reject anything that is not purely digits.
if [[ "$interval" == *[!0-9]* ]]; then
	print "ERROR2: Invalid interval $interval.\n"
	print "Please use a number of seconds."
	exit 2
fi
# An empty value or one made only of zeros is shorter than the minimum.
if [[ "$interval" != *[1-9]* ]]; then
	print "ERROR3: Length of interval $interval too short.\n"
	print "Minimum 1 second."
	exit 3
fi
# Strip leading zeros so later arithmetic cannot read the value as octal.
while [[ "$interval" == 0?* ]]; do
	interval=${interval#0}
done

# The output root must already exist and be writable.
if [[ ! -d "$root" ]]; then
	print "ERROR4: Output directory \"$root\" does not exist.\n"
	print "Perhaps try a mkdir first?"
	print "or use an existing dir, eg \"/tmp\""
	exit 4
fi
if [[ ! -w "$root" ]]; then
	print "ERROR5: Can't write to output directory \"$root\".\n"
	print "Are you logged in as root?"
	print "Perhaps try another directory, eg \"/tmp\""
	exit 5
fi

# Smoke test: a trivial D program that prints the pid and exits. No output
# at all means dtrace could not be run.
if [[ -z "$($dtrace -b1k -qn 'BEGIN { trace(pid); exit(0); }')" ]]; then
	print "ERROR6: Unable to run dtrace!\n"
	print "Perhaps this is a permission problem? Try running as root."
	exit 6
fi
    154 
    155 # calculate total time
# Lower bound on the run time (one interval per test), turned into a
# human-readable string: whole minutes once it exceeds 180 seconds,
# seconds otherwise.
total=$(( interval * samples ))
if (( total <= 180 )); then
	total="$total seconds"
else
	total="$(( total / 60 )) minutes"
fi
    163 
    164 #
    165 #  Common Functions
    166 #
# decho - print all arguments as a single line, but only while the global
#         "verbose" flag is non-zero (quiet mode prints nothing).
function decho {
	(( verbose )) || return 0
	print "$*"
}
# Output filter used on every dtrace pipeline: sed deletes empty lines.
# It is kept as a string and run unquoted as $clean, so it relies on the
# shell splitting it into the command "sed" and its single argument.
clean="sed /^\$/d"
# D source placed in front of every test's own clauses. The BEGIN clause
# prints a one-line banner (wall-clock time, the utsname fields and the
# sample length) and the tick clause ends the run after one interval.
# $interval sits outside the single quotes, so its value is spliced in
# here, when the string is built.
header='dtrace:::BEGIN {
		printf("%Y, ", walltimestamp);
		printf("%s %s %s %s %s, ", `utsname.sysname, `utsname.nodename,
		    `utsname.release, `utsname.version, `utsname.machine);
		printf("%d secs\n",'$interval');
	}
	profile:::tick-'$interval'sec { exit(0); }
	'
# dstatus - print a progress line "<percent>% <message>" and advance the
#           test counter.
#
# Arguments: the message words (joined with spaces).
# Globals:   verbose (read), samples (read), current (read and incremented),
#            percent (written).
#
# Nothing is printed, and the counter is left untouched, in quiet mode.
function dstatus {
	if (( verbose )); then
		(( percent = current * 100 / samples ))
		# The message is passed as data through %s, never as part of
		# the format, so a "%" or backslash in it prints literally.
		printf "%3d%% %s\n" "$percent" "$*"
		(( current = current + 1 ))
	fi
}
    186 
    187 ########################################
    188 #  START                               #
    189 ########################################
    190 
    191 #
    192 #  Make dirs
    193 #
# Create the output directory under $root and move into it. The steps are
# chained so that a failure stops the sequence at once: a failed cd must
# not be followed by a mkdir in whatever directory we happen to be in.
# Paths are quoted so that an output root containing spaces works.
if cd "$root" && mkdir "$dir" && cd "$dir"; then
	err=0
else
	err=1
fi
# Belt and braces: confirm we really ended up inside the new directory.
base1=${PWD##*/}
base2=${dir##*/}
if [[ "$base1" != "$base2" || "$err" != "0" ]]; then
	print "ERROR7: tried to mkdir $dir from $root, but something failed.\n"
	print "Check directories before rerunning."
	exit 7
fi
# One sub-directory per test category, plus Info for the static snapshots.
for sub in Cpu Disk Mem Net Proc Info
do
	if mkdir "$sub"; then
		:
	else
		print "ERROR7: tried to mkdir $sub in $dir, but something failed.\n"
		print "Check directories before rerunning."
		exit 7
	fi
done
    214 
    215 #
    216 #  Create Log
    217 #
# Announce the run on screen (unless quiet), then start the log file with
# a banner, the system identification and the start time.
decho "Starting dexplorer ver 0.76."
decho "Sample interval is $interval seconds. Total run is > $total."
{
	print "dexplorer ver 0.76\n------------------"
	print -n "System: "
	uname -a
	print -n "Start:  "
	date
} > log
    225 
    226 #
    227 #  Capture Standard Info
    228 #
# Snapshot the static system state with the standard tools, one file per
# command under Info/.

# Column list handed to ps -o.
args='pid,ppid,uid,gid,projid,zoneid,pset,pri,nice,class,vsz,rss,time,pcpu,pmem,args'

uname -a > Info/uname-a			# System
psrinfo -v > Info/psrinfo-v		# CPU
prtconf > Info/prtconf			# Memory (+ devices)
df -k > Info/df-k			# Disk
ifconfig -a > Info/ifconfig-a		# Network
ps -e -o "$args" > Info/ps-o		# Processes
uptime > Info/uptime			# Load
    238 
    239 #
    240 #  Cpu Tests, DTrace
    241 #
    242 
# Each test below has the same shape: report progress with dstatus, run
# dtrace with the common $header followed by the test's own clauses, strip
# blank lines with $clean and save the result under Cpu/.

# Count firings of the sdt interrupt-start probe, keyed by CPU id.
dstatus "Interrupts by CPU..."
$dtrace -qn "$header"'
	sdt:::interrupt-start { @num[cpu] = count(); }
	dtrace:::END
	{ 
		printf("%-16s %16s\n", "CPU", "INTERRUPTS");
		printa("%-16d %@16d\n", @num);
	}
' | $clean > Cpu/interrupt_by_cpu

# Sum the vtimestamp delta between interrupt-start and interrupt-complete
# on the same thread, keyed by the driver name and instance looked up from
# the dev_info pointer passed in arg0.
dstatus "Interrupt times..."
$dtrace -qn "$header"'
	sdt:::interrupt-start { self->ts = vtimestamp; }
	sdt:::interrupt-complete
	/self->ts && arg0 != 0/
	{
		this->devi = (struct dev_info *)arg0;
		self->name = this->devi != 0 ?
		    stringof(`devnamesp[this->devi->devi_major].dn_name) : "?";
		this->inst = this->devi != 0 ? this->devi->devi_instance : 0;
		@num[self->name, this->inst] = sum(vtimestamp - self->ts);
		self->name = 0;
	}
	sdt:::interrupt-complete { self->ts = 0; }
	dtrace:::END
	{ 
		printf("%11s    %16s\n", "DEVICE", "TIME (ns)");
		printa("%10s%-3d %@16d\n", @num);
	}
' | $clean > Cpu/interrupt_time

# On every profile-1000 firing, read disp_nrunnable from the current
# thread's CPU and build a per-CPU linear distribution (0 to 100, step 1).
dstatus "Dispatcher queue length by CPU..."
$dtrace -qn "$header"'
	profile:::profile-1000
	{
		this->num = curthread->t_cpu->cpu_disp->disp_nrunnable;
		@length[cpu] = lquantize(this->num, 0, 100, 1);
	}
	dtrace:::END { printa(" CPU %d%@d\n", @length); }
' | $clean > Cpu/dispqlen_by_cpu

# Count every sdt probe that fires, keyed by probe function and name.
dstatus "Sdt counts..."
$dtrace -qn "$header"'
	sdt:::{ @num[probefunc, probename] = count(); }
	dtrace:::END
	{ 
		printf("%-32s %-32s %10s\n", "FUNC", "NAME", "COUNT");
		printa("%-32s %-32s %@10d\n", @num);
	}
' | $clean > Cpu/sdt_count
    283 
    284 dstatus "Sdt counts..."
    285 $dtrace -qn "$header"'
    286 	sdt:::{ @num[probefunc, probename] = count(); }
    287 	dtrace:::END
    288 	{ 
    289 		printf("%-32s %-32s %10s\n", "FUNC", "NAME", "COUNT");
    290 		printa("%-32s %-32s %@10d\n", @num);
    291 	}
    292 ' | $clean > Cpu/sdt_count
    293 
    294 #
    295 #  Disk Tests, DTrace
    296 #
    297 
# Sum arg0 of the vminfo pgpgin probe, keyed by pid and command name.
dstatus "Pages paged in by process..."
$dtrace -qn "$header"'
	vminfo:::pgpgin { @pg[pid, execname] = sum(arg0); }
	dtrace:::END
	{ 
		printf("%6s %-16s %16s\n", "PID", "CMD", "PAGES");
		printa("%6d %-16s %@16d\n", @pg);
	}
' | $clean > Disk/pgpgin_by_process

# Remember the path given to any open* syscall on entry and count it on
# return only when the return value is not -1; the thread-local state is
# cleared on every return either way.
dstatus "Files opened successfully count..."
$dtrace -qn "$header"'
	syscall::open*:entry { self->file = copyinstr(arg0); self->ok = 1; }
	syscall::open*:return /self->ok && arg0 != -1/ 
	{ 
		@num[self->file] = count();
	}
	syscall::open*:return /self->ok/ { self->file = 0; self->ok = 0; }
	dtrace:::END
	{ 
		printf("%-64s %8s\n", "FILE", "COUNT");
		printa("%-64s %@8d\n", @num);
	}
' | $clean > Disk/fileopen_count

# Power-of-two distribution of b_bcount at io:::start, keyed by pid and
# command name. There is no END clause here, so the aggregation is left
# for dtrace itself to print when the run ends.
dstatus "Disk I/O size distribution by process..."
$dtrace -qn "$header"'
	io:::start { @size[pid, execname] = quantize(args[0]->b_bcount); }
' | $clean > Disk/sizedist_by_process
    327 
    328 #
    329 #  Mem Tests, DTrace
    330 #
    331 
# Sum arg0 of the vminfo as_fault probe, keyed by pid and command name;
# the report labels the result as minor faults.
dstatus "Minor faults by process..."
$dtrace -qn "$header"'
	vminfo:::as_fault { @mem[pid, execname] = sum(arg0); }
	dtrace:::END
	{ 
		printf("%6s %-16s %16s\n", "PID", "CMD", "MINFAULTS");
		printa("%6d %-16s %@16d\n", @mem);
	}
' | $clean > Mem/minf_by_process


# Sum arg0 of every vminfo probe, keyed by pid, command name and the
# probe name (reported as the statistic).
dstatus "Vminfo data by process..."
$dtrace -qn "$header"'
	vminfo::: { @data[pid, execname, probename] = sum(arg0); }
	dtrace:::END
	{ 
		printf("%6s %-16s %-16s %16s\n",
		    "PID", "CMD", "STATISTIC", "VALUE");
		printa("%6d %-16s %-16s %@16d\n", @data);
	}
' | $clean > Mem/vminfo_by_process
    353 
    354 #
    355 #  Net Tests, DTrace
    356 #
    357 
# Sum arg0 of every mib probe, keyed by probe name (the statistic).
dstatus "Mib data by mib statistic..."
$dtrace -qn "$header"'
	mib::: { @data[probename] = sum(arg0); }
	dtrace:::END
	{ 
		printf("%-32s %16s\n", "STATISTIC", "VALUE");
		printa("%-32s %@16d\n", @data);
	}
' | $clean > Net/mib_data

# At entry to tcp_output in the ip module (an fbt probe), add up the
# msgdsize() of the second argument, keyed by pid and command name.
dstatus "TCP write bytes by process..."
$dtrace -qn "$header"'
	fbt:ip:tcp_output:entry
	{
		this->size = msgdsize(args[1]);
		@size[pid, execname] = sum(this->size);
	}
	dtrace:::END
	{ 
		printf("%6s %-16s %12s\n", "PID", "CMD", "BYTES");
		printa("%6d %-16s %@12d\n", @size);
	}
' | $clean > Net/tcpw_by_process
    381 
    382 #
    383 #  Proc Tests, DTrace
    384 #
    385 
# Count profile-1000 firings, keyed by pid and the process argument string.
dstatus "Sample process @ 1000 Hz..."
$dtrace -qn "$header"'
	profile:::profile-1000
	{
		@num[pid, curpsinfo->pr_psargs] = count();
	}
	dtrace:::END
	{ 
		printf("%6s %12s %s\n", "PID", "SAMPLES", "ARGS");
		printa("%6d %@12d %S\n", @num);
	}
' | $clean > Proc/sample_process

# Count syscall entries, keyed by pid, command name and syscall name.
dstatus "Syscall count by process..."
$dtrace -qn "$header"'
	syscall:::entry { @num[pid, execname, probefunc] = count(); }
	dtrace:::END
	{ 
		printf("%6s %-24s %-24s %8s\n",
		    "PID", "CMD", "SYSCALL", "COUNT");
		printa("%6d %-24s %-24s %@8d\n", @num);
	}
' | $clean > Proc/syscall_by_process

# Count syscall entries system-wide, keyed by syscall name only.
dstatus "Syscall count by syscall..."
$dtrace -qn "$header"'
	syscall:::entry { @num[probefunc] = count(); }
	dtrace:::END
	{ 
		printf("%-32s %16s\n", "SYSCALL", "COUNT");
		printa("%-32s %@16d\n", @num);
	}
' | $clean > Proc/syscall_count

# Sum arg0 of the sysinfo readch probe, keyed by pid and command name.
dstatus "Read bytes by process..."
$dtrace -qn "$header"'
	sysinfo:::readch { @bytes[pid, execname] = sum(arg0); }
	dtrace:::END
	{ 
		printf("%6s %-16s %16s\n", "PID", "CMD", "BYTES");
		printa("%6d %-16s %@16d\n", @bytes);
	}
' | $clean > Proc/readb_by_process

# Sum arg0 of the sysinfo writech probe, keyed by pid and command name.
dstatus "Write bytes by process..."
$dtrace -qn "$header"'
	sysinfo:::writech { @bytes[pid, execname] = sum(arg0); }
	dtrace:::END
	{ 
		printf("%6s %-16s %16s\n", "PID", "CMD", "BYTES");
		printa("%6d %-16s %@16d\n", @bytes);
	}
' | $clean > Proc/writeb_by_process

# Sum arg0 of every sysinfo probe, keyed by pid, command name and the
# probe name (reported as the statistic).
dstatus "Sysinfo counts by process..."
$dtrace -qn "$header"'
	sysinfo::: { @num[pid, execname, probename] = sum(arg0); }
	dtrace:::END
	{ 
		printf("%6s %-16s %-16s %16s\n", 
		    "PID", "CMD", "STATISTIC", "COUNT");
		printa("%6d %-16s %-16s %@16d\n", @num);
	}
' | $clean > Proc/sysinfo_by_process

# Count exec-success firings, keyed by pid, parent pid and the process
# argument string.
dstatus "New process counts with arguments..."
$dtrace -qn "$header"'
	proc:::exec-success
	{
		@num[pid, ppid, curpsinfo->pr_psargs] = count();
	}
	dtrace:::END
	{ 
		printf("%6s %6s %8s %s\n", "PID", "PPID", "COUNT", "ARGS");
		printa("%6d %6d %@8d %S\n", @num);
	}
' | $clean > Proc/newprocess_count

# Count signal-send firings, keyed by the sending command name, args[2]
# (reported as the signal) and the pr_fname of args[1] (the recipient).
dstatus "Signal counts..."
$dtrace -qn "$header"'
	proc:::signal-send { 
		@num[execname,args[2],stringof(args[1]->pr_fname)] = count();
	}
	dtrace:::END
	{ 
		printf("%-16s %-8s %-16s %8s\n",
		    "FROM", "SIG", "TO", "COUNT");
		printa("%-16s %-8d %-16s %@8d\n", @num);
	}
' | $clean > Proc/signal_count

# Count syscall returns whose value, cast to int, is -1, keyed by pid,
# command name, syscall name and errno.
dstatus "Syscall error counts..."
$dtrace -qn "$header"'
	syscall:::return /(int)arg0 == -1/
	{
		@num[pid, execname, probefunc, errno] = count();
	}
	dtrace:::END
	{ 
		printf("%6s %-16s %-32s %-6s %8s\n",
		    "PID", "CMD", "SYSCALL", "ERRNO", "COUNT");
		printa("%6d %-16s %-32s %-6d %@8d\n", @num);
	}
' | $clean > Proc/syscall_errors
    490 
    491 
    492 ###########
    493 #  Done
    494 #
# Close the log with the end time and report completion.
{
	print -n "End:    "
	date
} >> log
decho "100% Done."

# Build <dir>.tar.gz next to the output directory. "archived" records that
# both tar and gzip succeeded; the collected data must never be deleted
# unless the archive really exists.
archived=0
if (( tar )); then
	cd .. || exit 8
	if tar cf "$dir.tar" "$dir" && gzip "$dir.tar"; then
		archived=1
		decho "File is $dir.tar.gz"
	else
		print "ERROR8: Unable to create $dir.tar.gz.\n"
		print "Output has been left in directory $dir"
		exit 8
	fi
fi

if (( delete && archived )); then
	# All the rm calls below use relative paths, so refuse to continue
	# unless we are definitely back inside the output directory.
	cd "$dir" || {
		print "ERROR9: Unable to cd to $dir for cleanup.\n"
		exit 9
	}
	# This could all be an "rm -r $dir", but since it will be run as
	# root on production servers, be deliberately cautious and remove
	# only the files this script created, by name.
	for file in \
	    Cpu/interrupt_by_cpu Cpu/interrupt_time \
	    Cpu/dispqlen_by_cpu Cpu/sdt_count \
	    Disk/pgpgin_by_process Disk/fileopen_count \
	    Disk/sizedist_by_process \
	    Mem/minf_by_process Mem/vminfo_by_process \
	    Net/mib_data Net/tcpw_by_process \
	    Proc/sample_process Proc/syscall_by_process \
	    Proc/syscall_count Proc/readb_by_process \
	    Proc/writeb_by_process Proc/sysinfo_by_process \
	    Proc/newprocess_count Proc/signal_count \
	    Proc/syscall_errors \
	    Info/uname-a Info/psrinfo-v Info/prtconf Info/df-k \
	    Info/ifconfig-a Info/ps-o Info/uptime \
	    log
	do
		rm "$file"
	done
	# rmdir only succeeds on empty directories, so anything unexpected
	# left behind is preserved and reported.
	rmdir Cpu Disk Mem Net Proc Info
	cd ..
	rmdir "$dir"
else
	decho "Directory is $dir"
fi
    547 
    548