#!/usr/bin/ksh
#
# iotop - display top disk I/O events by process.
#         Written using DTrace (Solaris 10 3/05).
#
# This is measuring disk events that have made it past system caches.
#
# $Id: iotop,v 1.1.1.1 2015/09/30 22:01:06 christos Exp $
#
# USAGE:        iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename]
#                     [-m mount_point] [-t top] [interval [count]]
#
#               iotop           # default output, 5 second intervals
#
#               -C              # don't clear the screen
#               -D              # print delta times, elapsed, us
#               -j              # print project ID
#               -o              # print disk delta times, us
#               -P              # print %I/O (disk delta times)
#               -Z              # print zone ID
#               -d device       # instance name to snoop (eg, dad0)
#               -f filename     # full pathname of file to snoop
#               -m mount_point  # this FS only (will skip raw events)
#               -t top          # print top number only
#       eg,
#               iotop 1         # 1 second samples
#               iotop -C        # don't clear the screen
#               iotop -P        # print %I/O (time based)
#               iotop -j        # print project IDs
#               iotop -Z        # print zone IDs
#               iotop -t 20     # print top 20 lines only
#               iotop -C 5 12   # print 12 x 5 second samples
#
# FIELDS:
#               UID             user ID
#               PID             process ID
#               PPID            parent process ID
#               PROJ            project ID
#               ZONE            zone ID
#               CMD             process command name
#               DEVICE          device name
#               MAJ             device major number
#               MIN             device minor number
#               D               direction, Read or Write
#               BYTES           total size of operations, bytes
#               ELAPSED         total elapsed from request to completion, us
#               DISKTIME        total time for disk to complete request, us
#               %I/O            percent disk I/O, based on time (DISKTIME)
#               load            1 min load average
#               disk_r          total disk read Kbytes for sample
#               disk_w          total disk write Kbytes for sample
#
# NOTE:
# * There are two different delta times reported. -D prints the
#   elapsed time from the disk request (strategy) to the disk completion
#   (iodone); -o prints the time for the disk to complete that event
#   since its last event (time between iodones), or, the time to the
#   strategy if the disk had been idle.
# * The %I/O value can exceed 100%. It represents how busy a process is
#   making the disks, in terms of a single disk. A value of 200% could
#   mean 2 disks are busy at 100%, or 4 disks at 50%...
#
# SEE ALSO: iosnoop
#           BigAdmin: DTrace, http://www.sun.com/bigadmin/content/dtrace
#           Solaris Dynamic Tracing Guide, http://docs.sun.com
#           DTrace Tools, http://www.brendangregg.com/dtrace.html
#
# INSPIRATION:  top(1) by William LeFebvre
#
# COPYRIGHT: Copyright (c) 2005, 2006 Brendan Gregg.
#
# CDDL HEADER START
#
#  The contents of this file are subject to the terms of the
#  Common Development and Distribution License, Version 1.0 only
#  (the "License").  You may not use this file except in compliance
#  with the License.
#
#  You can obtain a copy of the license at Docs/cddl1.txt
#  or http://www.opensolaris.org/os/licensing.
#  See the License for the specific language governing permissions
#  and limitations under the License.
#
# CDDL HEADER END
#
# KNOWN BUGS:
# - This can print errors while running on servers with Veritas volumes.
#
# Author: Brendan Gregg  [Sydney, Australia]
#
# 15-Jul-2005   Brendan Gregg   Created this.
# 20-Apr-2006      "      "     Last update.
#


##############################
# --- Process Arguments ---
#

### default variables
opt_device=0; opt_file=0; opt_mount=0; opt_clear=1; opt_proj=0; opt_zone=0
opt_percent=0; opt_def=1; opt_bytes=1; filter=0; device=.; filename=.; mount=.
opt_top=0; opt_elapsed=0; opt_dtime=0; interval=5; count=-1; top=0

### helpers
# is_positive_int value
#       Succeeds when value consists only of decimal digits and is not zero.
#       Pure pattern matching, so nothing is evaluated arithmetically.
is_positive_int()
{
        [[ "$1" == +([0-9]) && "$1" != +(0) ]]
}

### process options
while getopts CDd:f:hjm:oPt:Z name
do
        case $name in
        C)      opt_clear=0 ;;
        D)      opt_elapsed=1; opt_bytes=0 ;;
        d)      opt_device=1; device=$OPTARG ;;
        f)      opt_file=1; filename=$OPTARG ;;
        j)      opt_proj=1; opt_def=0 ;;
        m)      opt_mount=1; mount=$OPTARG ;;
        o)      opt_dtime=1; opt_bytes=0 ;;
        P)      opt_percent=1; opt_dtime=1; opt_bytes=0 ;;
        t)      opt_top=1; top=$OPTARG ;;
        Z)      opt_zone=1; opt_def=0 ;;
        # The here-document body and its terminator are kept at column 0 so
        # that the text does not depend on <<- stripping literal tabs.
        h|?)    cat <<-END >&2
USAGE: iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename]
                   [-m mount_point] [-t top] [interval [count]]

                -C      # don't clear the screen
                -D      # print delta times, elapsed, us
                -j      # print project ID
                -o      # print disk delta times, us
                -P      # print %I/O (disk delta times)
                -Z      # print zone ID
                -d device       # instance name to snoop
                -f filename     # snoop this file only
                -m mount_point  # this FS only
                -t top          # print top number only
   eg,
        iotop         # default output, 5 second samples
        iotop 1       # 1 second samples
        iotop -P      # print %I/O (time based)
        iotop -m /    # snoop events on filesystem / only
        iotop -t 20   # print top 20 lines only
        iotop -C 5 12 # print 12 x 5 second samples
END
                exit 1 ;;
        esac
done

shift $(( $OPTIND - 1 ))

### option logic
# interval and count are spliced into the D program as integer literals, so
# only positive decimal integers are accepted. Leading zeros are stripped
# because D would read them as octal. Values the old lexical test
# ([[ "$1" > 0 ]]) used to ignore (empty, "0", "-5", ...) are still ignored;
# anything else it used to let through (eg, "foo", "1.5") is now rejected
# here instead of surfacing as a D compiler error.
if is_positive_int "$1"; then
        interval=${1##+(0)}; shift
elif [[ "$1" > 0 ]]; then
        print -r -u2 -- "ERROR: interval must be a positive integer: $1"
        exit 1
fi
if is_positive_int "$1"; then
        count=${1##+(0)}; shift
elif [[ "$1" > 0 ]]; then
        print -r -u2 -- "ERROR: count must be a positive integer: $1"
        exit 1
fi
# -Z wins over -j, and the disk delta time (-o/-P) wins over -D
if (( opt_proj && opt_zone )); then
        opt_proj=0
fi
if (( opt_elapsed && opt_dtime )); then
        opt_elapsed=0
fi
if (( opt_device || opt_mount || opt_file )); then
        filter=1
fi
# capture the terminal clear sequence once so the D program can print it
if (( opt_clear )); then
        clearstr=$(clear)
else
        clearstr=.
fi



#################################
# --- Main Program, DTrace ---
#
# Every shell variable below is spliced in as '"$var"': the single quote
# closes the D text, the double quotes protect the value from field
# splitting (eg, a filename containing spaces), and the next single quote
# resumes the D text. The D text itself must not contain single quotes.
#
/usr/sbin/dtrace -n '
 /*
  * Command line arguments
  */
 inline int OPT_def      = '"$opt_def"';
 inline int OPT_proj     = '"$opt_proj"';
 inline int OPT_zone     = '"$opt_zone"';
 inline int OPT_clear    = '"$opt_clear"';
 inline int OPT_bytes    = '"$opt_bytes"';
 inline int OPT_elapsed  = '"$opt_elapsed"';
 inline int OPT_dtime    = '"$opt_dtime"';
 inline int OPT_percent  = '"$opt_percent"';
 inline int OPT_device   = '"$opt_device"';
 inline int OPT_mount    = '"$opt_mount"';
 inline int OPT_file     = '"$opt_file"';
 inline int OPT_top      = '"$opt_top"';
 inline int INTERVAL     = '"$interval"';
 inline int COUNTER      = '"$count"';
 inline int FILTER       = '"$filter"';
 inline int TOP          = '"$top"';
 inline string DEVICE    = "'"$device"'";
 inline string FILENAME  = "'"$filename"'";
 inline string MOUNT     = "'"$mount"'";
 inline string CLEAR     = "'"$clearstr"'";

 #pragma D option quiet

 /* boost the following if you get "dynamic variable drops" */
 #pragma D option dynvarsize=8m

 /*
  * Print header
  */
 dtrace:::BEGIN
 {
        last_event[""] = 0;

        /* starting values */
        counts = COUNTER;
        secs = INTERVAL;
        disk_r = 0;
        disk_w = 0;

        printf("Tracing... Please wait.\n");
 }

 /*
  * Check event is being traced
  *
  * this->ok is clause-local; the later clauses on the same probes read it
  * in their predicates.
  */
 io:genunix::start,
 io:genunix::done
 {
        /* default is to trace unless filtering, */
        this->ok = FILTER ? 0 : 1;

        /* check each filter, */
        (OPT_device == 1 && DEVICE == args[1]->dev_statname)? this->ok = 1 : 1;
        (OPT_file == 1 && FILENAME == args[2]->fi_pathname) ? this->ok = 1 : 1;
        (OPT_mount == 1 && MOUNT == args[2]->fi_mount) ? this->ok = 1 : 1;
 }

 /*
  * Reset last_event for disk idle -> start
  * this prevents idle time being counted as disk time.
  */
 io:genunix::start
 /! pending[args[1]->dev_statname]/
 {
        /* save last disk event */
        last_event[args[1]->dev_statname] = timestamp;
 }

 /*
  * Store entry details
  */
 io:genunix::start
 /this->ok/
 {
        /* these are used as a unique disk event key, */
        this->dev = args[0]->b_edev;
        this->blk = args[0]->b_blkno;

        /* save disk event details, */
        start_uid[this->dev, this->blk] = uid;
        start_pid[this->dev, this->blk] = pid;
        start_ppid[this->dev, this->blk] = ppid;
        start_comm[this->dev, this->blk] = execname;
        start_time[this->dev, this->blk] = timestamp;
        start_proj[this->dev, this->blk] = curpsinfo->pr_projid;
        start_zone[this->dev, this->blk] = curpsinfo->pr_zoneid;
        start_rw[this->dev, this->blk] = args[0]->b_flags & B_READ ? "R" : "W";
        disk_r += args[0]->b_flags & B_READ ? args[0]->b_bcount : 0;
        disk_w += args[0]->b_flags & B_READ ? 0 : args[0]->b_bcount;

        /* increase disk event pending count */
        pending[args[1]->dev_statname]++;
 }

 /*
  * Process and Print completion
  */
 io:genunix::done
 /this->ok/
 {
        /* decrease disk event pending count */
        pending[args[1]->dev_statname]--;

        /*
         * Process details
         */

        /* fetch entry values */
        this->dev = args[0]->b_edev;
        this->blk = args[0]->b_blkno;
        this->suid = start_uid[this->dev, this->blk];
        this->spid = start_pid[this->dev, this->blk];
        this->sppid = start_ppid[this->dev, this->blk];
        this->sproj = start_proj[this->dev, this->blk];
        this->szone = start_zone[this->dev, this->blk];
        self->scomm = start_comm[this->dev, this->blk];
        this->stime = start_time[this->dev, this->blk];
        this->etime = timestamp; /* endtime */
        /*
         * A zero start time means the matching start was never recorded
         * (eg, the I/O was issued before tracing began). Report no elapsed
         * time then, rather than the raw timestamp.
         */
        this->elapsed = this->stime == 0 ? 0 : this->etime - this->stime;
        self->rw = start_rw[this->dev, this->blk];
        this->dtime = last_event[args[1]->dev_statname] == 0 ? 0 :
            timestamp - last_event[args[1]->dev_statname];

        /* memory cleanup */
        start_uid[this->dev, this->blk]  = 0;
        start_pid[this->dev, this->blk]  = 0;
        start_ppid[this->dev, this->blk] = 0;
        start_time[this->dev, this->blk] = 0;
        start_comm[this->dev, this->blk] = 0;
        start_zone[this->dev, this->blk] = 0;
        start_proj[this->dev, this->blk] = 0;
        start_rw[this->dev, this->blk]   = 0;

        /*
         * Choose statistic to track
         * (the option logic in the shell leaves exactly one of these set)
         */
        OPT_bytes   ? this->value = args[0]->b_bcount    : 1;
        OPT_elapsed ? this->value = this->elapsed / 1000 : 1;
        OPT_dtime   ? this->value = this->dtime / 1000   : 1;

        /*
         * Save details
         * (the first key is the UID, project ID or zone ID)
         */
        OPT_def ? @out[this->suid, this->spid, this->sppid, self->scomm,
            args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
            self->rw] = sum(this->value) : 1;
        OPT_proj ? @out[this->sproj, this->spid, this->sppid, self->scomm,
            args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
            self->rw] = sum(this->value) : 1;
        OPT_zone ? @out[this->szone, this->spid, this->sppid, self->scomm,
            args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
            self->rw] = sum(this->value) : 1;

        /* save last disk event */
        last_event[args[1]->dev_statname] = timestamp;

        /* release the thread-local strings */
        self->scomm = 0;
        self->rw = 0;
 }

 /*
  * Prevent pending from underflowing
  * this can happen if this program is started during disk events.
  */
 io:genunix::done
 /pending[args[1]->dev_statname] < 0/
 {
        pending[args[1]->dev_statname] = 0;
 }

 /*
  * Timer
  */
 profile:::tick-1sec
 {
        secs--;
 }

 /*
  * Print Report
  */
 profile:::tick-1sec
 /secs == 0/
 {
        /* fetch 1 min load average (integer part, then two decimals) */
        this->load1a  = `hp_avenrun[0] / 65536;
        this->load1b  = ((`hp_avenrun[0] % 65536) * 100) / 65536;

        /* convert counters to Kbytes */
        disk_r /= 1024;
        disk_w /= 1024;

        /* print status */
        OPT_clear ? printf("%s", CLEAR) : 1;
        printf("%Y, load: %d.%02d, disk_r: %6d KB, disk_w: %6d KB\n\n",
            walltimestamp, this->load1a, this->load1b, disk_r, disk_w);

        /* print headers (first column is 6 wide to match "%5d " below) */
        OPT_def  ? printf("  UID ") : 1;
        OPT_proj ? printf(" PROJ ") : 1;
        OPT_zone ? printf(" ZONE ") : 1;
        printf("%6s %6s %-16s %-7s %3s %3s %1s",
            "PID", "PPID", "CMD", "DEVICE", "MAJ", "MIN", "D");
        OPT_bytes   ? printf(" %16s\n", "BYTES") : 1;
        OPT_elapsed ? printf(" %16s\n", "ELAPSED") : 1;
        OPT_dtime && ! OPT_percent ? printf(" %16s\n", "DISKTIME") : 1;
        OPT_dtime &&   OPT_percent ? printf(" %6s\n", "%I/O") : 1;

        /* truncate to top lines if needed */
        OPT_top ? trunc(@out, TOP) : 1;

        /*
         * normalise to percentage if needed
         * (us / (INTERVAL * 1000000) * 100 == us / (INTERVAL * 10000))
         */
        OPT_percent ? normalize(@out, INTERVAL * 10000) : 1;

        /* print data */
        ! OPT_percent ?
            printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %16@d\n", @out) :
            printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %6@d\n", @out);
        printf("\n");

        /* clear data */
        trunc(@out);
        disk_r = 0;
        disk_w = 0;
        secs = INTERVAL;
        counts--;
 }

 /*
  * End of program
  * (COUNTER is -1 when no count was given, so counts never reaches 0)
  */
 profile:::tick-1sec
 /counts == 0/
 {
        exit(0);
 }

 /*
  * Cleanup for Ctrl-C
  */
 dtrace:::END
 {
        trunc(@out);
 }
'