1 /* $NetBSD: subr_iostat.c,v 1.28 2026/01/04 03:18:46 riastradh Exp $ */ 2 /* NetBSD: subr_disk.c,v 1.69 2005/05/29 22:24:15 christos Exp */ 3 4 /*- 5 * Copyright (c) 1996, 1997, 1999, 2000, 2009 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 10 * NASA Ames Research Center. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1993 36 * The Regents of the University of California. All rights reserved. 37 * (c) UNIX System Laboratories, Inc. 38 * All or some portions of this file are derived from material licensed 39 * to the University of California by American Telephone and Telegraph 40 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 41 * the permission of UNIX System Laboratories, Inc. 42 * 43 * Redistribution and use in source and binary forms, with or without 44 * modification, are permitted provided that the following conditions 45 * are met: 46 * 1. Redistributions of source code must retain the above copyright 47 * notice, this list of conditions and the following disclaimer. 48 * 2. Redistributions in binary form must reproduce the above copyright 49 * notice, this list of conditions and the following disclaimer in the 50 * documentation and/or other materials provided with the distribution. 51 * 3. Neither the name of the University nor the names of its contributors 52 * may be used to endorse or promote products derived from this software 53 * without specific prior written permission. 54 * 55 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 56 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 57 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 58 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 59 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 60 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 61 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 62 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 63 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 65 * SUCH DAMAGE. 66 * 67 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 68 */ 69 70 #include <sys/cdefs.h> 71 __KERNEL_RCSID(0, "$NetBSD: subr_iostat.c,v 1.28 2026/01/04 03:18:46 riastradh Exp $"); 72 73 #include <sys/param.h> 74 #include <sys/types.h> 75 76 #include <sys/iostat.h> 77 #include <sys/kernel.h> 78 #include <sys/kmem.h> 79 #include <sys/rwlock.h> 80 #include <sys/sdt.h> 81 #include <sys/sysctl.h> 82 83 /* 84 * Function prototypes for sysctl nodes 85 */ 86 static int sysctl_hw_disknames(SYSCTLFN_PROTO); 87 static int sysctl_hw_iostatnames(SYSCTLFN_PROTO); 88 static int sysctl_hw_iostats(SYSCTLFN_PROTO); 89 90 static int 91 iostati_getnames(int disk_only, char *oldp, size_t *oldlenp, const void *newp, 92 u_int namelen); 93 94 /* 95 * A global list of all drives attached to the system. May grow or 96 * shrink over time. 97 */ 98 struct iostatlist_head iostatlist = TAILQ_HEAD_INITIALIZER(iostatlist); 99 int iostat_count; /* number of drives in global drivelist */ 100 krwlock_t iostatlist_lock; 101 102 static void sysctl_io_stats_setup(struct sysctllog **); 103 104 /* 105 * Initialise the iostat subsystem. 106 */ 107 void 108 iostat_init(void) 109 { 110 111 rw_init(&iostatlist_lock); 112 sysctl_io_stats_setup(NULL); 113 } 114 115 /* 116 * Searches the iostatlist for the iostat corresponding to the 117 * name provided. 118 */ 119 struct io_stats * 120 iostat_find(const char *name) 121 { 122 struct io_stats *iostatp; 123 124 KASSERT(name != NULL); 125 126 rw_enter(&iostatlist_lock, RW_READER); 127 TAILQ_FOREACH(iostatp, &iostatlist, io_link) { 128 if (strcmp(iostatp->io_name, name) == 0) { 129 break; 130 } 131 } 132 rw_exit(&iostatlist_lock); 133 134 return iostatp; 135 } 136 137 /* 138 * Allocate and initialise memory for the i/o statistics. 139 */ 140 struct io_stats * 141 iostat_alloc(int32_t type, void *parent, const char *name) 142 { 143 struct io_stats *stats; 144 145 stats = kmem_zalloc(sizeof(*stats), KM_SLEEP); 146 stats->io_type = type; 147 stats->io_parent = parent; 148 (void)strlcpy(stats->io_name, name, sizeof(stats->io_name)); 149 150 /* 151 * Set the attached timestamp. 152 */ 153 getmicrouptime(&stats->io_attachtime); 154 155 /* 156 * Link into the drivelist. 157 */ 158 rw_enter(&iostatlist_lock, RW_WRITER); 159 TAILQ_INSERT_TAIL(&iostatlist, stats, io_link); 160 iostat_count++; 161 rw_exit(&iostatlist_lock); 162 163 return stats; 164 } 165 166 /* 167 * Remove i/o from stats collection. 168 */ 169 void 170 iostat_free(struct io_stats *stats) 171 { 172 173 /* 174 * Remove from the iostat list. 175 */ 176 if (iostat_count == 0) 177 panic("iostat_free: iostat_count == 0"); 178 rw_enter(&iostatlist_lock, RW_WRITER); 179 TAILQ_REMOVE(&iostatlist, stats, io_link); 180 iostat_count--; 181 rw_exit(&iostatlist_lock); 182 kmem_free(stats, sizeof(*stats)); 183 } 184 185 /* 186 * Rename i/o stats. 187 */ 188 void 189 iostat_rename(struct io_stats *stats, const char *name) 190 { 191 192 rw_enter(&iostatlist_lock, RW_WRITER); 193 (void)strlcpy(stats->io_name, name, sizeof(stats->io_name)); 194 rw_exit(&iostatlist_lock); 195 } 196 197 /* 198 * multiply timeval by unsigned integer and add to result 199 */ 200 static void 201 timermac(struct timeval *a, uint64_t count, struct timeval *res) 202 { 203 struct timeval part = *a; 204 205 while (count) { 206 if (count & 1) 207 timeradd(res, &part, res); 208 timeradd(&part, &part, &part); 209 count >>= 1; 210 } 211 } 212 213 /* 214 * Increment the iostat wait counter. 215 * Accumulate wait time and timesum. 216 * 217 * Wait time is spent in the device bufq. 218 */ 219 void 220 iostat_wait(struct io_stats *stats) 221 { 222 struct timeval dv_time, diff_time; 223 int32_t count; 224 225 KASSERT(stats->io_wait >= 0); 226 227 getmicrouptime(&dv_time); 228 229 timersub(&dv_time, &stats->io_waitstamp, &diff_time); 230 count = stats->io_wait++; 231 if (count != 0) { 232 timermac(&diff_time, count, &stats->io_waitsum); 233 timeradd(&stats->io_waittime, &diff_time, &stats->io_waittime); 234 } 235 stats->io_waitstamp = dv_time; 236 } 237 238 /* 239 * Decrement the iostat wait counter. 240 * Increment the iostat busy counter. 241 * Accumulate wait and busy times and timesums. 242 * 243 * Busy time is spent being processed by the device. 244 * 245 * Old devices do not yet measure wait time, so skip 246 * processing it if the counter is still zero. 247 */ 248 void 249 iostat_busy(struct io_stats *stats) 250 { 251 struct timeval dv_time, diff_time; 252 int32_t count; 253 254 KASSERT(stats->io_wait >= 0); /* > 0 when iostat_wait is used */ 255 KASSERT(stats->io_busy >= 0); 256 257 getmicrouptime(&dv_time); 258 259 timersub(&dv_time, &stats->io_waitstamp, &diff_time); 260 if (stats->io_wait != 0) { 261 count = stats->io_wait--; 262 timermac(&diff_time, count, &stats->io_waitsum); 263 timeradd(&stats->io_waittime, &diff_time, &stats->io_waittime); 264 } 265 stats->io_waitstamp = dv_time; 266 267 timersub(&dv_time, &stats->io_busystamp, &diff_time); 268 count = stats->io_busy++; 269 if (count != 0) { 270 timermac(&diff_time, count, &stats->io_busysum); 271 timeradd(&stats->io_busytime, &diff_time, &stats->io_busytime); 272 } 273 stats->io_busystamp = dv_time; 274 } 275 276 /* 277 * Decrement the iostat busy counter, increment the byte count. 278 * Accumulate busy time and timesum. 279 */ 280 void 281 iostat_unbusy(struct io_stats *stats, long bcount, int read) 282 { 283 struct timeval dv_time, diff_time; 284 int32_t count; 285 286 KASSERT(stats->io_busy > 0); 287 288 getmicrouptime(&dv_time); 289 stats->io_timestamp = dv_time; 290 291 /* any op */ 292 timersub(&dv_time, &stats->io_busystamp, &diff_time); 293 count = stats->io_busy--; 294 timermac(&diff_time, count, &stats->io_busysum); 295 timeradd(&stats->io_busytime, &diff_time, &stats->io_busytime); 296 stats->io_busystamp = dv_time; 297 298 if (bcount > 0) { 299 if (read) { 300 stats->io_rbytes += bcount; 301 stats->io_rxfer++; 302 } else { 303 stats->io_wbytes += bcount; 304 stats->io_wxfer++; 305 } 306 } 307 } 308 309 /* 310 * Return non-zero if a device has an I/O request in flight. 311 */ 312 bool 313 iostat_isbusy(struct io_stats *stats) 314 { 315 316 return stats->io_busy != 0; 317 } 318 319 /* 320 * Increment the seek counter. This does look almost redundant but it 321 * abstracts the stats gathering. 322 */ 323 void 324 iostat_seek(struct io_stats *stats) 325 { 326 327 stats->io_seek++; 328 } 329 330 static int 331 sysctl_hw_disknames(SYSCTLFN_ARGS) 332 { 333 334 return iostati_getnames(1, oldp, oldlenp, newp, namelen); 335 } 336 337 static int 338 sysctl_hw_iostatnames(SYSCTLFN_ARGS) 339 { 340 341 return iostati_getnames(0, oldp, oldlenp, newp, namelen); 342 } 343 344 static int 345 iostati_getnames(int disk_only, char *oldp, size_t *oldlenp, const void *newp, 346 u_int namelen) 347 { 348 char bf[IOSTATNAMELEN + 1]; 349 char *where = oldp; 350 struct io_stats *stats; 351 size_t needed, left, slen; 352 int error, first; 353 354 if (newp != NULL) 355 return SET_ERROR(EPERM); 356 if (namelen != 0) 357 return SET_ERROR(EINVAL); 358 359 first = 1; 360 error = 0; 361 needed = 0; 362 left = *oldlenp; 363 364 rw_enter(&iostatlist_lock, RW_READER); 365 for (stats = TAILQ_FIRST(&iostatlist); stats != NULL; 366 stats = TAILQ_NEXT(stats, io_link)) { 367 if ((disk_only == 1) && (stats->io_type != IOSTAT_DISK)) 368 continue; 369 370 if (where == NULL) 371 needed += strlen(stats->io_name) + 1; 372 else { 373 memset(bf, 0, sizeof(bf)); 374 if (first) { 375 strncpy(bf, stats->io_name, sizeof(bf)); 376 /* account for trailing NUL byte */ 377 needed += 1; 378 first = 0; 379 } else { 380 bf[0] = ' '; 381 strncpy(bf + 1, stats->io_name, 382 sizeof(bf) - 1); 383 } 384 bf[IOSTATNAMELEN] = '\0'; 385 slen = strlen(bf); 386 if (left < slen + 1) 387 break; 388 /* +1 to copy out the trailing NUL byte */ 389 error = copyout(bf, where, slen + 1); 390 if (error) 391 break; 392 where += slen; 393 needed += slen; 394 left -= slen; 395 } 396 } 397 rw_exit(&iostatlist_lock); 398 *oldlenp = needed; 399 return error; 400 } 401 402 static int 403 sysctl_hw_iostats(SYSCTLFN_ARGS) 404 { 405 struct io_sysctl sdrive; 406 struct io_stats *stats; 407 char *where = oldp; 408 size_t tocopy, left; 409 int error; 410 411 if (newp != NULL) 412 return SET_ERROR(EPERM); 413 414 /* 415 * The original hw.diskstats call was broken and did not require 416 * the userland to pass in its size of struct disk_sysctl. This 417 * was fixed after NetBSD 1.6 was released. 418 */ 419 if (namelen == 0) 420 tocopy = offsetof(struct io_sysctl, busy); 421 else 422 tocopy = name[0]; 423 424 if (where == NULL) { 425 *oldlenp = iostat_count * tocopy; 426 return 0; 427 } 428 429 error = 0; 430 left = *oldlenp; 431 memset(&sdrive, 0, sizeof(sdrive)); 432 *oldlenp = 0; 433 434 rw_enter(&iostatlist_lock, RW_READER); 435 TAILQ_FOREACH(stats, &iostatlist, io_link) { 436 if (left < tocopy) 437 break; 438 439 strncpy(sdrive.name, stats->io_name, sizeof(sdrive.name)); 440 sdrive.attachtime_sec = stats->io_attachtime.tv_sec; 441 sdrive.attachtime_usec = stats->io_attachtime.tv_usec; 442 sdrive.timestamp_sec = stats->io_busystamp.tv_sec; 443 sdrive.timestamp_usec = stats->io_busystamp.tv_usec; 444 445 sdrive.time_sec = stats->io_busytime.tv_sec; 446 sdrive.time_usec = stats->io_busytime.tv_usec; 447 448 sdrive.seek = stats->io_seek; 449 450 sdrive.rxfer = stats->io_rxfer; 451 sdrive.wxfer = stats->io_wxfer; 452 sdrive.xfer = stats->io_rxfer + stats->io_wxfer; 453 454 sdrive.rbytes = stats->io_rbytes; 455 sdrive.wbytes = stats->io_wbytes; 456 sdrive.bytes = stats->io_rbytes + stats->io_wbytes; 457 458 sdrive.wait_sec = stats->io_waittime.tv_sec; 459 sdrive.wait_usec = stats->io_waittime.tv_usec; 460 461 sdrive.time_sec = stats->io_busytime.tv_sec; 462 sdrive.time_usec = stats->io_busytime.tv_usec; 463 464 sdrive.waitsum_sec = stats->io_waitsum.tv_sec; 465 sdrive.waitsum_usec = stats->io_waitsum.tv_usec; 466 467 sdrive.busysum_sec = stats->io_busysum.tv_sec; 468 sdrive.busysum_usec = stats->io_busysum.tv_usec; 469 470 sdrive.busy = stats->io_busy; 471 472 error = copyout(&sdrive, where, uimin(tocopy, sizeof(sdrive))); 473 if (error) 474 break; 475 where += tocopy; 476 *oldlenp += tocopy; 477 left -= tocopy; 478 } 479 rw_exit(&iostatlist_lock); 480 return error; 481 } 482 483 static void 484 sysctl_io_stats_setup(struct sysctllog **clog) 485 { 486 487 sysctl_createv(clog, 0, NULL, NULL, 488 CTLFLAG_PERMANENT, 489 CTLTYPE_STRING, "disknames", 490 SYSCTL_DESCR("List of disk drives present"), 491 sysctl_hw_disknames, 0, NULL, 0, 492 CTL_HW, HW_DISKNAMES, CTL_EOL); 493 sysctl_createv(clog, 0, NULL, NULL, 494 CTLFLAG_PERMANENT, 495 CTLTYPE_STRING, "iostatnames", 496 SYSCTL_DESCR("I/O stats are being collected for these" 497 " devices"), 498 sysctl_hw_iostatnames, 0, NULL, 0, 499 CTL_HW, HW_IOSTATNAMES, CTL_EOL); 500 sysctl_createv(clog, 0, NULL, NULL, 501 CTLFLAG_PERMANENT, 502 CTLTYPE_STRUCT, "iostats", 503 SYSCTL_DESCR("Statistics on device I/O operations"), 504 sysctl_hw_iostats, 0, NULL, 0, 505 CTL_HW, HW_IOSTATS, CTL_EOL); 506 } 507