Home | History | Annotate | Line # | Download | only in kern
      1 /*	$NetBSD: subr_disk.c,v 1.140 2026/01/04 03:16:14 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1996, 1997, 1999, 2000, 2009 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  * NASA Ames Research Center.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30  * POSSIBILITY OF SUCH DAMAGE.
     31  */
     32 
     33 /*
     34  * Copyright (c) 1982, 1986, 1988, 1993
     35  *	The Regents of the University of California.  All rights reserved.
     36  * (c) UNIX System Laboratories, Inc.
     37  * All or some portions of this file are derived from material licensed
     38  * to the University of California by American Telephone and Telegraph
     39  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     40  * the permission of UNIX System Laboratories, Inc.
     41  *
     42  * Redistribution and use in source and binary forms, with or without
     43  * modification, are permitted provided that the following conditions
     44  * are met:
     45  * 1. Redistributions of source code must retain the above copyright
     46  *    notice, this list of conditions and the following disclaimer.
     47  * 2. Redistributions in binary form must reproduce the above copyright
     48  *    notice, this list of conditions and the following disclaimer in the
     49  *    documentation and/or other materials provided with the distribution.
     50  * 3. Neither the name of the University nor the names of its contributors
     51  *    may be used to endorse or promote products derived from this software
     52  *    without specific prior written permission.
     53  *
     54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     64  * SUCH DAMAGE.
     65  *
     66  *	@(#)ufs_disksubr.c	8.5 (Berkeley) 1/21/94
     67  */
     68 
     69 #include <sys/cdefs.h>
     70 __KERNEL_RCSID(0, "$NetBSD: subr_disk.c,v 1.140 2026/01/04 03:16:14 riastradh Exp $");
     71 
     72 #include <sys/param.h>
     73 #include <sys/types.h>
     74 
     75 #include <sys/buf.h>
     76 #include <sys/disk.h>
     77 #include <sys/disklabel.h>
     78 #include <sys/fcntl.h>
     79 #include <sys/kernel.h>
     80 #include <sys/kmem.h>
     81 #include <sys/sdt.h>
     82 #include <sys/sysctl.h>
     83 #include <sys/syslog.h>
     84 
     85 #include <lib/libkern/libkern.h>
     86 
     87 /*
     88  * Disk error is the preface to plaintive error messages
     89  * about failing disk transfers.  It prints messages of the form
     90 
     91 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
     92 
     93  * if the offset of the error in the transfer and a disk label
     94  * are both available.  blkdone should be -1 if the position of the error
     95  * is unknown; the disklabel pointer may be null from drivers that have not
     96  * been converted to use them.  The message is printed with printf
     97  * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
     98  * The message should be completed (with at least a newline) with printf
     99  * or addlog, respectively.  There is no trailing space.
    100  */
    101 #ifndef PRIdaddr
    102 #define PRIdaddr PRId64
    103 #endif
    104 void
    105 diskerr(const struct buf *bp, const char *dname, const char *what, int pri,
    106     int blkdone, const struct disklabel *lp)
    107 {
    108 	int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev);
    109 	void (*pr)(const char *, ...) __printflike(1, 2);
    110 	char partname = 'a' + part;
    111 	daddr_t sn;
    112 
    113 	if (/*CONSTCOND*/0)
    114 		/* Compiler will error this if the format is wrong... */
    115 		printf("%" PRIdaddr, bp->b_blkno);
    116 
    117 	if (pri != LOG_PRINTF) {
    118 		static const char fmt[] = "";
    119 		log(pri, fmt);
    120 		pr = addlog;
    121 	} else
    122 		pr = printf;
    123 	(*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
    124 	    bp->b_flags & B_READ ? "read" : "writ");
    125 	sn = bp->b_blkno;
    126 	if (bp->b_bcount <= DEV_BSIZE)
    127 		(*pr)("%" PRIdaddr, sn);
    128 	else {
    129 		if (blkdone >= 0) {
    130 			sn += blkdone;
    131 			(*pr)("%" PRIdaddr " of ", sn);
    132 		}
    133 		(*pr)("%" PRIdaddr "-%" PRIdaddr "", bp->b_blkno,
    134 		    bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
    135 	}
    136 	if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
    137 		sn += lp->d_partitions[part].p_offset;
    138 		(*pr)(" (%s%d bn %" PRIdaddr "; cn %" PRIdaddr "",
    139 		    dname, unit, sn, sn / lp->d_secpercyl);
    140 		sn %= lp->d_secpercyl;
    141 		(*pr)(" tn %" PRIdaddr " sn %" PRIdaddr ")",
    142 		    sn / lp->d_nsectors, sn % lp->d_nsectors);
    143 	}
    144 }
    145 
    146 /*
    147  * Searches the iostatlist for the disk corresponding to the
    148  * name provided.
    149  */
    150 struct disk *
    151 disk_find(const char *name)
    152 {
    153 	struct io_stats *stat;
    154 
    155 	stat = iostat_find(name);
    156 
    157 	if ((stat != NULL) && (stat->io_type == IOSTAT_DISK))
    158 		return stat->io_parent;
    159 
    160 	return (NULL);
    161 }
    162 
    163 void
    164 disk_init(struct disk *diskp, const char *name, const struct dkdriver *driver)
    165 {
    166 	u_int blocksize = DEV_BSIZE;
    167 
    168 	/*
    169 	 * Initialize the wedge-related locks and other fields.
    170 	 */
    171 	mutex_init(&diskp->dk_rawlock, MUTEX_DEFAULT, IPL_NONE);
    172 	mutex_init(&diskp->dk_openlock, MUTEX_DEFAULT, IPL_NONE);
    173 	LIST_INIT(&diskp->dk_wedges);
    174 	diskp->dk_nwedges = 0;
    175 	diskp->dk_labelsector = LABELSECTOR;
    176 	diskp->dk_blkshift = DK_BSIZE2BLKSHIFT(blocksize);
    177 	diskp->dk_byteshift = DK_BSIZE2BYTESHIFT(blocksize);
    178 	diskp->dk_name = name;
    179 	diskp->dk_driver = driver;
    180 }
    181 
    182 /*
    183  * Rename a disk.
    184  */
    185 void
    186 disk_rename(struct disk *diskp, const char *name)
    187 {
    188 
    189 	diskp->dk_name = name;
    190 	iostat_rename(diskp->dk_stats, diskp->dk_name);
    191 }
    192 
    193 /*
    194  * Attach a disk.
    195  */
    196 void
    197 disk_attach(struct disk *diskp)
    198 {
    199 
    200 	/*
    201 	 * Allocate and initialize the disklabel structures.
    202 	 */
    203 	diskp->dk_label = kmem_zalloc(sizeof(struct disklabel), KM_SLEEP);
    204 	diskp->dk_cpulabel = kmem_zalloc(sizeof(struct cpu_disklabel),
    205 	    KM_SLEEP);
    206 
    207 	/*
    208 	 * Set up the stats collection.
    209 	 */
    210 	diskp->dk_stats = iostat_alloc(IOSTAT_DISK, diskp, diskp->dk_name);
    211 }
    212 
    213 int
    214 disk_begindetach(struct disk *dk, int (*lastclose)(device_t),
    215     device_t self, int flags)
    216 {
    217 	int rc;
    218 
    219 	rc = 0;
    220 	mutex_enter(&dk->dk_openlock);
    221 	if (dk->dk_openmask == 0)
    222 		;	/* nothing to do */
    223 	else if ((flags & DETACH_FORCE) == 0)
    224 		rc = SET_ERROR(EBUSY);
    225 	else if (lastclose != NULL)
    226 		rc = (*lastclose)(self);
    227 	mutex_exit(&dk->dk_openlock);
    228 
    229 	return rc;
    230 }
    231 
    232 /*
    233  * Detach a disk.
    234  */
    235 void
    236 disk_detach(struct disk *diskp)
    237 {
    238 
    239 	/*
    240 	 * Remove from the drivelist.
    241 	 */
    242 	iostat_free(diskp->dk_stats);
    243 
    244 	/*
    245 	 * Release the disk-info dictionary.
    246 	 */
    247 	if (diskp->dk_info) {
    248 		prop_object_release(diskp->dk_info);
    249 		diskp->dk_info = NULL;
    250 	}
    251 
    252 	/*
    253 	 * Free the space used by the disklabel structures.
    254 	 */
    255 	kmem_free(diskp->dk_label, sizeof(*diskp->dk_label));
    256 	kmem_free(diskp->dk_cpulabel, sizeof(*diskp->dk_cpulabel));
    257 }
    258 
    259 void
    260 disk_destroy(struct disk *diskp)
    261 {
    262 
    263 	mutex_destroy(&diskp->dk_openlock);
    264 	mutex_destroy(&diskp->dk_rawlock);
    265 }
    266 
    267 /*
    268  * Mark the disk as having work queued for metrics collection.
    269  */
    270 void
    271 disk_wait(struct disk *diskp)
    272 {
    273 
    274 	iostat_wait(diskp->dk_stats);
    275 }
    276 
    277 /*
    278  * Mark the disk as busy for metrics collection.
    279  */
    280 void
    281 disk_busy(struct disk *diskp)
    282 {
    283 
    284 	iostat_busy(diskp->dk_stats);
    285 }
    286 
    287 /*
    288  * Finished disk operations, gather metrics.
    289  */
    290 void
    291 disk_unbusy(struct disk *diskp, long bcount, int read)
    292 {
    293 
    294 	iostat_unbusy(diskp->dk_stats, bcount, read);
    295 }
    296 
    297 /*
    298  * Return true if disk has an I/O operation in flight.
    299  */
    300 bool
    301 disk_isbusy(struct disk *diskp)
    302 {
    303 
    304 	return iostat_isbusy(diskp->dk_stats);
    305 }
    306 
    307 /*
    308  * Bounds checking against the media size, used for the raw partition.
    309  * secsize, mediasize and b_blkno must all be the same units.
    310  * Possibly this has to be DEV_BSIZE (512).
    311  */
    312 int
    313 bounds_check_with_mediasize(struct buf *bp, int secsize, uint64_t mediasize)
    314 {
    315 	int64_t sz;
    316 
    317 	if (bp->b_blkno < 0) {
    318 		/* Reject negative offsets immediately. */
    319 		bp->b_error = SET_ERROR(EINVAL);
    320 		return 0;
    321 	}
    322 
    323 	sz = howmany((int64_t)bp->b_bcount, secsize);
    324 
    325 	/*
    326 	 * bp->b_bcount is a 32-bit value, and we rejected a negative
    327 	 * bp->b_blkno already, so "bp->b_blkno + sz" cannot overflow.
    328 	 */
    329 
    330 	if (bp->b_blkno + sz > mediasize) {
    331 		sz = mediasize - bp->b_blkno;
    332 		if (sz == 0) {
    333 			/* If exactly at end of disk, return EOF. */
    334 			bp->b_resid = bp->b_bcount;
    335 			return 0;
    336 		}
    337 		if (sz < 0) {
    338 			/* If past end of disk, return EINVAL. */
    339 			bp->b_error = SET_ERROR(EINVAL);
    340 			return 0;
    341 		}
    342 		/* Otherwise, truncate request. */
    343 		bp->b_bcount = sz * secsize;
    344 	}
    345 
    346 	return 1;
    347 }
    348 
    349 /*
    350  * Determine the size of the transfer, and make sure it is
    351  * within the boundaries of the partition. Adjust transfer
    352  * if needed, and signal errors or early completion.
    353  */
    354 int
    355 bounds_check_with_label(struct disk *dk, struct buf *bp, int wlabel)
    356 {
    357 	struct disklabel *lp = dk->dk_label;
    358 	struct partition *p = lp->d_partitions + DISKPART(bp->b_dev);
    359 	uint64_t p_size, p_offset, labelsector;
    360 	int64_t sz;
    361 
    362 	if (bp->b_blkno < 0) {
    363 		/* Reject negative offsets immediately. */
    364 		bp->b_error = SET_ERROR(EINVAL);
    365 		return -1;
    366 	}
    367 
    368 	/* Protect against division by zero. XXX: Should never happen?!?! */
    369 	if ((lp->d_secsize / DEV_BSIZE) == 0 || lp->d_secpercyl == 0) {
    370 		bp->b_error = SET_ERROR(EINVAL);
    371 		return -1;
    372 	}
    373 
    374 	p_size = (uint64_t)p->p_size << dk->dk_blkshift;
    375 	p_offset = (uint64_t)p->p_offset << dk->dk_blkshift;
    376 #if RAW_PART == 3
    377 	labelsector = lp->d_partitions[2].p_offset;
    378 #else
    379 	labelsector = lp->d_partitions[RAW_PART].p_offset;
    380 #endif
    381 	labelsector = (labelsector + dk->dk_labelsector) << dk->dk_blkshift;
    382 
    383 	sz = howmany((int64_t)bp->b_bcount, DEV_BSIZE);
    384 
    385 	/*
    386 	 * bp->b_bcount is a 32-bit value, and we rejected a negative
    387 	 * bp->b_blkno already, so "bp->b_blkno + sz" cannot overflow.
    388 	 */
    389 
    390 	if (bp->b_blkno + sz > p_size) {
    391 		sz = p_size - bp->b_blkno;
    392 		if (sz == 0) {
    393 			/* If exactly at end of disk, return EOF. */
    394 			bp->b_resid = bp->b_bcount;
    395 			return 0;
    396 		}
    397 		if (sz < 0) {
    398 			/* If past end of disk, return EINVAL. */
    399 			bp->b_error = SET_ERROR(EINVAL);
    400 			return -1;
    401 		}
    402 		/* Otherwise, truncate request. */
    403 		bp->b_bcount = sz << DEV_BSHIFT;
    404 	}
    405 
    406 	/* Overwriting disk label? */
    407 	if (bp->b_blkno + p_offset <= labelsector &&
    408 	    bp->b_blkno + p_offset + sz > labelsector &&
    409 	    (bp->b_flags & B_READ) == 0 && !wlabel) {
    410 		bp->b_error = SET_ERROR(EROFS);
    411 		return -1;
    412 	}
    413 
    414 	/* calculate cylinder for disksort to order transfers with */
    415 	bp->b_cylinder = (bp->b_blkno + p->p_offset) /
    416 	    (lp->d_secsize / DEV_BSIZE) / lp->d_secpercyl;
    417 	return 1;
    418 }
    419 
    420 int
    421 disk_read_sectors(void (*strat)(struct buf *), const struct disklabel *lp,
    422     struct buf *bp, unsigned int sector, int count)
    423 {
    424 
    425 	if ((lp->d_secsize / DEV_BSIZE) == 0 || lp->d_secpercyl == 0)
    426 		return SET_ERROR(EINVAL);
    427 
    428 	bp->b_blkno = btodb((off_t)sector * lp->d_secsize);
    429 	bp->b_bcount = count * lp->d_secsize;
    430 	bp->b_flags = (bp->b_flags & ~B_WRITE) | B_READ;
    431 	bp->b_oflags &= ~BO_DONE;
    432 	bp->b_cylinder = sector / lp->d_secpercyl;
    433 	(*strat)(bp);
    434 	return biowait(bp);
    435 }
    436 
/*
 * convertdisklabel --
 *	Make sure the label has a usable raw partition in slot RAW_PART
 *	(one that starts at offset 0 and has a non-zero size), fixing
 *	the label up in place if necessary.
 *
 *	strat and bp are used to probe the last sector of the disk
 *	(secperunit - 1) in order to decide whether secperunit can be
 *	trusted when a raw partition has to be made up from scratch.
 *
 *	Returns NULL if the label has, or was given, a raw partition.
 *	Otherwise the label is left unchanged and a string describing
 *	the problem is returned; under DIAGNOSTIC the label contents
 *	are printed as well.
 */
const char *
convertdisklabel(struct disklabel *lp, void (*strat)(struct buf *),
    struct buf *bp, uint32_t secperunit)
{
	struct partition rp, *altp, *p;
	int geom_ok;
	const char *str;

	/* Candidate raw partition: the whole unit, unused, at offset 0. */
	memset(&rp, 0, sizeof(rp));
	rp.p_size = secperunit;
	rp.p_fstype = FS_UNUSED;

	/* If we can seek to d_secperunit - 1, believe the disk geometry. */
	if (secperunit != 0 &&
	    disk_read_sectors(strat, lp, bp, secperunit - 1, 1) == 0)
		geom_ok = 1;
	else
		geom_ok = 0;

#if 0
	printf("%s: secperunit (%" PRIu32 ") %s\n", __func__,
	    secperunit, geom_ok ? "ok" : "not ok");
#endif

	/*
	 * p is the raw partition slot; altp is the other one of the
	 * 'c'/'d' pair.
	 */
	p = &lp->d_partitions[RAW_PART];
	if (RAW_PART == 'c' - 'a')
		altp = &lp->d_partitions['d' - 'a'];
	else
		altp = &lp->d_partitions['c' - 'a'];

	if (lp->d_npartitions > RAW_PART && p->p_offset == 0 && p->p_size != 0)
		return NULL;	/* already a raw partition */
	else if (lp->d_npartitions > MAX('c', 'd') - 'a' &&
		 altp->p_offset == 0 && altp->p_size != 0) {
		/* alternate partition ('c' or 'd') is suitable for raw slot,
		 * swap with 'd' or 'c'.
		 */
		rp = *p;
		*p = *altp;
		*altp = rp;
		return NULL;
	} else if (lp->d_npartitions <= RAW_PART &&
	           lp->d_npartitions > 'c' - 'a') {
		/* No raw partition is present, but the alternate is present.
		 * Copy alternate to raw partition.
		 */
		lp->d_npartitions = RAW_PART + 1;
		*p = *altp;
		return NULL;
	} else if (!geom_ok)
		/* Would have to make one up, but secperunit is not trusted. */
		str = "no raw partition and disk reports bad geometry";
	else if (lp->d_npartitions <= RAW_PART) {
		/*
		 * The table ends before the raw slot: zero the slots in
		 * between and append the made-up raw partition.
		 */
		memset(&lp->d_partitions[lp->d_npartitions], 0,
		    sizeof(struct partition) * (RAW_PART - lp->d_npartitions));
		*p = rp;
		lp->d_npartitions = RAW_PART + 1;
		return NULL;
	} else if (lp->d_npartitions < MAXPARTITIONS) {
		/*
		 * The raw slot is occupied but there is room left: shift
		 * the entries from the raw slot onwards up by one and
		 * insert the made-up raw partition.
		 */
		memmove(p + 1, p,
		    sizeof(struct partition) * (lp->d_npartitions - RAW_PART));
		*p = rp;
		lp->d_npartitions++;
		return NULL;
	} else
		str = "no raw partition and partition table is full";
#ifdef DIAGNOSTIC
	/* Dump the offending label to help diagnose the failure. */
	printf("Bad partition: %s\n", str);
	printf("type = %u, subtype = %u, typename = %s\n",
	    lp->d_type, lp->d_subtype, lp->d_typename);
	printf("secsize = %u, nsectors = %u, ntracks = %u\n",
	    lp->d_secsize, lp->d_nsectors, lp->d_ntracks);
	printf("ncylinders = %u, secpercyl = %u, secperunit = %u\n",
	    lp->d_ncylinders, lp->d_secpercyl, lp->d_secperunit);
	printf("npartitions = %u\n", lp->d_npartitions);

	for (size_t i = 0; i < MIN(lp->d_npartitions, MAXPARTITIONS); i++) {
		p = &lp->d_partitions[i];
		printf("\t%c: offset = %u size = %u fstype = %u\n",
		    (char)(i + 'a'), p->p_offset, p->p_size, p->p_fstype);
	}
#endif
	return str;
}
    520 
    521 /*
    522  * disk_ioctl --
    523  *	Generic disk ioctl handling.
    524  */
    525 int
    526 disk_ioctl(struct disk *dk, dev_t dev, u_long cmd, void *data, int flag,
    527     struct lwp *l)
    528 {
    529 	struct dkwedge_info *dkw;
    530 	struct partinfo *pi;
    531 	struct partition *dp;
    532 #ifdef __HAVE_OLD_DISKLABEL
    533 	struct disklabel newlabel;
    534 #endif
    535 
    536 	switch (cmd) {
    537 	case DIOCGDISKINFO: {
    538 		prop_dictionary_t disk_info;
    539 		int error;
    540 
    541 		mutex_enter(&dk->dk_openlock);
    542 		if ((disk_info = dk->dk_info) == NULL) {
    543 			error = SET_ERROR(ENOTSUP);
    544 		} else {
    545 			prop_object_retain(disk_info);
    546 			error = 0;
    547 		}
    548 		mutex_exit(&dk->dk_openlock);
    549 		if (error)
    550 			return error;
    551 
    552 		error = prop_dictionary_copyout_ioctl(data, cmd, disk_info);
    553 		prop_object_release(disk_info);
    554 		return error;
    555 	}
    556 	case DIOCGSECTORSIZE:
    557 		*(u_int *)data = dk->dk_geom.dg_secsize;
    558 		return 0;
    559 
    560 	case DIOCGMEDIASIZE:
    561 		*(off_t *)data = (off_t)dk->dk_geom.dg_secsize *
    562 		    dk->dk_geom.dg_secperunit;
    563 		return 0;
    564 	default:
    565 		break;
    566 	}
    567 
    568 	if (dev == NODEV)
    569 		return SET_ERROR(EPASSTHROUGH);
    570 
    571 	/* The following should be moved to dk_ioctl */
    572 	switch (cmd) {
    573 	case DIOCGDINFO:
    574 		if (dk->dk_label == NULL)
    575 			return SET_ERROR(EBUSY);
    576 		memcpy(data, dk->dk_label, sizeof (*dk->dk_label));
    577 		return 0;
    578 
    579 #ifdef __HAVE_OLD_DISKLABEL
    580 	case ODIOCGDINFO:
    581 		if (dk->dk_label == NULL)
    582 			return SET_ERROR(EBUSY);
    583 		memcpy(&newlabel, dk->dk_label, sizeof(newlabel));
    584 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
    585 			return SET_ERROR(ENOTTY);
    586 		memcpy(data, &newlabel, sizeof(struct olddisklabel));
    587 		return 0;
    588 #endif
    589 
    590 	case DIOCGPARTINFO:
    591 		pi = data;
    592 		memset(pi, 0, sizeof(*pi));
    593 		pi->pi_secsize = dk->dk_geom.dg_secsize;
    594 		pi->pi_bsize = MAX(BLKDEV_IOSIZE, pi->pi_secsize);
    595 
    596 		if (DISKPART(dev) == RAW_PART) {
    597 			pi->pi_size = dk->dk_geom.dg_secperunit;
    598 			return 0;
    599 		}
    600 
    601 		if (dk->dk_label == NULL)
    602 			return SET_ERROR(EBUSY);
    603 
    604 		dp = &dk->dk_label->d_partitions[DISKPART(dev)];
    605 		pi->pi_offset = dp->p_offset;
    606 		pi->pi_size = dp->p_size;
    607 
    608 		pi->pi_fstype = dp->p_fstype;
    609 		pi->pi_frag = dp->p_frag;
    610 		pi->pi_fsize = dp->p_fsize;
    611 		pi->pi_cpg = dp->p_cpg;
    612 
    613 		/*
    614 		 * dholland 20130616: XXX this logic should not be
    615 		 * here. It is here because the old buffer cache
    616 		 * demands that all accesses to the same blocks need
    617 		 * to be the same size; but it only works for FFS and
    618 		 * nowadays I think it'll fail silently if the size
    619 		 * info in the disklabel is wrong. (Or missing.) The
    620 		 * buffer cache needs to be smarter; or failing that
    621 		 * we need a reliable way here to get the right block
    622 		 * size; or a reliable way to guarantee that (a) the
    623 		 * fs is not mounted when we get here and (b) any
    624 		 * buffers generated here will get purged when the fs
    625 		 * does get mounted.
    626 		 */
    627 		if (dp->p_fstype == FS_BSDFFS &&
    628 		    dp->p_frag != 0 && dp->p_fsize != 0)
    629 			pi->pi_bsize = dp->p_frag * dp->p_fsize;
    630 		return 0;
    631 
    632 	case DIOCAWEDGE:
    633 		if ((flag & FWRITE) == 0)
    634 			return SET_ERROR(EBADF);
    635 
    636 		dkw = data;
    637 		strlcpy(dkw->dkw_parent, dk->dk_name, sizeof(dkw->dkw_parent));
    638 		return dkwedge_add(dkw);
    639 
    640 	case DIOCDWEDGE:
    641 		if ((flag & FWRITE) == 0)
    642 			return SET_ERROR(EBADF);
    643 
    644 		dkw = data;
    645 		strlcpy(dkw->dkw_parent, dk->dk_name, sizeof(dkw->dkw_parent));
    646 		return dkwedge_del(dkw);
    647 
    648 	case DIOCLWEDGES:
    649 		return dkwedge_list(dk, data, l);
    650 
    651 	case DIOCMWEDGES:
    652 		if ((flag & FWRITE) == 0)
    653 			return SET_ERROR(EBADF);
    654 
    655 		dkwedge_discover(dk);
    656 		return 0;
    657 
    658 	case DIOCRMWEDGES:
    659 		if ((flag & FWRITE) == 0)
    660 			return SET_ERROR(EBADF);
    661 
    662 		dkwedge_delidle(dk);
    663 		return 0;
    664 
    665 	case DIOCGSECTORALIGN: {
    666 		struct disk_sectoralign * const dsa = data;
    667 		const int part = DISKPART(dev);
    668 
    669 		dsa->dsa_alignment = MAX(1u, dk->dk_geom.dg_physsecsize /
    670 		    dk->dk_geom.dg_secsize);
    671 		dsa->dsa_firstaligned = dk->dk_geom.dg_alignedsec;
    672 
    673 		if (part != RAW_PART) {
    674 			struct disklabel * const lp = dk->dk_label;
    675 			daddr_t offset = lp->d_partitions[part].p_offset;
    676 			uint32_t r = offset % dsa->dsa_alignment;
    677 
    678 			if (r <= dsa->dsa_firstaligned)
    679 				dsa->dsa_firstaligned -= r;
    680 			else
    681 				dsa->dsa_firstaligned += dsa->dsa_alignment - r;
    682 		}
    683 		dsa->dsa_firstaligned %= dsa->dsa_alignment;
    684 		return 0;
    685 	}
    686 
    687 	default:
    688 		return SET_ERROR(EPASSTHROUGH);
    689 	}
    690 }
    691 
    692 /*
    693  * disk_set_info --
    694  *	Canonicalize dk->dk_geom and set some parameters.
    695  *
    696  *	If disk_set_info can happen concurrently with disk_ioctl in a
    697  *	driver, the driver must serialize calls to disk_set_info with
    698  *	dk_openlock.
    699  */
    700 void
    701 disk_set_info(device_t dev, struct disk *dk, const char *type)
    702 {
    703 	struct disk_geom *dg = &dk->dk_geom;
    704 
    705 	if (dg->dg_secsize == 0) {
    706 #ifdef DIAGNOSTIC
    707 		printf("%s: fixing 0 sector size\n", dk->dk_name);
    708 #endif
    709 		dg->dg_secsize = DEV_BSIZE;
    710 	}
    711 
    712 	dk->dk_blkshift = DK_BSIZE2BLKSHIFT(dg->dg_secsize);
    713 	dk->dk_byteshift = DK_BSIZE2BYTESHIFT(dg->dg_secsize);
    714 
    715 	if (dg->dg_secperunit == 0) {
    716 #ifdef DIAGNOSTIC
    717 		if (dg->dg_ncylinders == 0) {
    718 			printf("%s: secperunit and ncylinders are zero\n",
    719 			    dk->dk_name);
    720 		}
    721 		if (dg->dg_nsectors == 0 || dg->dg_ntracks == 0) {
    722 			printf("%s: secperunit and (sectors or tracks) "
    723 			    "are zero\n", dk->dk_name);
    724 		}
    725 #endif
    726 		dg->dg_secperunit = (int64_t) dg->dg_nsectors *
    727 		    dg->dg_ntracks * dg->dg_ncylinders;
    728 	}
    729 
    730 	if (dg->dg_ncylinders == 0) {
    731 		if (dg->dg_ntracks && dg->dg_nsectors)
    732 			dg->dg_ncylinders = dg->dg_secperunit /
    733 			    (dg->dg_ntracks * dg->dg_nsectors);
    734 	}
    735 
    736 	prop_dictionary_t disk_info, odisk_info, geom;
    737 
    738 	disk_info = prop_dictionary_create();
    739 	geom = prop_dictionary_create();
    740 
    741 	prop_dictionary_set_uint64(geom, "sectors-per-unit",
    742 	    dg->dg_secperunit);
    743 
    744 	prop_dictionary_set_uint32(geom, "sector-size", dg->dg_secsize);
    745 
    746 	if (dg->dg_nsectors)
    747 		prop_dictionary_set_uint16(geom, "sectors-per-track",
    748 		    dg->dg_nsectors);
    749 
    750 	if (dg->dg_ntracks)
    751 		prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
    752 		    dg->dg_ntracks);
    753 
    754 	if (dg->dg_ncylinders)
    755 		prop_dictionary_set_uint64(geom, "cylinders-per-unit",
    756 		    dg->dg_ncylinders);
    757 
    758 	if (dg->dg_physsecsize) {
    759 		prop_dictionary_set_uint32(geom, "physical-sector-size",
    760 		    dg->dg_physsecsize);
    761 		prop_dictionary_set_uint32(geom, "aligned-sector",
    762 		    dg->dg_alignedsec);
    763 	}
    764 
    765 	prop_dictionary_set(disk_info, "geometry", geom);
    766 
    767 	if (type)
    768 		prop_dictionary_set_string_nocopy(disk_info, "type", type);
    769 
    770 	prop_object_release(geom);
    771 
    772 	odisk_info = dk->dk_info;
    773 	dk->dk_info = disk_info;
    774 
    775 	if (dev)
    776 		prop_dictionary_set(device_properties(dev), "disk-info",
    777 		    disk_info);
    778 
    779 	/*
    780 	 * Don't release disk_info here; we keep a reference to it.
    781 	 * disk_detach() will release it when we go away.
    782 	 */
    783 	if (odisk_info)
    784 		prop_object_release(odisk_info);
    785 }
    786 
    787 int
    788 disklabel_dev_unit(dev_t dev)
    789 {
    790 
    791 	return DISKUNIT(dev);
    792 }
    793