      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.316.2.4 2015/12/27 12:09:58 skrll Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Greg Oster; Jason R. Thorpe.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright (c) 1988 University of Utah.
     34  * Copyright (c) 1990, 1993
     35  *      The Regents of the University of California.  All rights reserved.
     36  *
     37  * This code is derived from software contributed to Berkeley by
     38  * the Systems Programming Group of the University of Utah Computer
     39  * Science Department.
     40  *
     41  * Redistribution and use in source and binary forms, with or without
     42  * modification, are permitted provided that the following conditions
     43  * are met:
     44  * 1. Redistributions of source code must retain the above copyright
     45  *    notice, this list of conditions and the following disclaimer.
     46  * 2. Redistributions in binary form must reproduce the above copyright
     47  *    notice, this list of conditions and the following disclaimer in the
     48  *    documentation and/or other materials provided with the distribution.
     49  * 3. Neither the name of the University nor the names of its contributors
     50  *    may be used to endorse or promote products derived from this software
     51  *    without specific prior written permission.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     63  * SUCH DAMAGE.
     64  *
     65  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     66  *
     67  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     68  */
     69 
     70 /*
     71  * Copyright (c) 1995 Carnegie-Mellon University.
     72  * All rights reserved.
     73  *
     74  * Authors: Mark Holland, Jim Zelenka
     75  *
     76  * Permission to use, copy, modify and distribute this software and
     77  * its documentation is hereby granted, provided that both the copyright
     78  * notice and this permission notice appear in all copies of the
     79  * software, derivative works or modified versions, and any portions
     80  * thereof, and that both notices appear in supporting documentation.
     81  *
     82  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     83  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     84  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     85  *
     86  * Carnegie Mellon requests users of this software to return to
     87  *
      88  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
     89  *  School of Computer Science
     90  *  Carnegie Mellon University
     91  *  Pittsburgh PA 15213-3890
     92  *
     93  * any improvements or extensions that they make and grant Carnegie the
     94  * rights to redistribute these changes.
     95  */
     96 
     97 /***********************************************************
     98  *
     99  * rf_kintf.c -- the kernel interface routines for RAIDframe
    100  *
    101  ***********************************************************/
    102 
    103 #include <sys/cdefs.h>
    104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.316.2.4 2015/12/27 12:09:58 skrll Exp $");
    105 
    106 #ifdef _KERNEL_OPT
    107 #include "opt_compat_netbsd.h"
    108 #include "opt_raid_autoconfig.h"
    109 #endif
    110 
    111 #include <sys/param.h>
    112 #include <sys/errno.h>
    113 #include <sys/pool.h>
    114 #include <sys/proc.h>
    115 #include <sys/queue.h>
    116 #include <sys/disk.h>
    117 #include <sys/device.h>
    118 #include <sys/stat.h>
    119 #include <sys/ioctl.h>
    120 #include <sys/fcntl.h>
    121 #include <sys/systm.h>
    122 #include <sys/vnode.h>
    123 #include <sys/disklabel.h>
    124 #include <sys/conf.h>
    125 #include <sys/buf.h>
    126 #include <sys/bufq.h>
    127 #include <sys/reboot.h>
    128 #include <sys/kauth.h>
    129 #include <sys/module.h>
    130 
    131 #include <prop/proplib.h>
    132 
    133 #include <dev/raidframe/raidframevar.h>
    134 #include <dev/raidframe/raidframeio.h>
    135 #include <dev/raidframe/rf_paritymap.h>
    136 
    137 #include "rf_raid.h"
    138 #include "rf_copyback.h"
    139 #include "rf_dag.h"
    140 #include "rf_dagflags.h"
    141 #include "rf_desc.h"
    142 #include "rf_diskqueue.h"
    143 #include "rf_etimer.h"
    144 #include "rf_general.h"
    145 #include "rf_kintf.h"
    146 #include "rf_options.h"
    147 #include "rf_driver.h"
    148 #include "rf_parityscan.h"
    149 #include "rf_threadstuff.h"
    150 
    151 #ifdef COMPAT_50
    152 #include "rf_compat50.h"
    153 #endif
    154 
    155 #include "ioconf.h"
    156 
    157 #ifdef DEBUG
    158 int     rf_kdebug_level = 0;
    159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    160 #else				/* DEBUG */
    161 #define db1_printf(a) { }
    162 #endif				/* DEBUG */
    163 
    164 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
    165 static rf_declare_mutex2(rf_sparet_wait_mutex);
    166 static rf_declare_cond2(rf_sparet_wait_cv);
    167 static rf_declare_cond2(rf_sparet_resp_cv);
    168 
    169 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    170 						 * spare table */
    171 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    172 						 * installation process */
    173 #endif
    174 
    175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    176 
    177 /* prototypes */
    178 static void KernelWakeupFunc(struct buf *);
    179 static void InitBP(struct buf *, struct vnode *, unsigned,
    180     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    181     void *, int, struct proc *);
    182 struct raid_softc;
    183 static void raidinit(struct raid_softc *);
    184 
    185 static int raid_match(device_t, cfdata_t, void *);
    186 static void raid_attach(device_t, device_t, void *);
    187 static int raid_detach(device_t, int);
    188 
    189 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    190     daddr_t, daddr_t);
    191 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    192     daddr_t, daddr_t, int);
    193 
    194 static int raidwrite_component_label(unsigned,
    195     dev_t, struct vnode *, RF_ComponentLabel_t *);
    196 static int raidread_component_label(unsigned,
    197     dev_t, struct vnode *, RF_ComponentLabel_t *);
    198 
    199 
    200 static dev_type_open(raidopen);
    201 static dev_type_close(raidclose);
    202 static dev_type_read(raidread);
    203 static dev_type_write(raidwrite);
    204 static dev_type_ioctl(raidioctl);
    205 static dev_type_strategy(raidstrategy);
    206 static dev_type_dump(raiddump);
    207 static dev_type_size(raidsize);
    208 
    209 const struct bdevsw raid_bdevsw = {
    210 	.d_open = raidopen,
    211 	.d_close = raidclose,
    212 	.d_strategy = raidstrategy,
    213 	.d_ioctl = raidioctl,
    214 	.d_dump = raiddump,
    215 	.d_psize = raidsize,
    216 	.d_discard = nodiscard,
    217 	.d_flag = D_DISK
    218 };
    219 
    220 const struct cdevsw raid_cdevsw = {
    221 	.d_open = raidopen,
    222 	.d_close = raidclose,
    223 	.d_read = raidread,
    224 	.d_write = raidwrite,
    225 	.d_ioctl = raidioctl,
    226 	.d_stop = nostop,
    227 	.d_tty = notty,
    228 	.d_poll = nopoll,
    229 	.d_mmap = nommap,
    230 	.d_kqfilter = nokqfilter,
    231 	.d_discard = nodiscard,
    232 	.d_flag = D_DISK
    233 };
    234 
    235 static struct dkdriver rf_dkdriver = {
    236 	.d_strategy = raidstrategy,
    237 	.d_minphys = minphys
    238 };
    239 
    240 struct raid_softc {
    241 	device_t sc_dev;
    242 	int	sc_unit;
    243 	int     sc_flags;	/* flags */
    244 	int     sc_cflags;	/* configuration flags */
    245 	kmutex_t sc_mutex;	/* interlock mutex */
    246 	kcondvar_t sc_cv;	/* and the condvar */
    247 	uint64_t sc_size;	/* size of the raid device */
    248 	char    sc_xname[20];	/* XXX external name */
    249 	struct disk sc_dkdev;	/* generic disk device info */
    250 	struct bufq_state *buf_queue;	/* used for the device queue */
    251 	RF_Raid_t sc_r;
    252 	LIST_ENTRY(raid_softc) sc_link;
    253 };
    254 /* sc_flags */
    255 #define RAIDF_INITED	0x01	/* unit has been initialized */
    256 #define RAIDF_WLABEL	0x02	/* label area is writable */
    257 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    258 #define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
    259 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    260 #define RAIDF_LOCKED	0x80	/* unit is locked */
    261 
    262 #define	raidunit(x)	DISKUNIT(x)
    263 
    264 extern struct cfdriver raid_cd;
    265 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    266     raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    267     DVF_DETACH_SHUTDOWN);
    268 
    269 /*
    270  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    271  * Be aware that large numbers can allow the driver to consume a lot of
    272  * kernel memory, especially on writes, and in degraded mode reads.
    273  *
    274  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    275  * a single 64K write will typically require 64K for the old data,
    276  * 64K for the old parity, and 64K for the new parity, for a total
    277  * of 192K (if the parity buffer is not re-used immediately).
     278  * Even if it is used immediately, that's still 128K, which when multiplied
    279  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    280  *
    281  * Now in degraded mode, for example, a 64K read on the above setup may
    282  * require data reconstruction, which will require *all* of the 4 remaining
    283  * disks to participate -- 4 * 32K/disk == 128K again.
    284  */
    285 
    286 #ifndef RAIDOUTSTANDING
    287 #define RAIDOUTSTANDING   6
    288 #endif
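
         /*
          * Editor's sketch (illustrative only, not part of the driver): the
          * arithmetic from the comment above expressed as a helper.  Here
          * "io_bytes" is the size of one write (64K in the example), and the
          * parity buffer is assumed to be re-used, so each outstanding
          * request ties up the incoming data plus roughly two more buffers
          * of the same size; at the default of 6 that is about 1152K.
          */
         static inline size_t
         rf_outstanding_mem_estimate(size_t io_bytes, int outstanding)
         {
         	/* incoming data + old data + old/new parity buffer (re-used) */
         	return (size_t)outstanding * (3 * io_bytes);
         }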
    289 
    290 #define RAIDLABELDEV(dev)	\
    291 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    292 
    293 /* declared here, and made public, for the benefit of KVM stuff.. */
    294 
    295 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    296 				     struct disklabel *);
    297 static void raidgetdisklabel(dev_t);
    298 static void raidmakedisklabel(struct raid_softc *);
    299 
    300 static int raidlock(struct raid_softc *);
    301 static void raidunlock(struct raid_softc *);
    302 
    303 static int raid_detach_unlocked(struct raid_softc *);
    304 
    305 static void rf_markalldirty(RF_Raid_t *);
    306 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
    307 
    308 void rf_ReconThread(struct rf_recon_req *);
    309 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    310 void rf_CopybackThread(RF_Raid_t *raidPtr);
    311 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    312 int rf_autoconfig(device_t);
    313 void rf_buildroothack(RF_ConfigSet_t *);
    314 
    315 RF_AutoConfig_t *rf_find_raid_components(void);
    316 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    317 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    318 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
    319 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    320 int rf_set_autoconfig(RF_Raid_t *, int);
    321 int rf_set_rootpartition(RF_Raid_t *, int);
    322 void rf_release_all_vps(RF_ConfigSet_t *);
    323 void rf_cleanup_config_set(RF_ConfigSet_t *);
    324 int rf_have_enough_components(RF_ConfigSet_t *);
    325 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
    326 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
    327 
    328 /*
    329  * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
    330  * Note that this is overridden by having RAID_AUTOCONFIG as an option
    331  * in the kernel config file.
    332  */
    333 #ifdef RAID_AUTOCONFIG
    334 int raidautoconfig = 1;
    335 #else
    336 int raidautoconfig = 0;
    337 #endif
    338 static bool raidautoconfigdone = false;
    339 
    340 struct RF_Pools_s rf_pools;
    341 
    342 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
    343 static kmutex_t raid_lock;
    344 
    345 static struct raid_softc *
    346 raidcreate(int unit) {
    347 	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
    348 	if (sc == NULL) {
    349 #ifdef DIAGNOSTIC
    350 		printf("%s: out of memory\n", __func__);
    351 #endif
    352 		return NULL;
    353 	}
    354 	sc->sc_unit = unit;
    355 	bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
    356 	cv_init(&sc->sc_cv, "raidunit");
    357 	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
    358 	return sc;
    359 }
    360 
    361 static void
    362 raiddestroy(struct raid_softc *sc) {
    363 	cv_destroy(&sc->sc_cv);
    364 	mutex_destroy(&sc->sc_mutex);
    365 	bufq_free(sc->buf_queue);
    366 	kmem_free(sc, sizeof(*sc));
    367 }
    368 
    369 static struct raid_softc *
    370 raidget(int unit, bool create) {
    371 	struct raid_softc *sc;
    372 	if (unit < 0) {
    373 #ifdef DIAGNOSTIC
    374 		panic("%s: unit %d!", __func__, unit);
    375 #endif
    376 		return NULL;
    377 	}
    378 	mutex_enter(&raid_lock);
    379 	LIST_FOREACH(sc, &raids, sc_link) {
    380 		if (sc->sc_unit == unit) {
    381 			mutex_exit(&raid_lock);
    382 			return sc;
    383 		}
    384 	}
    385 	mutex_exit(&raid_lock);
    386 	if (!create)
    387 		return NULL;
    388 	if ((sc = raidcreate(unit)) == NULL)
    389 		return NULL;
    390 	mutex_enter(&raid_lock);
    391 	LIST_INSERT_HEAD(&raids, sc, sc_link);
    392 	mutex_exit(&raid_lock);
    393 	return sc;
    394 }
    395 
    396 static void
    397 raidput(struct raid_softc *sc) {
    398 	mutex_enter(&raid_lock);
    399 	LIST_REMOVE(sc, sc_link);
    400 	mutex_exit(&raid_lock);
    401 	raiddestroy(sc);
    402 }
    403 
    404 void
    405 raidattach(int num)
    406 {
    407 
    408 	/*
    409 	 * Device attachment and associated initialization now occurs
    410 	 * as part of the module initialization.
    411 	 */
    412 }
    413 
    414 int
    415 rf_autoconfig(device_t self)
    416 {
    417 	RF_AutoConfig_t *ac_list;
    418 	RF_ConfigSet_t *config_sets;
    419 
    420 	if (!raidautoconfig || raidautoconfigdone == true)
    421 		return (0);
    422 
    423 	/* XXX This code can only be run once. */
    424 	raidautoconfigdone = true;
    425 
    426 #ifdef __HAVE_CPU_BOOTCONF
    427 	/*
     428 	 * 0. Find the boot device first, if needed, so we can use it later.
     429 	 *    This must be done before we autoconfigure any RAID sets, because
     430 	 *    if we use wedges we will not be able to open the boot device
     431 	 *    later.
    432 	 */
    433 	if (booted_device == NULL)
    434 		cpu_bootconf();
    435 #endif
    436 	/* 1. locate all RAID components on the system */
    437 	aprint_debug("Searching for RAID components...\n");
    438 	ac_list = rf_find_raid_components();
    439 
    440 	/* 2. Sort them into their respective sets. */
    441 	config_sets = rf_create_auto_sets(ac_list);
    442 
    443 	/*
    444 	 * 3. Evaluate each set and configure the valid ones.
    445 	 * This gets done in rf_buildroothack().
    446 	 */
    447 	rf_buildroothack(config_sets);
    448 
    449 	return 1;
    450 }
    451 
    452 static int
    453 rf_containsboot(RF_Raid_t *r, device_t bdv) {
    454 	const char *bootname = device_xname(bdv);
    455 	size_t len = strlen(bootname);
    456 
    457 	for (int col = 0; col < r->numCol; col++) {
    458 		const char *devname = r->Disks[col].devname;
    459 		devname += sizeof("/dev/") - 1;
    460 		if (strncmp(devname, "dk", 2) == 0) {
    461 			const char *parent =
    462 			    dkwedge_get_parent_name(r->Disks[col].dev);
    463 			if (parent != NULL)
    464 				devname = parent;
    465 		}
    466 		if (strncmp(devname, bootname, len) == 0) {
    467 			struct raid_softc *sc = r->softc;
    468 			aprint_debug("raid%d includes boot device %s\n",
    469 			    sc->sc_unit, devname);
    470 			return 1;
    471 		}
    472 	}
    473 	return 0;
    474 }
    475 
    476 void
    477 rf_buildroothack(RF_ConfigSet_t *config_sets)
    478 {
    479 	RF_ConfigSet_t *cset;
    480 	RF_ConfigSet_t *next_cset;
    481 	int num_root;
    482 	struct raid_softc *sc, *rsc;
    483 
    484 	sc = rsc = NULL;
    485 	num_root = 0;
    486 	cset = config_sets;
    487 	while (cset != NULL) {
    488 		next_cset = cset->next;
    489 		if (rf_have_enough_components(cset) &&
    490 		    cset->ac->clabel->autoconfigure == 1) {
    491 			sc = rf_auto_config_set(cset);
    492 			if (sc != NULL) {
    493 				aprint_debug("raid%d: configured ok\n",
    494 				    sc->sc_unit);
    495 				if (cset->rootable) {
    496 					rsc = sc;
    497 					num_root++;
    498 				}
    499 			} else {
    500 				/* The autoconfig didn't work :( */
    501 				aprint_debug("Autoconfig failed\n");
    502 				rf_release_all_vps(cset);
    503 			}
    504 		} else {
    505 			/* we're not autoconfiguring this set...
    506 			   release the associated resources */
    507 			rf_release_all_vps(cset);
    508 		}
    509 		/* cleanup */
    510 		rf_cleanup_config_set(cset);
    511 		cset = next_cset;
    512 	}
    513 
    514 	/* if the user has specified what the root device should be
    515 	   then we don't touch booted_device or boothowto... */
    516 
    517 	if (rootspec != NULL)
    518 		return;
    519 
    520 	/* we found something bootable... */
    521 
    522 	/*
    523 	 * XXX: The following code assumes that the root raid
    524 	 * is the first ('a') partition. This is about the best
    525 	 * we can do with a BSD disklabel, but we might be able
    526 	 * to do better with a GPT label, by setting a specified
    527 	 * attribute to indicate the root partition. We can then
    528 	 * stash the partition number in the r->root_partition
    529 	 * high bits (the bottom 2 bits are already used). For
    530 	 * now we just set booted_partition to 0 when we override
    531 	 * root.
    532 	 */
    533 	if (num_root == 1) {
    534 		device_t candidate_root;
    535 		if (rsc->sc_dkdev.dk_nwedges != 0) {
    536 			char cname[sizeof(cset->ac->devname)];
    537 			/* XXX: assume 'a' */
    538 			snprintf(cname, sizeof(cname), "%s%c",
    539 			    device_xname(rsc->sc_dev), 'a');
    540 			candidate_root = dkwedge_find_by_wname(cname);
    541 		} else
    542 			candidate_root = rsc->sc_dev;
    543 		if (booted_device == NULL ||
    544 		    rsc->sc_r.root_partition == 1 ||
    545 		    rf_containsboot(&rsc->sc_r, booted_device)) {
    546 			booted_device = candidate_root;
    547 			booted_partition = 0;	/* XXX assume 'a' */
    548 		}
    549 	} else if (num_root > 1) {
    550 
    551 		/*
    552 		 * Maybe the MD code can help. If it cannot, then
    553 		 * setroot() will discover that we have no
    554 		 * booted_device and will ask the user if nothing was
    555 		 * hardwired in the kernel config file
    556 		 */
    557 		if (booted_device == NULL)
    558 			return;
    559 
    560 		num_root = 0;
    561 		mutex_enter(&raid_lock);
    562 		LIST_FOREACH(sc, &raids, sc_link) {
    563 			RF_Raid_t *r = &sc->sc_r;
    564 			if (r->valid == 0)
    565 				continue;
    566 
    567 			if (r->root_partition == 0)
    568 				continue;
    569 
    570 			if (rf_containsboot(r, booted_device)) {
    571 				num_root++;
    572 				rsc = sc;
    573 			}
    574 		}
    575 		mutex_exit(&raid_lock);
    576 
    577 		if (num_root == 1) {
    578 			booted_device = rsc->sc_dev;
    579 			booted_partition = 0;	/* XXX assume 'a' */
    580 		} else {
    581 			/* we can't guess.. require the user to answer... */
    582 			boothowto |= RB_ASKNAME;
    583 		}
    584 	}
    585 }
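
         /*
          * For reference (editor's note): the root_partition and
          * autoconfigure flags tested above are normally set from userland
          * with something like "raidctl -A root raid0", which reaches this
          * driver through the RAIDFRAME_SET_AUTOCONFIG and RAIDFRAME_SET_ROOT
          * ioctls handled in raidioctl() below.
          */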
    586 
    587 static int
    588 raidsize(dev_t dev)
    589 {
    590 	struct raid_softc *rs;
    591 	struct disklabel *lp;
    592 	int     part, unit, omask, size;
    593 
    594 	unit = raidunit(dev);
    595 	if ((rs = raidget(unit, false)) == NULL)
    596 		return -1;
    597 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    598 		return (-1);
    599 
    600 	part = DISKPART(dev);
    601 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    602 	lp = rs->sc_dkdev.dk_label;
    603 
    604 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
    605 		return (-1);
    606 
    607 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    608 		size = -1;
    609 	else
    610 		size = lp->d_partitions[part].p_size *
    611 		    (lp->d_secsize / DEV_BSIZE);
    612 
    613 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
    614 		return (-1);
    615 
    616 	return (size);
    617 
    618 }
    619 
    620 static int
    621 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
    622 {
    623 	int     unit = raidunit(dev);
    624 	struct raid_softc *rs;
    625 	const struct bdevsw *bdev;
    626 	struct disklabel *lp;
    627 	RF_Raid_t *raidPtr;
    628 	daddr_t offset;
    629 	int     part, c, sparecol, j, scol, dumpto;
    630 	int     error = 0;
    631 
    632 	if ((rs = raidget(unit, false)) == NULL)
    633 		return ENXIO;
    634 
    635 	raidPtr = &rs->sc_r;
    636 
    637 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    638 		return ENXIO;
    639 
    640 	/* we only support dumping to RAID 1 sets */
    641 	if (raidPtr->Layout.numDataCol != 1 ||
    642 	    raidPtr->Layout.numParityCol != 1)
    643 		return EINVAL;
    644 
    645 	if ((error = raidlock(rs)) != 0)
    646 		return error;
    647 
    648 	if (size % DEV_BSIZE != 0) {
    649 		error = EINVAL;
    650 		goto out;
    651 	}
    652 
    653 	if (blkno + size / DEV_BSIZE > rs->sc_size) {
    654 		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
    655 		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
    656 		    size / DEV_BSIZE, rs->sc_size);
    657 		error = EINVAL;
    658 		goto out;
    659 	}
    660 
    661 	part = DISKPART(dev);
    662 	lp = rs->sc_dkdev.dk_label;
    663 	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
    664 
    665 	/* figure out what device is alive.. */
    666 
    667 	/*
    668 	   Look for a component to dump to.  The preference for the
    669 	   component to dump to is as follows:
    670 	   1) the master
    671 	   2) a used_spare of the master
    672 	   3) the slave
    673 	   4) a used_spare of the slave
    674 	*/
    675 
    676 	dumpto = -1;
    677 	for (c = 0; c < raidPtr->numCol; c++) {
    678 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
    679 			/* this might be the one */
    680 			dumpto = c;
    681 			break;
    682 		}
    683 	}
    684 
    685 	/*
     686 	   At this point we have possibly selected a live master or a
     687 	   live slave.  If we did not find a live master or a live
     688 	   slave, we now check whether there is a spared master (or a
     689 	   spared slave).
    690 	*/
    691 
    692 	for (c = 0; c < raidPtr->numSpare; c++) {
    693 		sparecol = raidPtr->numCol + c;
    694 		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
    695 			/* How about this one? */
    696 			scol = -1;
    697 			for(j=0;j<raidPtr->numCol;j++) {
    698 				if (raidPtr->Disks[j].spareCol == sparecol) {
    699 					scol = j;
    700 					break;
    701 				}
    702 			}
    703 			if (scol == 0) {
    704 				/*
    705 				   We must have found a spared master!
    706 				   We'll take that over anything else
    707 				   found so far.  (We couldn't have
    708 				   found a real master before, since
    709 				   this is a used spare, and it's
    710 				   saying that it's replacing the
    711 				   master.)  On reboot (with
    712 				   autoconfiguration turned on)
    713 				   sparecol will become the 1st
    714 				   component (component0) of this set.
    715 				*/
    716 				dumpto = sparecol;
    717 				break;
    718 			} else if (scol != -1) {
    719 				/*
    720 				   Must be a spared slave.  We'll dump
     721 				   to that if we haven't found anything
    722 				   else so far.
    723 				*/
    724 				if (dumpto == -1)
    725 					dumpto = sparecol;
    726 			}
    727 		}
    728 	}
    729 
    730 	if (dumpto == -1) {
    731 		/* we couldn't find any live components to dump to!?!?
    732 		 */
    733 		error = EINVAL;
    734 		goto out;
    735 	}
    736 
    737 	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
    738 
    739 	/*
    740 	   Note that blkno is relative to this particular partition.
    741 	   By adding the offset of this partition in the RAID
    742 	   set, and also adding RF_PROTECTED_SECTORS, we get a
    743 	   value that is relative to the partition used for the
    744 	   underlying component.
    745 	*/
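
         	/*
         	 * Worked example (editor's note, hypothetical numbers): if the
         	 * dump partition starts at p_offset 1024 within the RAID set, a
         	 * dump to blkno 0 is issued to component sector
         	 * 1024 + RF_PROTECTED_SECTORS, i.e. past the reserved area at the
         	 * front of each component.
         	 */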
    746 
    747 	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
    748 				blkno + offset, va, size);
    749 
    750 out:
    751 	raidunlock(rs);
    752 
    753 	return error;
    754 }
    755 
    756 /* ARGSUSED */
    757 static int
    758 raidopen(dev_t dev, int flags, int fmt,
    759     struct lwp *l)
    760 {
    761 	int     unit = raidunit(dev);
    762 	struct raid_softc *rs;
    763 	struct disklabel *lp;
    764 	int     part, pmask;
    765 	int     error = 0;
    766 
    767 	if ((rs = raidget(unit, true)) == NULL)
    768 		return ENXIO;
    769 	if ((error = raidlock(rs)) != 0)
    770 		return (error);
    771 
    772 	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
    773 		error = EBUSY;
    774 		goto bad;
    775 	}
    776 
    777 	lp = rs->sc_dkdev.dk_label;
    778 
    779 	part = DISKPART(dev);
    780 
    781 	/*
    782 	 * If there are wedges, and this is not RAW_PART, then we
    783 	 * need to fail.
    784 	 */
    785 	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
    786 		error = EBUSY;
    787 		goto bad;
    788 	}
    789 	pmask = (1 << part);
    790 
    791 	if ((rs->sc_flags & RAIDF_INITED) &&
    792 	    (rs->sc_dkdev.dk_nwedges == 0) &&
    793 	    (rs->sc_dkdev.dk_openmask == 0))
    794 		raidgetdisklabel(dev);
    795 
    796 	/* make sure that this partition exists */
    797 
    798 	if (part != RAW_PART) {
    799 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    800 		    ((part >= lp->d_npartitions) ||
    801 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    802 			error = ENXIO;
    803 			goto bad;
    804 		}
    805 	}
    806 	/* Prevent this unit from being unconfigured while open. */
    807 	switch (fmt) {
    808 	case S_IFCHR:
    809 		rs->sc_dkdev.dk_copenmask |= pmask;
    810 		break;
    811 
    812 	case S_IFBLK:
    813 		rs->sc_dkdev.dk_bopenmask |= pmask;
    814 		break;
    815 	}
    816 
    817 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    818 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    819 		/* First one... mark things as dirty... Note that we *MUST*
    820 		 have done a configure before this.  I DO NOT WANT TO BE
    821 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    822 		 THAT THEY BELONG TOGETHER!!!!! */
    823 		/* XXX should check to see if we're only open for reading
    824 		   here... If so, we needn't do this, but then need some
    825 		   other way of keeping track of what's happened.. */
    826 
    827 		rf_markalldirty(&rs->sc_r);
    828 	}
    829 
    830 
    831 	rs->sc_dkdev.dk_openmask =
    832 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    833 
    834 bad:
    835 	raidunlock(rs);
    836 
    837 	return (error);
    838 
    839 
    840 }
    841 
    842 /* ARGSUSED */
    843 static int
    844 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
    845 {
    846 	int     unit = raidunit(dev);
    847 	struct raid_softc *rs;
    848 	int     error = 0;
    849 	int     part;
    850 
    851 	if ((rs = raidget(unit, false)) == NULL)
    852 		return ENXIO;
    853 
    854 	if ((error = raidlock(rs)) != 0)
    855 		return (error);
    856 
    857 	part = DISKPART(dev);
    858 
    859 	/* ...that much closer to allowing unconfiguration... */
    860 	switch (fmt) {
    861 	case S_IFCHR:
    862 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    863 		break;
    864 
    865 	case S_IFBLK:
    866 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    867 		break;
    868 	}
    869 	rs->sc_dkdev.dk_openmask =
    870 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    871 
    872 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    873 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
     874 		/* Last one... The device is not unconfigured yet.
     875 		   (Device shutdown takes care of setting the clean
     876 		   bits when RAIDF_INITED is not set.)  Mark things
     877 		   as clean here... */
    878 
    879 		rf_update_component_labels(&rs->sc_r,
    880 						 RF_FINAL_COMPONENT_UPDATE);
    881 	}
    882 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    883 	    ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)) {
    884 		/*
    885 		 * Detach this raid unit
    886 		 */
    887 		cfdata_t cf = NULL;
    888 		int retcode = 0;
    889 
    890 		if (rs->sc_dev != NULL) {
    891 			cf = device_cfdata(rs->sc_dev);
    892 
    893 			raidunlock(rs);
    894 			retcode = config_detach(rs->sc_dev, DETACH_QUIET);
    895 			if (retcode == 0)
    896 				/* free the pseudo device attach bits */
    897 				free(cf, M_RAIDFRAME);
    898 		} else {
    899 			raidput(rs);
    900 		}
    901 		return retcode;
    902 	}
    903 
    904 	raidunlock(rs);
    905 	return (0);
    906 }
    907 
    908 static void
    909 raidstrategy(struct buf *bp)
    910 {
    911 	unsigned int unit = raidunit(bp->b_dev);
    912 	RF_Raid_t *raidPtr;
    913 	int     wlabel;
    914 	struct raid_softc *rs;
    915 
    916 	if ((rs = raidget(unit, false)) == NULL) {
    917 		bp->b_error = ENXIO;
    918 		goto done;
    919 	}
    920 	if ((rs->sc_flags & RAIDF_INITED) == 0) {
    921 		bp->b_error = ENXIO;
    922 		goto done;
    923 	}
    924 	raidPtr = &rs->sc_r;
    925 	if (!raidPtr->valid) {
    926 		bp->b_error = ENODEV;
    927 		goto done;
    928 	}
    929 	if (bp->b_bcount == 0) {
    930 		db1_printf(("b_bcount is zero..\n"));
    931 		goto done;
    932 	}
    933 
    934 	/*
    935 	 * Do bounds checking and adjust transfer.  If there's an
    936 	 * error, the bounds check will flag that for us.
    937 	 */
    938 
    939 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    940 	if (DISKPART(bp->b_dev) == RAW_PART) {
    941 		uint64_t size; /* device size in DEV_BSIZE unit */
    942 
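         		/*
         		 * Editor's note: the conversion below expresses the set's
         		 * size in DEV_BSIZE units.  With 512-byte sectors
         		 * logBytesPerSector equals DEV_BSHIFT and size is simply
         		 * totalSectors; with 4K sectors the first branch scales
         		 * totalSectors up by 8.
         		 */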
    943 		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
    944 			size = raidPtr->totalSectors <<
    945 			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
    946 		} else {
    947 			size = raidPtr->totalSectors >>
    948 			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
    949 		}
    950 		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
    951 			goto done;
    952 		}
    953 	} else {
    954 		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
    955 			db1_printf(("Bounds check failed!!:%d %d\n",
    956 				(int) bp->b_blkno, (int) wlabel));
    957 			goto done;
    958 		}
    959 	}
    960 
    961 	rf_lock_mutex2(raidPtr->iodone_lock);
    962 
    963 	bp->b_resid = 0;
    964 
    965 	/* stuff it onto our queue */
    966 	bufq_put(rs->buf_queue, bp);
    967 
     968 	/* schedule the IO to happen at the next convenient time */
    969 	rf_signal_cond2(raidPtr->iodone_cv);
    970 	rf_unlock_mutex2(raidPtr->iodone_lock);
    971 
    972 	return;
    973 
    974 done:
    975 	bp->b_resid = bp->b_bcount;
    976 	biodone(bp);
    977 }
    978 
    979 /* ARGSUSED */
    980 static int
    981 raidread(dev_t dev, struct uio *uio, int flags)
    982 {
    983 	int     unit = raidunit(dev);
    984 	struct raid_softc *rs;
    985 
    986 	if ((rs = raidget(unit, false)) == NULL)
    987 		return ENXIO;
    988 
    989 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    990 		return (ENXIO);
    991 
    992 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    993 
    994 }
    995 
    996 /* ARGSUSED */
    997 static int
    998 raidwrite(dev_t dev, struct uio *uio, int flags)
    999 {
   1000 	int     unit = raidunit(dev);
   1001 	struct raid_softc *rs;
   1002 
   1003 	if ((rs = raidget(unit, false)) == NULL)
   1004 		return ENXIO;
   1005 
   1006 	if ((rs->sc_flags & RAIDF_INITED) == 0)
   1007 		return (ENXIO);
   1008 
   1009 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
   1010 
   1011 }
   1012 
   1013 static int
   1014 raid_detach_unlocked(struct raid_softc *rs)
   1015 {
   1016 	int error;
   1017 	RF_Raid_t *raidPtr;
   1018 
   1019 	raidPtr = &rs->sc_r;
   1020 
   1021 	/*
   1022 	 * If somebody has a partition mounted, we shouldn't
   1023 	 * shutdown.
   1024 	 */
   1025 	if (rs->sc_dkdev.dk_openmask != 0)
   1026 		return EBUSY;
   1027 
   1028 	if ((rs->sc_flags & RAIDF_INITED) == 0)
   1029 		;	/* not initialized: nothing to do */
   1030 	else if ((error = rf_Shutdown(raidPtr)) != 0)
   1031 		return error;
   1032 	else
   1033 		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);
   1034 
   1035 	/* Detach the disk. */
   1036 	dkwedge_delall(&rs->sc_dkdev);
   1037 	disk_detach(&rs->sc_dkdev);
   1038 	disk_destroy(&rs->sc_dkdev);
   1039 
   1040 	/* Free the softc */
   1041 	raidput(rs);
   1042 
   1043 	aprint_normal_dev(rs->sc_dev, "detached\n");
   1044 
   1045 	return 0;
   1046 }
   1047 
   1048 static int
   1049 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
   1050 {
   1051 	int     unit = raidunit(dev);
   1052 	int     error = 0;
   1053 	int     part, pmask, s;
   1054 	cfdata_t cf;
   1055 	struct raid_softc *rs;
   1056 	RF_Config_t *k_cfg, *u_cfg;
   1057 	RF_Raid_t *raidPtr;
   1058 	RF_RaidDisk_t *diskPtr;
   1059 	RF_AccTotals_t *totals;
   1060 	RF_DeviceConfig_t *d_cfg, **ucfgp;
   1061 	u_char *specific_buf;
   1062 	int retcode = 0;
   1063 	int column;
   1064 /*	int raidid; */
   1065 	struct rf_recon_req *rrcopy, *rr;
   1066 	RF_ComponentLabel_t *clabel;
   1067 	RF_ComponentLabel_t *ci_label;
   1068 	RF_ComponentLabel_t **clabel_ptr;
   1069 	RF_SingleComponent_t *sparePtr,*componentPtr;
   1070 	RF_SingleComponent_t component;
   1071 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
   1072 	int i, j, d;
   1073 #ifdef __HAVE_OLD_DISKLABEL
   1074 	struct disklabel newlabel;
   1075 #endif
   1076 
   1077 	if ((rs = raidget(unit, false)) == NULL)
   1078 		return ENXIO;
   1079 	raidPtr = &rs->sc_r;
   1080 
   1081 	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
   1082 		(int) DISKPART(dev), (int) unit, cmd));
   1083 
   1084 	/* Must be open for writes for these commands... */
   1085 	switch (cmd) {
   1086 #ifdef DIOCGSECTORSIZE
   1087 	case DIOCGSECTORSIZE:
   1088 		*(u_int *)data = raidPtr->bytesPerSector;
   1089 		return 0;
   1090 	case DIOCGMEDIASIZE:
   1091 		*(off_t *)data =
   1092 		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
   1093 		return 0;
   1094 #endif
   1095 	case DIOCSDINFO:
   1096 	case DIOCWDINFO:
   1097 #ifdef __HAVE_OLD_DISKLABEL
   1098 	case ODIOCWDINFO:
   1099 	case ODIOCSDINFO:
   1100 #endif
   1101 	case DIOCWLABEL:
   1102 	case DIOCAWEDGE:
   1103 	case DIOCDWEDGE:
   1104 	case DIOCMWEDGES:
   1105 	case DIOCSSTRATEGY:
   1106 		if ((flag & FWRITE) == 0)
   1107 			return (EBADF);
   1108 	}
   1109 
   1110 	/* Must be initialized for these... */
   1111 	switch (cmd) {
   1112 	case DIOCGDINFO:
   1113 	case DIOCSDINFO:
   1114 	case DIOCWDINFO:
   1115 #ifdef __HAVE_OLD_DISKLABEL
   1116 	case ODIOCGDINFO:
   1117 	case ODIOCWDINFO:
   1118 	case ODIOCSDINFO:
   1119 	case ODIOCGDEFLABEL:
   1120 #endif
   1121 	case DIOCGPARTINFO:
   1122 	case DIOCWLABEL:
   1123 	case DIOCGDEFLABEL:
   1124 	case DIOCAWEDGE:
   1125 	case DIOCDWEDGE:
   1126 	case DIOCLWEDGES:
   1127 	case DIOCMWEDGES:
   1128 	case DIOCCACHESYNC:
   1129 	case RAIDFRAME_SHUTDOWN:
   1130 	case RAIDFRAME_REWRITEPARITY:
   1131 	case RAIDFRAME_GET_INFO:
   1132 	case RAIDFRAME_RESET_ACCTOTALS:
   1133 	case RAIDFRAME_GET_ACCTOTALS:
   1134 	case RAIDFRAME_KEEP_ACCTOTALS:
   1135 	case RAIDFRAME_GET_SIZE:
   1136 	case RAIDFRAME_FAIL_DISK:
   1137 	case RAIDFRAME_COPYBACK:
   1138 	case RAIDFRAME_CHECK_RECON_STATUS:
   1139 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1140 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1141 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1142 	case RAIDFRAME_ADD_HOT_SPARE:
   1143 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1144 	case RAIDFRAME_INIT_LABELS:
   1145 	case RAIDFRAME_REBUILD_IN_PLACE:
   1146 	case RAIDFRAME_CHECK_PARITY:
   1147 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1148 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1149 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1150 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1151 	case RAIDFRAME_SET_AUTOCONFIG:
   1152 	case RAIDFRAME_SET_ROOT:
   1153 	case RAIDFRAME_DELETE_COMPONENT:
   1154 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1155 	case RAIDFRAME_PARITYMAP_STATUS:
   1156 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
   1157 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
   1158 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
   1159 	case DIOCGSTRATEGY:
   1160 	case DIOCSSTRATEGY:
   1161 		if ((rs->sc_flags & RAIDF_INITED) == 0)
   1162 			return (ENXIO);
   1163 	}
   1164 
   1165 	switch (cmd) {
   1166 #ifdef COMPAT_50
   1167 	case RAIDFRAME_GET_INFO50:
   1168 		return rf_get_info50(raidPtr, data);
   1169 
   1170 	case RAIDFRAME_CONFIGURE50:
   1171 		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
   1172 			return retcode;
   1173 		goto config;
   1174 #endif
   1175 		/* configure the system */
   1176 	case RAIDFRAME_CONFIGURE:
   1177 
   1178 		if (raidPtr->valid) {
   1179 			/* There is a valid RAID set running on this unit! */
   1180 			printf("raid%d: Device already configured!\n",unit);
   1181 			return(EINVAL);
   1182 		}
   1183 
   1184 		/* copy-in the configuration information */
   1185 		/* data points to a pointer to the configuration structure */
   1186 
   1187 		u_cfg = *((RF_Config_t **) data);
   1188 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
   1189 		if (k_cfg == NULL) {
   1190 			return (ENOMEM);
   1191 		}
   1192 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
   1193 		if (retcode) {
   1194 			RF_Free(k_cfg, sizeof(RF_Config_t));
   1195 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
   1196 				retcode));
   1197 			goto no_config;
   1198 		}
   1199 		goto config;
   1200 	config:
   1201 		rs->sc_flags &= ~RAIDF_SHUTDOWN;
   1202 
   1203 		/* allocate a buffer for the layout-specific data, and copy it
   1204 		 * in */
   1205 		if (k_cfg->layoutSpecificSize) {
   1206 			if (k_cfg->layoutSpecificSize > 10000) {
   1207 				/* sanity check */
   1208 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1209 				retcode = EINVAL;
   1210 				goto no_config;
   1211 			}
   1212 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
   1213 			    (u_char *));
   1214 			if (specific_buf == NULL) {
   1215 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1216 				retcode = ENOMEM;
   1217 				goto no_config;
   1218 			}
   1219 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1220 			    k_cfg->layoutSpecificSize);
   1221 			if (retcode) {
   1222 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1223 				RF_Free(specific_buf,
   1224 					k_cfg->layoutSpecificSize);
   1225 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
   1226 					retcode));
   1227 				goto no_config;
   1228 			}
   1229 		} else
   1230 			specific_buf = NULL;
   1231 		k_cfg->layoutSpecific = specific_buf;
   1232 
   1233 		/* should do some kind of sanity check on the configuration.
   1234 		 * Store the sum of all the bytes in the last byte? */
   1235 
   1236 		/* configure the system */
   1237 
   1238 		/*
   1239 		 * Clear the entire RAID descriptor, just to make sure
   1240 		 *  there is no stale data left in the case of a
   1241 		 *  reconfiguration
   1242 		 */
   1243 		memset(raidPtr, 0, sizeof(*raidPtr));
   1244 		raidPtr->softc = rs;
   1245 		raidPtr->raidid = unit;
   1246 
   1247 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
   1248 
   1249 		if (retcode == 0) {
   1250 
   1251 			/* allow this many simultaneous IO's to
   1252 			   this RAID device */
   1253 			raidPtr->openings = RAIDOUTSTANDING;
   1254 
   1255 			raidinit(rs);
   1256 			rf_markalldirty(raidPtr);
   1257 		}
   1258 		/* free the buffers.  No return code here. */
   1259 		if (k_cfg->layoutSpecificSize) {
   1260 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1261 		}
   1262 		RF_Free(k_cfg, sizeof(RF_Config_t));
   1263 
   1264 	no_config:
   1265 		/*
   1266 		 * If configuration failed, set sc_flags so that we
   1267 		 * will detach the device when we close it.
   1268 		 */
   1269 		if (retcode != 0)
   1270 			rs->sc_flags |= RAIDF_SHUTDOWN;
   1271 		return (retcode);
   1272 
   1273 		/* shutdown the system */
   1274 	case RAIDFRAME_SHUTDOWN:
   1275 
   1276 		part = DISKPART(dev);
   1277 		pmask = (1 << part);
   1278 
   1279 		if ((error = raidlock(rs)) != 0)
   1280 			return (error);
   1281 
   1282 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
   1283 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
   1284 			(rs->sc_dkdev.dk_copenmask & pmask)))
   1285 			retcode = EBUSY;
   1286 		else {
   1287 			rs->sc_flags |= RAIDF_SHUTDOWN;
   1288 			rs->sc_dkdev.dk_copenmask &= ~pmask;
   1289 			rs->sc_dkdev.dk_bopenmask &= ~pmask;
   1290 			rs->sc_dkdev.dk_openmask &= ~pmask;
   1291 			retcode = 0;
   1292 		}
   1293 
   1294 		raidunlock(rs);
   1295 
   1296 		if (retcode != 0)
   1297 			return retcode;
   1298 
   1299 		/* free the pseudo device attach bits */
   1300 
   1301 		cf = device_cfdata(rs->sc_dev);
   1302 		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
   1303 			free(cf, M_RAIDFRAME);
   1304 
   1305 		return (retcode);
   1306 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1307 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1308 		/* need to read the component label for the disk indicated
   1309 		   by row,column in clabel */
   1310 
   1311 		/*
   1312 		 * Perhaps there should be an option to skip the in-core
   1313 		 * copy and hit the disk, as with disklabel(8).
   1314 		 */
   1315 		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
   1316 
   1317 		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
   1318 
   1319 		if (retcode) {
   1320 			RF_Free(clabel, sizeof(*clabel));
   1321 			return retcode;
   1322 		}
   1323 
   1324 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1325 
   1326 		column = clabel->column;
   1327 
   1328 		if ((column < 0) || (column >= raidPtr->numCol +
   1329 		    raidPtr->numSpare)) {
   1330 			RF_Free(clabel, sizeof(*clabel));
   1331 			return EINVAL;
   1332 		}
   1333 
   1334 		RF_Free(clabel, sizeof(*clabel));
   1335 
   1336 		clabel = raidget_component_label(raidPtr, column);
   1337 
   1338 		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
   1339 
   1340 #if 0
   1341 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1342 		clabel = (RF_ComponentLabel_t *) data;
   1343 
   1344 		/* XXX check the label for valid stuff... */
   1345 		/* Note that some things *should not* get modified --
   1346 		   the user should be re-initing the labels instead of
   1347 		   trying to patch things.
   1348 		   */
   1349 
   1350 		raidid = raidPtr->raidid;
   1351 #ifdef DEBUG
   1352 		printf("raid%d: Got component label:\n", raidid);
   1353 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1354 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1355 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1356 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1357 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1358 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1359 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1360 #endif
   1361 		clabel->row = 0;
   1362 		column = clabel->column;
   1363 
   1364 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1365 			return(EINVAL);
   1366 		}
   1367 
   1368 		/* XXX this isn't allowed to do anything for now :-) */
   1369 
   1370 		/* XXX and before it is, we need to fill in the rest
   1371 		   of the fields!?!?!?! */
   1372 		memcpy(raidget_component_label(raidPtr, column),
   1373 		    clabel, sizeof(*clabel));
   1374 		raidflush_component_label(raidPtr, column);
   1375 		return (0);
   1376 #endif
   1377 
   1378 	case RAIDFRAME_INIT_LABELS:
   1379 		clabel = (RF_ComponentLabel_t *) data;
   1380 		/*
   1381 		   we only want the serial number from
   1382 		   the above.  We get all the rest of the information
   1383 		   from the config that was used to create this RAID
   1384 		   set.
   1385 		   */
   1386 
   1387 		raidPtr->serial_number = clabel->serial_number;
   1388 
   1389 		for(column=0;column<raidPtr->numCol;column++) {
   1390 			diskPtr = &raidPtr->Disks[column];
   1391 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1392 				ci_label = raidget_component_label(raidPtr,
   1393 				    column);
   1394 				/* Zeroing this is important. */
   1395 				memset(ci_label, 0, sizeof(*ci_label));
   1396 				raid_init_component_label(raidPtr, ci_label);
   1397 				ci_label->serial_number =
   1398 				    raidPtr->serial_number;
    1399 				ci_label->row = 0; /* we don't pretend to support more */
   1400 				rf_component_label_set_partitionsize(ci_label,
   1401 				    diskPtr->partitionSize);
   1402 				ci_label->column = column;
   1403 				raidflush_component_label(raidPtr, column);
   1404 			}
   1405 			/* XXXjld what about the spares? */
   1406 		}
   1407 
   1408 		return (retcode);
   1409 	case RAIDFRAME_SET_AUTOCONFIG:
   1410 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1411 		printf("raid%d: New autoconfig value is: %d\n",
   1412 		       raidPtr->raidid, d);
   1413 		*(int *) data = d;
   1414 		return (retcode);
   1415 
   1416 	case RAIDFRAME_SET_ROOT:
   1417 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1418 		printf("raid%d: New rootpartition value is: %d\n",
   1419 		       raidPtr->raidid, d);
   1420 		*(int *) data = d;
   1421 		return (retcode);
   1422 
   1423 		/* initialize all parity */
   1424 	case RAIDFRAME_REWRITEPARITY:
   1425 
   1426 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1427 			/* Parity for RAID 0 is trivially correct */
   1428 			raidPtr->parity_good = RF_RAID_CLEAN;
   1429 			return(0);
   1430 		}
   1431 
   1432 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1433 			/* Re-write is already in progress! */
   1434 			return(EINVAL);
   1435 		}
   1436 
   1437 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1438 					   rf_RewriteParityThread,
   1439 					   raidPtr,"raid_parity");
   1440 		return (retcode);
   1441 
   1442 
   1443 	case RAIDFRAME_ADD_HOT_SPARE:
   1444 		sparePtr = (RF_SingleComponent_t *) data;
   1445 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
   1446 		retcode = rf_add_hot_spare(raidPtr, &component);
   1447 		return(retcode);
   1448 
   1449 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1450 		return(retcode);
   1451 
   1452 	case RAIDFRAME_DELETE_COMPONENT:
   1453 		componentPtr = (RF_SingleComponent_t *)data;
   1454 		memcpy( &component, componentPtr,
   1455 			sizeof(RF_SingleComponent_t));
   1456 		retcode = rf_delete_component(raidPtr, &component);
   1457 		return(retcode);
   1458 
   1459 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1460 		componentPtr = (RF_SingleComponent_t *)data;
   1461 		memcpy( &component, componentPtr,
   1462 			sizeof(RF_SingleComponent_t));
   1463 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1464 		return(retcode);
   1465 
   1466 	case RAIDFRAME_REBUILD_IN_PLACE:
   1467 
   1468 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1469 			/* Can't do this on a RAID 0!! */
   1470 			return(EINVAL);
   1471 		}
   1472 
   1473 		if (raidPtr->recon_in_progress == 1) {
   1474 			/* a reconstruct is already in progress! */
   1475 			return(EINVAL);
   1476 		}
   1477 
   1478 		componentPtr = (RF_SingleComponent_t *) data;
   1479 		memcpy( &component, componentPtr,
   1480 			sizeof(RF_SingleComponent_t));
   1481 		component.row = 0; /* we don't support any more */
   1482 		column = component.column;
   1483 
   1484 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1485 			return(EINVAL);
   1486 		}
   1487 
   1488 		rf_lock_mutex2(raidPtr->mutex);
   1489 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1490 		    (raidPtr->numFailures > 0)) {
   1491 			/* XXX 0 above shouldn't be constant!!! */
   1492 			/* some component other than this has failed.
   1493 			   Let's not make things worse than they already
   1494 			   are... */
   1495 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1496 			       raidPtr->raidid);
   1497 			printf("raid%d:     Col: %d   Too many failures.\n",
   1498 			       raidPtr->raidid, column);
   1499 			rf_unlock_mutex2(raidPtr->mutex);
   1500 			return (EINVAL);
   1501 		}
   1502 		if (raidPtr->Disks[column].status ==
   1503 		    rf_ds_reconstructing) {
   1504 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1505 			       raidPtr->raidid);
   1506 			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);
   1507 
   1508 			rf_unlock_mutex2(raidPtr->mutex);
   1509 			return (EINVAL);
   1510 		}
   1511 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1512 			rf_unlock_mutex2(raidPtr->mutex);
   1513 			return (EINVAL);
   1514 		}
   1515 		rf_unlock_mutex2(raidPtr->mutex);
   1516 
   1517 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1518 		if (rrcopy == NULL)
   1519 			return(ENOMEM);
   1520 
   1521 		rrcopy->raidPtr = (void *) raidPtr;
   1522 		rrcopy->col = column;
   1523 
   1524 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1525 					   rf_ReconstructInPlaceThread,
   1526 					   rrcopy,"raid_reconip");
   1527 		return(retcode);
   1528 
   1529 	case RAIDFRAME_GET_INFO:
   1530 		if (!raidPtr->valid)
   1531 			return (ENODEV);
   1532 		ucfgp = (RF_DeviceConfig_t **) data;
   1533 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1534 			  (RF_DeviceConfig_t *));
   1535 		if (d_cfg == NULL)
   1536 			return (ENOMEM);
   1537 		d_cfg->rows = 1; /* there is only 1 row now */
   1538 		d_cfg->cols = raidPtr->numCol;
   1539 		d_cfg->ndevs = raidPtr->numCol;
   1540 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1541 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1542 			return (ENOMEM);
   1543 		}
   1544 		d_cfg->nspares = raidPtr->numSpare;
   1545 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1546 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1547 			return (ENOMEM);
   1548 		}
   1549 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1550 		d = 0;
   1551 		for (j = 0; j < d_cfg->cols; j++) {
   1552 			d_cfg->devs[d] = raidPtr->Disks[j];
   1553 			d++;
   1554 		}
   1555 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1556 			d_cfg->spares[i] = raidPtr->Disks[j];
   1557 			if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
   1558 				/* XXX: raidctl(8) expects to see this as a used spare */
   1559 				d_cfg->spares[i].status = rf_ds_used_spare;
   1560 			}
   1561 		}
   1562 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1563 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1564 
   1565 		return (retcode);
   1566 
   1567 	case RAIDFRAME_CHECK_PARITY:
   1568 		*(int *) data = raidPtr->parity_good;
   1569 		return (0);
   1570 
   1571 	case RAIDFRAME_PARITYMAP_STATUS:
   1572 		if (rf_paritymap_ineligible(raidPtr))
   1573 			return EINVAL;
   1574 		rf_paritymap_status(raidPtr->parity_map,
   1575 		    (struct rf_pmstat *)data);
   1576 		return 0;
   1577 
   1578 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
   1579 		if (rf_paritymap_ineligible(raidPtr))
   1580 			return EINVAL;
   1581 		if (raidPtr->parity_map == NULL)
   1582 			return ENOENT; /* ??? */
   1583 		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
   1584 			(struct rf_pmparams *)data, 1))
   1585 			return EINVAL;
   1586 		return 0;
   1587 
   1588 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
   1589 		if (rf_paritymap_ineligible(raidPtr))
   1590 			return EINVAL;
   1591 		*(int *) data = rf_paritymap_get_disable(raidPtr);
   1592 		return 0;
   1593 
   1594 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
   1595 		if (rf_paritymap_ineligible(raidPtr))
   1596 			return EINVAL;
   1597 		rf_paritymap_set_disable(raidPtr, *(int *)data);
   1598 		/* XXX should errors be passed up? */
   1599 		return 0;
   1600 
   1601 	case RAIDFRAME_RESET_ACCTOTALS:
   1602 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1603 		return (0);
   1604 
   1605 	case RAIDFRAME_GET_ACCTOTALS:
   1606 		totals = (RF_AccTotals_t *) data;
   1607 		*totals = raidPtr->acc_totals;
   1608 		return (0);
   1609 
   1610 	case RAIDFRAME_KEEP_ACCTOTALS:
   1611 		raidPtr->keep_acc_totals = *(int *)data;
   1612 		return (0);
   1613 
   1614 	case RAIDFRAME_GET_SIZE:
   1615 		*(int *) data = raidPtr->totalSectors;
   1616 		return (0);
   1617 
   1618 		/* fail a disk & optionally start reconstruction */
   1619 	case RAIDFRAME_FAIL_DISK:
   1620 
   1621 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1622 			/* Can't do this on a RAID 0!! */
   1623 			return(EINVAL);
   1624 		}
   1625 
   1626 		rr = (struct rf_recon_req *) data;
   1627 		rr->row = 0;
   1628 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1629 			return (EINVAL);
   1630 
   1631 
   1632 		rf_lock_mutex2(raidPtr->mutex);
   1633 		if (raidPtr->status == rf_rs_reconstructing) {
   1634 			/* you can't fail a disk while we're reconstructing! */
   1635 			/* XXX wrong for RAID6 */
   1636 			rf_unlock_mutex2(raidPtr->mutex);
   1637 			return (EINVAL);
   1638 		}
   1639 		if ((raidPtr->Disks[rr->col].status ==
   1640 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1641 			/* some other component has failed.  Let's not make
   1642 			   things worse. XXX wrong for RAID6 */
   1643 			rf_unlock_mutex2(raidPtr->mutex);
   1644 			return (EINVAL);
   1645 		}
   1646 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1647 			/* Can't fail a spared disk! */
   1648 			rf_unlock_mutex2(raidPtr->mutex);
   1649 			return (EINVAL);
   1650 		}
   1651 		rf_unlock_mutex2(raidPtr->mutex);
   1652 
   1653 		/* make a copy of the recon request so that we don't rely on
   1654 		 * the user's buffer */
   1655 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1656 		if (rrcopy == NULL)
   1657 			return(ENOMEM);
   1658 		memcpy(rrcopy, rr, sizeof(*rr));
   1659 		rrcopy->raidPtr = (void *) raidPtr;
   1660 
   1661 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1662 					   rf_ReconThread,
   1663 					   rrcopy,"raid_recon");
    1664 		return (retcode);
   1665 
   1666 		/* invoke a copyback operation after recon on whatever disk
   1667 		 * needs it, if any */
   1668 	case RAIDFRAME_COPYBACK:
   1669 
   1670 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1671 			/* This makes no sense on a RAID 0!! */
   1672 			return(EINVAL);
   1673 		}
   1674 
   1675 		if (raidPtr->copyback_in_progress == 1) {
   1676 			/* Copyback is already in progress! */
   1677 			return(EINVAL);
   1678 		}
   1679 
   1680 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1681 					   rf_CopybackThread,
   1682 					   raidPtr,"raid_copyback");
   1683 		return (retcode);
   1684 
   1685 		/* return the percentage completion of reconstruction */
   1686 	case RAIDFRAME_CHECK_RECON_STATUS:
   1687 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1688 			/* This makes no sense on a RAID 0, so tell the
   1689 			   user it's done. */
   1690 			*(int *) data = 100;
   1691 			return(0);
   1692 		}
   1693 		if (raidPtr->status != rf_rs_reconstructing)
   1694 			*(int *) data = 100;
   1695 		else {
   1696 			if (raidPtr->reconControl->numRUsTotal > 0) {
   1697 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
   1698 			} else {
   1699 				*(int *) data = 0;
   1700 			}
   1701 		}
   1702 		return (0);
   1703 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1704 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1705 		if (raidPtr->status != rf_rs_reconstructing) {
   1706 			progressInfo.remaining = 0;
   1707 			progressInfo.completed = 100;
   1708 			progressInfo.total = 100;
   1709 		} else {
   1710 			progressInfo.total =
   1711 				raidPtr->reconControl->numRUsTotal;
   1712 			progressInfo.completed =
   1713 				raidPtr->reconControl->numRUsComplete;
   1714 			progressInfo.remaining = progressInfo.total -
   1715 				progressInfo.completed;
   1716 		}
   1717 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1718 				  sizeof(RF_ProgressInfo_t));
   1719 		return (retcode);
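         		/*
         		 * Note: unlike the plain CHECK_*_STATUS ioctls, which
         		 * return a 0..100 percentage, the *_EXT variants above
         		 * hand back the raw counts so userland can compute a
         		 * finer-grained figure, e.g. (illustrative values)
         		 * completed = 150 of total = 600 is 25%.
         		 */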
   1720 
   1721 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1722 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1723 			/* This makes no sense on a RAID 0, so tell the
   1724 			   user it's done. */
   1725 			*(int *) data = 100;
   1726 			return(0);
   1727 		}
   1728 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1729 			*(int *) data = 100 *
   1730 				raidPtr->parity_rewrite_stripes_done /
   1731 				raidPtr->Layout.numStripe;
   1732 		} else {
   1733 			*(int *) data = 100;
   1734 		}
   1735 		return (0);
   1736 
   1737 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1738 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1739 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1740 			progressInfo.total = raidPtr->Layout.numStripe;
   1741 			progressInfo.completed =
   1742 				raidPtr->parity_rewrite_stripes_done;
   1743 			progressInfo.remaining = progressInfo.total -
   1744 				progressInfo.completed;
   1745 		} else {
   1746 			progressInfo.remaining = 0;
   1747 			progressInfo.completed = 100;
   1748 			progressInfo.total = 100;
   1749 		}
   1750 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1751 				  sizeof(RF_ProgressInfo_t));
   1752 		return (retcode);
   1753 
   1754 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1755 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1756 			/* This makes no sense on a RAID 0 */
   1757 			*(int *) data = 100;
   1758 			return(0);
   1759 		}
   1760 		if (raidPtr->copyback_in_progress == 1) {
   1761 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1762 				raidPtr->Layout.numStripe;
   1763 		} else {
   1764 			*(int *) data = 100;
   1765 		}
   1766 		return (0);
   1767 
   1768 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1769 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1770 		if (raidPtr->copyback_in_progress == 1) {
   1771 			progressInfo.total = raidPtr->Layout.numStripe;
   1772 			progressInfo.completed =
   1773 				raidPtr->copyback_stripes_done;
   1774 			progressInfo.remaining = progressInfo.total -
   1775 				progressInfo.completed;
   1776 		} else {
   1777 			progressInfo.remaining = 0;
   1778 			progressInfo.completed = 100;
   1779 			progressInfo.total = 100;
   1780 		}
   1781 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1782 				  sizeof(RF_ProgressInfo_t));
   1783 		return (retcode);
   1784 
   1785 		/* the sparetable daemon calls this to wait for the kernel to
   1786 		 * need a spare table. this ioctl does not return until a
   1787 		 * spare table is needed. XXX -- calling mpsleep here in the
   1788 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1789 		 * -- I should either compute the spare table in the kernel,
   1790 		 * or have a different -- XXX XXX -- interface (a different
   1791 		 * character device) for delivering the table     -- XXX */
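         		/*
         		 * Rough sketch of the daemon-side loop this (disabled)
         		 * interface implies; illustrative only, not shipped code:
         		 *
         		 *	RF_SparetWait_t w;
         		 *	while (ioctl(fd, RAIDFRAME_SPARET_WAIT, &w) == 0 &&
         		 *	    w.fcol != -1) {
         		 *		... build a spare table for w ...
         		 *		ioctl(fd, RAIDFRAME_SEND_SPARET, &table);
         		 *	}
         		 *
         		 * RAIDFRAME_ABORT_SPARET_WAIT wakes the waiter with
         		 * fcol == -1 so the daemon knows to exit.
         		 */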
   1792 #if 0
   1793 	case RAIDFRAME_SPARET_WAIT:
   1794 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1795 		while (!rf_sparet_wait_queue)
   1796 			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
   1797 		waitreq = rf_sparet_wait_queue;
   1798 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1799 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1800 
   1801 		/* structure assignment */
   1802 		*((RF_SparetWait_t *) data) = *waitreq;
   1803 
   1804 		RF_Free(waitreq, sizeof(*waitreq));
   1805 		return (0);
   1806 
   1807 		/* wakes up a process waiting on SPARET_WAIT and puts an error
    1808 		 * code in it that will cause the daemon to exit */
   1809 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1810 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1811 		waitreq->fcol = -1;
   1812 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1813 		waitreq->next = rf_sparet_wait_queue;
   1814 		rf_sparet_wait_queue = waitreq;
    1815 		rf_broadcast_cond2(rf_sparet_wait_cv);
   1816 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1817 		return (0);
   1818 
   1819 		/* used by the spare table daemon to deliver a spare table
   1820 		 * into the kernel */
   1821 	case RAIDFRAME_SEND_SPARET:
   1822 
   1823 		/* install the spare table */
   1824 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1825 
   1826 		/* respond to the requestor.  the return status of the spare
   1827 		 * table installation is passed in the "fcol" field */
   1828 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1829 		waitreq->fcol = retcode;
   1830 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1831 		waitreq->next = rf_sparet_resp_queue;
   1832 		rf_sparet_resp_queue = waitreq;
   1833 		rf_broadcast_cond2(rf_sparet_resp_cv);
   1834 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1835 
   1836 		return (retcode);
   1837 #endif
   1838 
   1839 	default:
   1840 		break; /* fall through to the os-specific code below */
   1841 
   1842 	}
   1843 
   1844 	if (!raidPtr->valid)
   1845 		return (EINVAL);
   1846 
   1847 	/*
   1848 	 * Add support for "regular" device ioctls here.
   1849 	 */
   1850 
   1851 	error = disk_ioctl(&rs->sc_dkdev, dev, cmd, data, flag, l);
   1852 	if (error != EPASSTHROUGH)
   1853 		return (error);
   1854 
   1855 	switch (cmd) {
   1856 	case DIOCWDINFO:
   1857 	case DIOCSDINFO:
   1858 #ifdef __HAVE_OLD_DISKLABEL
   1859 	case ODIOCWDINFO:
   1860 	case ODIOCSDINFO:
   1861 #endif
   1862 	{
   1863 		struct disklabel *lp;
   1864 #ifdef __HAVE_OLD_DISKLABEL
   1865 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1866 			memset(&newlabel, 0, sizeof newlabel);
   1867 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1868 			lp = &newlabel;
   1869 		} else
   1870 #endif
   1871 		lp = (struct disklabel *)data;
   1872 
   1873 		if ((error = raidlock(rs)) != 0)
   1874 			return (error);
   1875 
   1876 		rs->sc_flags |= RAIDF_LABELLING;
   1877 
   1878 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1879 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1880 		if (error == 0) {
   1881 			if (cmd == DIOCWDINFO
   1882 #ifdef __HAVE_OLD_DISKLABEL
   1883 			    || cmd == ODIOCWDINFO
   1884 #endif
   1885 			   )
   1886 				error = writedisklabel(RAIDLABELDEV(dev),
   1887 				    raidstrategy, rs->sc_dkdev.dk_label,
   1888 				    rs->sc_dkdev.dk_cpulabel);
   1889 		}
   1890 		rs->sc_flags &= ~RAIDF_LABELLING;
   1891 
   1892 		raidunlock(rs);
   1893 
   1894 		if (error)
   1895 			return (error);
   1896 		break;
   1897 	}
   1898 
   1899 	case DIOCWLABEL:
   1900 		if (*(int *) data != 0)
   1901 			rs->sc_flags |= RAIDF_WLABEL;
   1902 		else
   1903 			rs->sc_flags &= ~RAIDF_WLABEL;
   1904 		break;
   1905 
   1906 	case DIOCGDEFLABEL:
   1907 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1908 		break;
   1909 
   1910 #ifdef __HAVE_OLD_DISKLABEL
   1911 	case ODIOCGDEFLABEL:
   1912 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1913 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1914 			return ENOTTY;
   1915 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1916 		break;
   1917 #endif
   1918 
   1919 	case DIOCCACHESYNC:
   1920 		return rf_sync_component_caches(raidPtr);
   1921 
   1922 	case DIOCGSTRATEGY:
   1923 	    {
   1924 		struct disk_strategy *dks = (void *)data;
   1925 
   1926 		s = splbio();
   1927 		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
   1928 		    sizeof(dks->dks_name));
   1929 		splx(s);
   1930 		dks->dks_paramlen = 0;
   1931 
   1932 		return 0;
   1933 	    }
   1934 
   1935 	case DIOCSSTRATEGY:
   1936 	    {
   1937 		struct disk_strategy *dks = (void *)data;
   1938 		struct bufq_state *new;
   1939 		struct bufq_state *old;
   1940 
   1941 		if (dks->dks_param != NULL) {
   1942 			return EINVAL;
   1943 		}
   1944 		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
   1945 		error = bufq_alloc(&new, dks->dks_name,
   1946 		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
   1947 		if (error) {
   1948 			return error;
   1949 		}
   1950 		s = splbio();
   1951 		old = rs->buf_queue;
   1952 		bufq_move(new, old);
   1953 		rs->buf_queue = new;
   1954 		splx(s);
   1955 		bufq_free(old);
   1956 
   1957 		return 0;
   1958 	    }
   1959 
   1960 	default:
   1961 		retcode = ENOTTY;
   1962 	}
   1963 	return (retcode);
   1964 
   1965 }
   1966 
   1967 
   1968 /* raidinit -- complete the rest of the initialization for the
   1969    RAIDframe device.  */
   1970 
   1971 
   1972 static void
   1973 raidinit(struct raid_softc *rs)
   1974 {
   1975 	cfdata_t cf;
   1976 	int     unit;
   1977 	RF_Raid_t *raidPtr = &rs->sc_r;
   1978 
   1979 	unit = raidPtr->raidid;
   1980 
   1981 
   1982 	/* XXX should check return code first... */
   1983 	rs->sc_flags |= RAIDF_INITED;
   1984 
   1985 	/* XXX doesn't check bounds. */
   1986 	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
   1987 
   1988 	/* attach the pseudo device */
   1989 	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
   1990 	cf->cf_name = raid_cd.cd_name;
   1991 	cf->cf_atname = raid_cd.cd_name;
   1992 	cf->cf_unit = unit;
   1993 	cf->cf_fstate = FSTATE_STAR;
   1994 
   1995 	rs->sc_dev = config_attach_pseudo(cf);
   1996 
   1997 	if (rs->sc_dev == NULL) {
   1998 		printf("raid%d: config_attach_pseudo failed\n",
   1999 		    raidPtr->raidid);
   2000 		rs->sc_flags &= ~RAIDF_INITED;
   2001 		free(cf, M_RAIDFRAME);
   2002 		return;
   2003 	}
   2004 
   2005 	/* disk_attach actually creates space for the CPU disklabel, among
   2006 	 * other things, so it's critical to call this *BEFORE* we try putzing
   2007 	 * with disklabels. */
   2008 
   2009 	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
   2010 	disk_attach(&rs->sc_dkdev);
   2011 
   2012 	/* XXX There may be a weird interaction here between this, and
   2013 	 * protectedSectors, as used in RAIDframe.  */
   2014 
   2015 	rs->sc_size = raidPtr->totalSectors;
   2016 
   2017 	rf_set_geometry(rs, raidPtr);
   2018 
   2019 	dkwedge_discover(&rs->sc_dkdev);
   2020 
   2021 }
   2022 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   2023 /* wake up the daemon & tell it to get us a spare table
   2024  * XXX
   2025  * the entries in the queues should be tagged with the raidPtr
   2026  * so that in the extremely rare case that two recons happen at once,
    2027  * we know for which device we're requesting a spare table
   2028  * XXX
   2029  *
   2030  * XXX This code is not currently used. GO
   2031  */
   2032 int
   2033 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
   2034 {
   2035 	int     retcode;
   2036 
   2037 	rf_lock_mutex2(rf_sparet_wait_mutex);
   2038 	req->next = rf_sparet_wait_queue;
   2039 	rf_sparet_wait_queue = req;
   2040 	rf_broadcast_cond2(rf_sparet_wait_cv);
   2041 
    2042 	/* rf_wait_cond2() releases the mutex while we sleep */
   2043 	while (!rf_sparet_resp_queue) {
   2044 		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
   2045 	}
   2046 	req = rf_sparet_resp_queue;
   2047 	rf_sparet_resp_queue = req->next;
   2048 	rf_unlock_mutex2(rf_sparet_wait_mutex);
   2049 
   2050 	retcode = req->fcol;
   2051 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   2052 					 * alloc'd */
   2053 	return (retcode);
   2054 }
   2055 #endif
   2056 
   2057 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   2058  * bp & passes it down.
   2059  * any calls originating in the kernel must use non-blocking I/O
   2060  * do some extra sanity checking to return "appropriate" error values for
   2061  * certain conditions (to make some standard utilities work)
   2062  *
   2063  * Formerly known as: rf_DoAccessKernel
   2064  */
   2065 void
   2066 raidstart(RF_Raid_t *raidPtr)
   2067 {
   2068 	RF_SectorCount_t num_blocks, pb, sum;
   2069 	RF_RaidAddr_t raid_addr;
   2070 	struct partition *pp;
   2071 	daddr_t blocknum;
   2072 	struct raid_softc *rs;
   2073 	int     do_async;
   2074 	struct buf *bp;
   2075 	int rc;
   2076 
   2077 	rs = raidPtr->softc;
   2078 	/* quick check to see if anything has died recently */
   2079 	rf_lock_mutex2(raidPtr->mutex);
   2080 	if (raidPtr->numNewFailures > 0) {
   2081 		rf_unlock_mutex2(raidPtr->mutex);
   2082 		rf_update_component_labels(raidPtr,
   2083 					   RF_NORMAL_COMPONENT_UPDATE);
   2084 		rf_lock_mutex2(raidPtr->mutex);
   2085 		raidPtr->numNewFailures--;
   2086 	}
   2087 
   2088 	/* Check to see if we're at the limit... */
   2089 	while (raidPtr->openings > 0) {
   2090 		rf_unlock_mutex2(raidPtr->mutex);
   2091 
   2092 		/* get the next item, if any, from the queue */
   2093 		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
   2094 			/* nothing more to do */
   2095 			return;
   2096 		}
   2097 
   2098 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   2099 		 * partition.. Need to make it absolute to the underlying
   2100 		 * device.. */
   2101 
   2102 		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
   2103 		if (DISKPART(bp->b_dev) != RAW_PART) {
   2104 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   2105 			blocknum += pp->p_offset;
   2106 		}
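         		/*
         		 * Illustrative example (values assumed): with DEV_BSHIFT
         		 * == 9 and 512-byte raid sectors (logBytesPerSector == 9)
         		 * the shifts cancel, so b_blkno 1000 on a partition at
         		 * offset 63 becomes raid address 1063.  With 4096-byte
         		 * sectors (logBytesPerSector == 12) b_blkno 1000 maps to
         		 * raid sector 125 before the partition offset is added.
         		 */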
   2107 
   2108 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   2109 			    (int) blocknum));
   2110 
   2111 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   2112 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   2113 
   2114 		/* *THIS* is where we adjust what block we're going to...
   2115 		 * but DO NOT TOUCH bp->b_blkno!!! */
   2116 		raid_addr = blocknum;
   2117 
   2118 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   2119 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   2120 		sum = raid_addr + num_blocks + pb;
   2121 		if (1 || rf_debugKernelAccess) {
   2122 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   2123 				    (int) raid_addr, (int) sum, (int) num_blocks,
   2124 				    (int) pb, (int) bp->b_resid));
   2125 		}
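         		/*
         		 * Sanity/overflow check: the "sum < ..." comparisons
         		 * below catch unsigned wraparound of raid_addr +
         		 * num_blocks + pb, so a request that wraps around is
         		 * rejected just like one that simply runs past
         		 * totalSectors.
         		 */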
   2126 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   2127 		    || (sum < num_blocks) || (sum < pb)) {
   2128 			bp->b_error = ENOSPC;
   2129 			bp->b_resid = bp->b_bcount;
   2130 			biodone(bp);
   2131 			rf_lock_mutex2(raidPtr->mutex);
   2132 			continue;
   2133 		}
   2134 		/*
   2135 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   2136 		 */
   2137 
   2138 		if (bp->b_bcount & raidPtr->sectorMask) {
   2139 			bp->b_error = EINVAL;
   2140 			bp->b_resid = bp->b_bcount;
   2141 			biodone(bp);
   2142 			rf_lock_mutex2(raidPtr->mutex);
   2143 			continue;
   2144 
   2145 		}
   2146 		db1_printf(("Calling DoAccess..\n"));
   2147 
   2148 
   2149 		rf_lock_mutex2(raidPtr->mutex);
   2150 		raidPtr->openings--;
   2151 		rf_unlock_mutex2(raidPtr->mutex);
   2152 
   2153 		/*
   2154 		 * Everything is async.
   2155 		 */
   2156 		do_async = 1;
   2157 
   2158 		disk_busy(&rs->sc_dkdev);
   2159 
   2160 		/* XXX we're still at splbio() here... do we *really*
   2161 		   need to be? */
   2162 
   2163 		/* don't ever condition on bp->b_flags & B_WRITE.
   2164 		 * always condition on B_READ instead */
   2165 
   2166 		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   2167 				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   2168 				 do_async, raid_addr, num_blocks,
   2169 				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   2170 
   2171 		if (rc) {
   2172 			bp->b_error = rc;
   2173 			bp->b_resid = bp->b_bcount;
   2174 			biodone(bp);
   2175 			/* continue loop */
   2176 		}
   2177 
   2178 		rf_lock_mutex2(raidPtr->mutex);
   2179 	}
   2180 	rf_unlock_mutex2(raidPtr->mutex);
   2181 }
   2182 
   2183 
   2184 
   2185 
   2186 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   2187 
   2188 int
   2189 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
   2190 {
   2191 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   2192 	struct buf *bp;
   2193 
   2194 	req->queue = queue;
   2195 	bp = req->bp;
   2196 
   2197 	switch (req->type) {
   2198 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   2199 		/* XXX need to do something extra here.. */
   2200 		/* I'm leaving this in, as I've never actually seen it used,
   2201 		 * and I'd like folks to report it... GO */
    2202 		printf("WAKEUP CALLED\n");
   2203 		queue->numOutstanding++;
   2204 
   2205 		bp->b_flags = 0;
   2206 		bp->b_private = req;
   2207 
   2208 		KernelWakeupFunc(bp);
   2209 		break;
   2210 
   2211 	case RF_IO_TYPE_READ:
   2212 	case RF_IO_TYPE_WRITE:
   2213 #if RF_ACC_TRACE > 0
   2214 		if (req->tracerec) {
   2215 			RF_ETIMER_START(req->tracerec->timer);
   2216 		}
   2217 #endif
   2218 		InitBP(bp, queue->rf_cinfo->ci_vp,
   2219 		    op, queue->rf_cinfo->ci_dev,
   2220 		    req->sectorOffset, req->numSector,
   2221 		    req->buf, KernelWakeupFunc, (void *) req,
   2222 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   2223 
   2224 		if (rf_debugKernelAccess) {
   2225 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   2226 				(long) bp->b_blkno));
   2227 		}
   2228 		queue->numOutstanding++;
   2229 		queue->last_deq_sector = req->sectorOffset;
   2230 		/* acc wouldn't have been let in if there were any pending
   2231 		 * reqs at any other priority */
   2232 		queue->curPriority = req->priority;
   2233 
   2234 		db1_printf(("Going for %c to unit %d col %d\n",
   2235 			    req->type, queue->raidPtr->raidid,
   2236 			    queue->col));
   2237 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   2238 			(int) req->sectorOffset, (int) req->numSector,
   2239 			(int) (req->numSector <<
   2240 			    queue->raidPtr->logBytesPerSector),
   2241 			(int) queue->raidPtr->logBytesPerSector));
   2242 
   2243 		/*
   2244 		 * XXX: drop lock here since this can block at
   2245 		 * least with backing SCSI devices.  Retake it
   2246 		 * to minimize fuss with calling interfaces.
   2247 		 */
   2248 
   2249 		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
   2250 		bdev_strategy(bp);
   2251 		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
   2252 		break;
   2253 
   2254 	default:
   2255 		panic("bad req->type in rf_DispatchKernelIO");
   2256 	}
   2257 	db1_printf(("Exiting from DispatchKernelIO\n"));
   2258 
   2259 	return (0);
   2260 }
    2261 /* This is the callback function associated with an I/O invoked from
    2262  * kernel code.
    2263  */
   2264 static void
   2265 KernelWakeupFunc(struct buf *bp)
   2266 {
   2267 	RF_DiskQueueData_t *req = NULL;
   2268 	RF_DiskQueue_t *queue;
   2269 
   2270 	db1_printf(("recovering the request queue:\n"));
   2271 
   2272 	req = bp->b_private;
   2273 
   2274 	queue = (RF_DiskQueue_t *) req->queue;
   2275 
   2276 	rf_lock_mutex2(queue->raidPtr->iodone_lock);
   2277 
   2278 #if RF_ACC_TRACE > 0
   2279 	if (req->tracerec) {
   2280 		RF_ETIMER_STOP(req->tracerec->timer);
   2281 		RF_ETIMER_EVAL(req->tracerec->timer);
   2282 		rf_lock_mutex2(rf_tracing_mutex);
   2283 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2284 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2285 		req->tracerec->num_phys_ios++;
   2286 		rf_unlock_mutex2(rf_tracing_mutex);
   2287 	}
   2288 #endif
   2289 
   2290 	/* XXX Ok, let's get aggressive... If b_error is set, let's go
   2291 	 * ballistic, and mark the component as hosed... */
   2292 
   2293 	if (bp->b_error != 0) {
   2294 		/* Mark the disk as dead */
   2295 		/* but only mark it once... */
   2296 		/* and only if it wouldn't leave this RAID set
   2297 		   completely broken */
   2298 		if (((queue->raidPtr->Disks[queue->col].status ==
   2299 		      rf_ds_optimal) ||
   2300 		     (queue->raidPtr->Disks[queue->col].status ==
   2301 		      rf_ds_used_spare)) &&
   2302 		     (queue->raidPtr->numFailures <
   2303 		      queue->raidPtr->Layout.map->faultsTolerated)) {
   2304 			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
   2305 			       queue->raidPtr->raidid,
   2306 			       bp->b_error,
   2307 			       queue->raidPtr->Disks[queue->col].devname);
   2308 			queue->raidPtr->Disks[queue->col].status =
   2309 			    rf_ds_failed;
   2310 			queue->raidPtr->status = rf_rs_degraded;
   2311 			queue->raidPtr->numFailures++;
   2312 			queue->raidPtr->numNewFailures++;
   2313 		} else {	/* Disk is already dead... */
   2314 			/* printf("Disk already marked as dead!\n"); */
   2315 		}
   2316 
   2317 	}
   2318 
   2319 	/* Fill in the error value */
   2320 	req->error = bp->b_error;
   2321 
   2322 	/* Drop this one on the "finished" queue... */
   2323 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   2324 
   2325 	/* Let the raidio thread know there is work to be done. */
   2326 	rf_signal_cond2(queue->raidPtr->iodone_cv);
   2327 
   2328 	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
   2329 }
   2330 
   2331 
   2332 /*
   2333  * initialize a buf structure for doing an I/O in the kernel.
   2334  */
   2335 static void
   2336 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2337        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
   2338        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
   2339        struct proc *b_proc)
   2340 {
   2341 	/* bp->b_flags       = B_PHYS | rw_flag; */
   2342 	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
   2343 	bp->b_oflags = 0;
   2344 	bp->b_cflags = 0;
   2345 	bp->b_bcount = numSect << logBytesPerSector;
   2346 	bp->b_bufsize = bp->b_bcount;
   2347 	bp->b_error = 0;
   2348 	bp->b_dev = dev;
   2349 	bp->b_data = bf;
   2350 	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
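         	/*
         	 * The b_blkno conversion above is the inverse of the one done
         	 * in raidstart(); e.g. (illustrative only) raid sector 125 with
         	 * 4096-byte sectors (logBytesPerSector == 12) becomes DEV_BSIZE
         	 * block 1000.
         	 */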
   2351 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2352 	if (bp->b_bcount == 0) {
   2353 		panic("bp->b_bcount is zero in InitBP!!");
   2354 	}
   2355 	bp->b_proc = b_proc;
   2356 	bp->b_iodone = cbFunc;
   2357 	bp->b_private = cbArg;
   2358 }
   2359 
   2360 static void
   2361 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
   2362 		    struct disklabel *lp)
   2363 {
   2364 	memset(lp, 0, sizeof(*lp));
   2365 
   2366 	/* fabricate a label... */
   2367 	if (raidPtr->totalSectors > UINT32_MAX)
   2368 		lp->d_secperunit = UINT32_MAX;
   2369 	else
   2370 		lp->d_secperunit = raidPtr->totalSectors;
   2371 	lp->d_secsize = raidPtr->bytesPerSector;
   2372 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2373 	lp->d_ntracks = 4 * raidPtr->numCol;
   2374 	lp->d_ncylinders = raidPtr->totalSectors /
   2375 		(lp->d_nsectors * lp->d_ntracks);
   2376 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
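         	/*
         	 * The geometry above is synthetic.  Illustrative example
         	 * (values assumed): a 4-column set with 128 data sectors per
         	 * stripe and 10485760 total sectors yields d_nsectors = 128,
         	 * d_ntracks = 16, d_secpercyl = 2048 and d_ncylinders = 5120.
         	 */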
   2377 
   2378 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2379 	lp->d_type = DKTYPE_RAID;
   2380 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2381 	lp->d_rpm = 3600;
   2382 	lp->d_interleave = 1;
   2383 	lp->d_flags = 0;
   2384 
   2385 	lp->d_partitions[RAW_PART].p_offset = 0;
   2386 	lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
   2387 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2388 	lp->d_npartitions = RAW_PART + 1;
   2389 
   2390 	lp->d_magic = DISKMAGIC;
   2391 	lp->d_magic2 = DISKMAGIC;
   2392 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2393 
   2394 }
   2395 /*
   2396  * Read the disklabel from the raid device.  If one is not present, fake one
   2397  * up.
   2398  */
   2399 static void
   2400 raidgetdisklabel(dev_t dev)
   2401 {
   2402 	int     unit = raidunit(dev);
   2403 	struct raid_softc *rs;
   2404 	const char   *errstring;
   2405 	struct disklabel *lp;
   2406 	struct cpu_disklabel *clp;
   2407 	RF_Raid_t *raidPtr;
   2408 
   2409 	if ((rs = raidget(unit, false)) == NULL)
   2410 		return;
   2411 
   2412 	lp = rs->sc_dkdev.dk_label;
   2413 	clp = rs->sc_dkdev.dk_cpulabel;
   2414 
   2415 	db1_printf(("Getting the disklabel...\n"));
   2416 
   2417 	memset(clp, 0, sizeof(*clp));
   2418 
   2419 	raidPtr = &rs->sc_r;
   2420 
   2421 	raidgetdefaultlabel(raidPtr, rs, lp);
   2422 
   2423 	/*
   2424 	 * Call the generic disklabel extraction routine.
   2425 	 */
   2426 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2427 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2428 	if (errstring)
   2429 		raidmakedisklabel(rs);
   2430 	else {
   2431 		int     i;
   2432 		struct partition *pp;
   2433 
   2434 		/*
   2435 		 * Sanity check whether the found disklabel is valid.
   2436 		 *
    2437 		 * This is necessary since the total size of the raid device
    2438 		 * may vary when the interleave is changed even though exactly
    2439 		 * the same components are used, and an old disklabel may be
    2440 		 * used if one is found.
   2441 		 */
   2442 		if (lp->d_secperunit < UINT32_MAX ?
   2443 		    lp->d_secperunit != rs->sc_size :
   2444 		    lp->d_secperunit > rs->sc_size)
   2445 			printf("raid%d: WARNING: %s: "
   2446 			    "total sector size in disklabel (%ju) != "
   2447 			    "the size of raid (%ju)\n", unit, rs->sc_xname,
   2448 			    (uintmax_t)lp->d_secperunit,
   2449 			    (uintmax_t)rs->sc_size);
   2450 		for (i = 0; i < lp->d_npartitions; i++) {
   2451 			pp = &lp->d_partitions[i];
   2452 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2453 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2454 				       "exceeds the size of raid (%ju)\n",
   2455 				       unit, rs->sc_xname, 'a' + i,
   2456 				       (uintmax_t)rs->sc_size);
   2457 		}
   2458 	}
   2459 
   2460 }
   2461 /*
   2462  * Take care of things one might want to take care of in the event
   2463  * that a disklabel isn't present.
   2464  */
   2465 static void
   2466 raidmakedisklabel(struct raid_softc *rs)
   2467 {
   2468 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2469 	db1_printf(("Making a label..\n"));
   2470 
   2471 	/*
   2472 	 * For historical reasons, if there's no disklabel present
   2473 	 * the raw partition must be marked FS_BSDFFS.
   2474 	 */
   2475 
   2476 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2477 
   2478 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2479 
   2480 	lp->d_checksum = dkcksum(lp);
   2481 }
   2482 /*
   2483  * Wait interruptibly for an exclusive lock.
   2484  *
   2485  * XXX
   2486  * Several drivers do this; it should be abstracted and made MP-safe.
   2487  * (Hmm... where have we seen this warning before :->  GO )
   2488  */
   2489 static int
   2490 raidlock(struct raid_softc *rs)
   2491 {
   2492 	int     error;
   2493 
   2494 	mutex_enter(&rs->sc_mutex);
   2495 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2496 		rs->sc_flags |= RAIDF_WANTED;
   2497 		error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
   2498 		if (error != 0)
   2499 			return (error);
   2500 	}
   2501 	rs->sc_flags |= RAIDF_LOCKED;
   2502 	mutex_exit(&rs->sc_mutex);
   2503 	return (0);
   2504 }
   2505 /*
   2506  * Unlock and wake up any waiters.
   2507  */
   2508 static void
   2509 raidunlock(struct raid_softc *rs)
   2510 {
   2511 
   2512 	mutex_enter(&rs->sc_mutex);
   2513 	rs->sc_flags &= ~RAIDF_LOCKED;
   2514 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2515 		rs->sc_flags &= ~RAIDF_WANTED;
   2516 		cv_broadcast(&rs->sc_cv);
   2517 	}
   2518 	mutex_exit(&rs->sc_mutex);
   2519 }
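         /*
          * Typical caller pattern for raidlock()/raidunlock() (as used in the
          * DIOCSDINFO/DIOCWDINFO path above):
          *
          *	if ((error = raidlock(rs)) != 0)
          *		return (error);
          *	... modify softc state ...
          *	raidunlock(rs);
          */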
   2520 
   2521 
   2522 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2523 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2524 #define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
   2525 
   2526 static daddr_t
   2527 rf_component_info_offset(void)
   2528 {
   2529 
   2530 	return RF_COMPONENT_INFO_OFFSET;
   2531 }
   2532 
   2533 static daddr_t
   2534 rf_component_info_size(unsigned secsize)
   2535 {
   2536 	daddr_t info_size;
   2537 
   2538 	KASSERT(secsize);
   2539 	if (secsize > RF_COMPONENT_INFO_SIZE)
   2540 		info_size = secsize;
   2541 	else
   2542 		info_size = RF_COMPONENT_INFO_SIZE;
   2543 
   2544 	return info_size;
   2545 }
   2546 
   2547 static daddr_t
   2548 rf_parity_map_offset(RF_Raid_t *raidPtr)
   2549 {
   2550 	daddr_t map_offset;
   2551 
   2552 	KASSERT(raidPtr->bytesPerSector);
   2553 	if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
   2554 		map_offset = raidPtr->bytesPerSector;
   2555 	else
   2556 		map_offset = RF_COMPONENT_INFO_SIZE;
   2557 	map_offset += rf_component_info_offset();
   2558 
   2559 	return map_offset;
   2560 }
   2561 
   2562 static daddr_t
   2563 rf_parity_map_size(RF_Raid_t *raidPtr)
   2564 {
   2565 	daddr_t map_size;
   2566 
   2567 	if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
   2568 		map_size = raidPtr->bytesPerSector;
   2569 	else
   2570 		map_size = RF_PARITY_MAP_SIZE;
   2571 
   2572 	return map_size;
   2573 }
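         /*
          * Illustrative on-disk layout (values assumed): with 512-byte sectors
          * the component label lives at byte offset 16384 in an area of
          * max(secsize, 1024) = 1024 bytes, and the parity map follows at byte
          * 16384 + 1024.  With 4096-byte sectors the label area rounds up to
          * one 4096-byte sector, so the parity map starts at byte 16384 + 4096.
          */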
   2574 
   2575 int
   2576 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2577 {
   2578 	RF_ComponentLabel_t *clabel;
   2579 
   2580 	clabel = raidget_component_label(raidPtr, col);
   2581 	clabel->clean = RF_RAID_CLEAN;
   2582 	raidflush_component_label(raidPtr, col);
   2583 	return(0);
   2584 }
   2585 
   2586 
   2587 int
   2588 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2589 {
   2590 	RF_ComponentLabel_t *clabel;
   2591 
   2592 	clabel = raidget_component_label(raidPtr, col);
   2593 	clabel->clean = RF_RAID_DIRTY;
   2594 	raidflush_component_label(raidPtr, col);
   2595 	return(0);
   2596 }
   2597 
   2598 int
   2599 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2600 {
   2601 	KASSERT(raidPtr->bytesPerSector);
   2602 	return raidread_component_label(raidPtr->bytesPerSector,
   2603 	    raidPtr->Disks[col].dev,
   2604 	    raidPtr->raid_cinfo[col].ci_vp,
   2605 	    &raidPtr->raid_cinfo[col].ci_label);
   2606 }
   2607 
   2608 RF_ComponentLabel_t *
   2609 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2610 {
   2611 	return &raidPtr->raid_cinfo[col].ci_label;
   2612 }
   2613 
   2614 int
   2615 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2616 {
   2617 	RF_ComponentLabel_t *label;
   2618 
   2619 	label = &raidPtr->raid_cinfo[col].ci_label;
   2620 	label->mod_counter = raidPtr->mod_counter;
   2621 #ifndef RF_NO_PARITY_MAP
   2622 	label->parity_map_modcount = label->mod_counter;
   2623 #endif
   2624 	return raidwrite_component_label(raidPtr->bytesPerSector,
   2625 	    raidPtr->Disks[col].dev,
   2626 	    raidPtr->raid_cinfo[col].ci_vp, label);
   2627 }
   2628 
   2629 
   2630 static int
   2631 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
   2632     RF_ComponentLabel_t *clabel)
   2633 {
   2634 	return raidread_component_area(dev, b_vp, clabel,
   2635 	    sizeof(RF_ComponentLabel_t),
   2636 	    rf_component_info_offset(),
   2637 	    rf_component_info_size(secsize));
   2638 }
   2639 
   2640 /* ARGSUSED */
   2641 static int
   2642 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
   2643     size_t msize, daddr_t offset, daddr_t dsize)
   2644 {
   2645 	struct buf *bp;
   2646 	const struct bdevsw *bdev;
   2647 	int error;
   2648 
   2649 	/* XXX should probably ensure that we don't try to do this if
   2650 	   someone has changed rf_protected_sectors. */
   2651 
   2652 	if (b_vp == NULL) {
   2653 		/* For whatever reason, this component is not valid.
   2654 		   Don't try to read a component label from it. */
   2655 		return(EINVAL);
   2656 	}
   2657 
   2658 	/* get a block of the appropriate size... */
   2659 	bp = geteblk((int)dsize);
   2660 	bp->b_dev = dev;
   2661 
   2662 	/* get our ducks in a row for the read */
   2663 	bp->b_blkno = offset / DEV_BSIZE;
   2664 	bp->b_bcount = dsize;
   2665 	bp->b_flags |= B_READ;
   2666  	bp->b_resid = dsize;
   2667 
   2668 	bdev = bdevsw_lookup(bp->b_dev);
   2669 	if (bdev == NULL)
   2670 		return (ENXIO);
   2671 	(*bdev->d_strategy)(bp);
   2672 
   2673 	error = biowait(bp);
   2674 
   2675 	if (!error) {
   2676 		memcpy(data, bp->b_data, msize);
   2677 	}
   2678 
   2679 	brelse(bp, 0);
   2680 	return(error);
   2681 }
   2682 
   2683 
   2684 static int
   2685 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
   2686     RF_ComponentLabel_t *clabel)
   2687 {
   2688 	return raidwrite_component_area(dev, b_vp, clabel,
   2689 	    sizeof(RF_ComponentLabel_t),
   2690 	    rf_component_info_offset(),
   2691 	    rf_component_info_size(secsize), 0);
   2692 }
   2693 
   2694 /* ARGSUSED */
   2695 static int
   2696 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
   2697     size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
   2698 {
   2699 	struct buf *bp;
   2700 	const struct bdevsw *bdev;
   2701 	int error;
   2702 
   2703 	/* get a block of the appropriate size... */
   2704 	bp = geteblk((int)dsize);
   2705 	bp->b_dev = dev;
   2706 
   2707 	/* get our ducks in a row for the write */
   2708 	bp->b_blkno = offset / DEV_BSIZE;
   2709 	bp->b_bcount = dsize;
   2710 	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
   2711  	bp->b_resid = dsize;
   2712 
   2713 	memset(bp->b_data, 0, dsize);
   2714 	memcpy(bp->b_data, data, msize);
   2715 
   2716 	bdev = bdevsw_lookup(bp->b_dev);
   2717 	if (bdev == NULL)
   2718 		return (ENXIO);
   2719 	(*bdev->d_strategy)(bp);
   2720 	if (asyncp)
   2721 		return 0;
   2722 	error = biowait(bp);
   2723 	brelse(bp, 0);
   2724 	if (error) {
   2725 #if 1
   2726 		printf("Failed to write RAID component info!\n");
   2727 #endif
   2728 	}
   2729 
   2730 	return(error);
   2731 }
   2732 
   2733 void
   2734 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
   2735 {
   2736 	int c;
   2737 
   2738 	for (c = 0; c < raidPtr->numCol; c++) {
   2739 		/* Skip dead disks. */
   2740 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
   2741 			continue;
   2742 		/* XXXjld: what if an error occurs here? */
   2743 		raidwrite_component_area(raidPtr->Disks[c].dev,
   2744 		    raidPtr->raid_cinfo[c].ci_vp, map,
   2745 		    RF_PARITYMAP_NBYTE,
   2746 		    rf_parity_map_offset(raidPtr),
   2747 		    rf_parity_map_size(raidPtr), 0);
   2748 	}
   2749 }
   2750 
   2751 void
   2752 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
   2753 {
   2754 	struct rf_paritymap_ondisk tmp;
   2755 	int c,first;
   2756 
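         	/*
         	 * Read the on-disk parity map from every live component and
         	 * fold the copies together with rf_paritymap_merge() into a
         	 * single map covering the whole set.
         	 */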
   2757 	first=1;
   2758 	for (c = 0; c < raidPtr->numCol; c++) {
   2759 		/* Skip dead disks. */
   2760 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
   2761 			continue;
   2762 		raidread_component_area(raidPtr->Disks[c].dev,
   2763 		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
   2764 		    RF_PARITYMAP_NBYTE,
   2765 		    rf_parity_map_offset(raidPtr),
   2766 		    rf_parity_map_size(raidPtr));
   2767 		if (first) {
   2768 			memcpy(map, &tmp, sizeof(*map));
   2769 			first = 0;
   2770 		} else {
   2771 			rf_paritymap_merge(map, &tmp);
   2772 		}
   2773 	}
   2774 }
   2775 
   2776 void
   2777 rf_markalldirty(RF_Raid_t *raidPtr)
   2778 {
   2779 	RF_ComponentLabel_t *clabel;
   2780 	int sparecol;
   2781 	int c;
   2782 	int j;
   2783 	int scol = -1;
   2784 
   2785 	raidPtr->mod_counter++;
   2786 	for (c = 0; c < raidPtr->numCol; c++) {
   2787 		/* we don't want to touch (at all) a disk that has
   2788 		   failed */
   2789 		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
   2790 			clabel = raidget_component_label(raidPtr, c);
   2791 			if (clabel->status == rf_ds_spared) {
   2792 				/* XXX do something special...
   2793 				   but whatever you do, don't
   2794 				   try to access it!! */
   2795 			} else {
   2796 				raidmarkdirty(raidPtr, c);
   2797 			}
   2798 		}
   2799 	}
   2800 
   2801 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2802 		sparecol = raidPtr->numCol + c;
   2803 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2804 			/*
   2805 
   2806 			   we claim this disk is "optimal" if it's
   2807 			   rf_ds_used_spare, as that means it should be
   2808 			   directly substitutable for the disk it replaced.
   2809 			   We note that too...
   2810 
   2811 			 */
   2812 
   2813 			for(j=0;j<raidPtr->numCol;j++) {
   2814 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2815 					scol = j;
   2816 					break;
   2817 				}
   2818 			}
   2819 
   2820 			clabel = raidget_component_label(raidPtr, sparecol);
   2821 			/* make sure status is noted */
   2822 
   2823 			raid_init_component_label(raidPtr, clabel);
   2824 
   2825 			clabel->row = 0;
   2826 			clabel->column = scol;
   2827 			/* Note: we *don't* change status from rf_ds_used_spare
   2828 			   to rf_ds_optimal */
   2829 			/* clabel.status = rf_ds_optimal; */
   2830 
   2831 			raidmarkdirty(raidPtr, sparecol);
   2832 		}
   2833 	}
   2834 }
   2835 
   2836 
   2837 void
   2838 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
   2839 {
   2840 	RF_ComponentLabel_t *clabel;
   2841 	int sparecol;
   2842 	int c;
   2843 	int j;
   2844 	int scol;
   2845 
   2846 	scol = -1;
   2847 
   2848 	/* XXX should do extra checks to make sure things really are clean,
   2849 	   rather than blindly setting the clean bit... */
   2850 
   2851 	raidPtr->mod_counter++;
   2852 
   2853 	for (c = 0; c < raidPtr->numCol; c++) {
   2854 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   2855 			clabel = raidget_component_label(raidPtr, c);
   2856 			/* make sure status is noted */
   2857 			clabel->status = rf_ds_optimal;
   2858 
   2859 			/* note what unit we are configured as */
   2860 			clabel->last_unit = raidPtr->raidid;
   2861 
   2862 			raidflush_component_label(raidPtr, c);
   2863 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2864 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2865 					raidmarkclean(raidPtr, c);
   2866 				}
   2867 			}
   2868 		}
   2869 		/* else we don't touch it.. */
   2870 	}
   2871 
   2872 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2873 		sparecol = raidPtr->numCol + c;
   2874 		/* Need to ensure that the reconstruct actually completed! */
   2875 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2876 			/*
   2877 
   2878 			   we claim this disk is "optimal" if it's
   2879 			   rf_ds_used_spare, as that means it should be
   2880 			   directly substitutable for the disk it replaced.
   2881 			   We note that too...
   2882 
   2883 			 */
   2884 
   2885 			for(j=0;j<raidPtr->numCol;j++) {
   2886 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2887 					scol = j;
   2888 					break;
   2889 				}
   2890 			}
   2891 
   2892 			/* XXX shouldn't *really* need this... */
   2893 			clabel = raidget_component_label(raidPtr, sparecol);
   2894 			/* make sure status is noted */
   2895 
   2896 			raid_init_component_label(raidPtr, clabel);
   2897 
   2898 			clabel->column = scol;
   2899 			clabel->status = rf_ds_optimal;
   2900 			clabel->last_unit = raidPtr->raidid;
   2901 
   2902 			raidflush_component_label(raidPtr, sparecol);
   2903 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2904 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2905 					raidmarkclean(raidPtr, sparecol);
   2906 				}
   2907 			}
   2908 		}
   2909 	}
   2910 }
   2911 
   2912 void
   2913 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2914 {
   2915 
   2916 	if (vp != NULL) {
   2917 		if (auto_configured == 1) {
   2918 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2919 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2920 			vput(vp);
   2921 
   2922 		} else {
   2923 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
   2924 		}
   2925 	}
   2926 }
   2927 
   2928 
   2929 void
   2930 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2931 {
   2932 	int r,c;
   2933 	struct vnode *vp;
   2934 	int acd;
   2935 
   2936 
   2937 	/* We take this opportunity to close the vnodes like we should.. */
   2938 
   2939 	for (c = 0; c < raidPtr->numCol; c++) {
   2940 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2941 		acd = raidPtr->Disks[c].auto_configured;
   2942 		rf_close_component(raidPtr, vp, acd);
   2943 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2944 		raidPtr->Disks[c].auto_configured = 0;
   2945 	}
   2946 
   2947 	for (r = 0; r < raidPtr->numSpare; r++) {
   2948 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2949 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2950 		rf_close_component(raidPtr, vp, acd);
   2951 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2952 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2953 	}
   2954 }
   2955 
   2956 
   2957 void
   2958 rf_ReconThread(struct rf_recon_req *req)
   2959 {
   2960 	int     s;
   2961 	RF_Raid_t *raidPtr;
   2962 
   2963 	s = splbio();
   2964 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2965 	raidPtr->recon_in_progress = 1;
   2966 
   2967 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2968 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2969 
   2970 	RF_Free(req, sizeof(*req));
   2971 
   2972 	raidPtr->recon_in_progress = 0;
   2973 	splx(s);
   2974 
   2975 	/* That's all... */
   2976 	kthread_exit(0);	/* does not return */
   2977 }
   2978 
   2979 void
   2980 rf_RewriteParityThread(RF_Raid_t *raidPtr)
   2981 {
   2982 	int retcode;
   2983 	int s;
   2984 
   2985 	raidPtr->parity_rewrite_stripes_done = 0;
   2986 	raidPtr->parity_rewrite_in_progress = 1;
   2987 	s = splbio();
   2988 	retcode = rf_RewriteParity(raidPtr);
   2989 	splx(s);
   2990 	if (retcode) {
   2991 		printf("raid%d: Error re-writing parity (%d)!\n",
   2992 		    raidPtr->raidid, retcode);
   2993 	} else {
   2994 		/* set the clean bit!  If we shutdown correctly,
   2995 		   the clean bit on each component label will get
   2996 		   set */
   2997 		raidPtr->parity_good = RF_RAID_CLEAN;
   2998 	}
   2999 	raidPtr->parity_rewrite_in_progress = 0;
   3000 
   3001 	/* Anyone waiting for us to stop?  If so, inform them... */
   3002 	if (raidPtr->waitShutdown) {
   3003 		wakeup(&raidPtr->parity_rewrite_in_progress);
   3004 	}
   3005 
   3006 	/* That's all... */
   3007 	kthread_exit(0);	/* does not return */
   3008 }
   3009 
   3010 
   3011 void
   3012 rf_CopybackThread(RF_Raid_t *raidPtr)
   3013 {
   3014 	int s;
   3015 
   3016 	raidPtr->copyback_in_progress = 1;
   3017 	s = splbio();
   3018 	rf_CopybackReconstructedData(raidPtr);
   3019 	splx(s);
   3020 	raidPtr->copyback_in_progress = 0;
   3021 
   3022 	/* That's all... */
   3023 	kthread_exit(0);	/* does not return */
   3024 }
   3025 
   3026 
   3027 void
   3028 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
   3029 {
   3030 	int s;
   3031 	RF_Raid_t *raidPtr;
   3032 
   3033 	s = splbio();
   3034 	raidPtr = req->raidPtr;
   3035 	raidPtr->recon_in_progress = 1;
   3036 	rf_ReconstructInPlace(raidPtr, req->col);
   3037 	RF_Free(req, sizeof(*req));
   3038 	raidPtr->recon_in_progress = 0;
   3039 	splx(s);
   3040 
   3041 	/* That's all... */
   3042 	kthread_exit(0);	/* does not return */
   3043 }
   3044 
   3045 static RF_AutoConfig_t *
   3046 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
   3047     const char *cname, RF_SectorCount_t size, uint64_t numsecs,
   3048     unsigned secsize)
   3049 {
   3050 	int good_one = 0;
   3051 	RF_ComponentLabel_t *clabel;
   3052 	RF_AutoConfig_t *ac;
   3053 
   3054 	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
   3055 	if (clabel == NULL) {
   3056 oomem:
   3057 		    while(ac_list) {
   3058 			    ac = ac_list;
   3059 			    if (ac->clabel)
   3060 				    free(ac->clabel, M_RAIDFRAME);
   3061 			    ac_list = ac_list->next;
   3062 			    free(ac, M_RAIDFRAME);
   3063 		    }
   3064 		    printf("RAID auto config: out of memory!\n");
   3065 		    return NULL; /* XXX probably should panic? */
   3066 	}
   3067 
   3068 	if (!raidread_component_label(secsize, dev, vp, clabel)) {
   3069 		/* Got the label.  Does it look reasonable? */
   3070 		if (rf_reasonable_label(clabel, numsecs) &&
   3071 		    (rf_component_label_partitionsize(clabel) <= size)) {
   3072 #ifdef DEBUG
   3073 			printf("Component on: %s: %llu\n",
   3074 				cname, (unsigned long long)size);
   3075 			rf_print_component_label(clabel);
   3076 #endif
   3077 			/* if it's reasonable, add it, else ignore it. */
   3078 			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
   3079 				M_NOWAIT);
   3080 			if (ac == NULL) {
   3081 				free(clabel, M_RAIDFRAME);
   3082 				goto oomem;
   3083 			}
   3084 			strlcpy(ac->devname, cname, sizeof(ac->devname));
   3085 			ac->dev = dev;
   3086 			ac->vp = vp;
   3087 			ac->clabel = clabel;
   3088 			ac->next = ac_list;
   3089 			ac_list = ac;
   3090 			good_one = 1;
   3091 		}
   3092 	}
   3093 	if (!good_one) {
   3094 		/* cleanup */
   3095 		free(clabel, M_RAIDFRAME);
   3096 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   3097 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   3098 		vput(vp);
   3099 	}
   3100 	return ac_list;
   3101 }
   3102 
   3103 RF_AutoConfig_t *
   3104 rf_find_raid_components(void)
   3105 {
   3106 	struct vnode *vp;
   3107 	struct disklabel label;
   3108 	device_t dv;
   3109 	deviter_t di;
   3110 	dev_t dev;
   3111 	int bmajor, bminor, wedge, rf_part_found;
   3112 	int error;
   3113 	int i;
   3114 	RF_AutoConfig_t *ac_list;
   3115 	uint64_t numsecs;
   3116 	unsigned secsize;
   3117 
   3118 	/* initialize the AutoConfig list */
   3119 	ac_list = NULL;
   3120 
   3121 	/* we begin by trolling through *all* the devices on the system */
   3122 
   3123 	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
   3124 	     dv = deviter_next(&di)) {
   3125 
   3126 		/* we are only interested in disks... */
   3127 		if (device_class(dv) != DV_DISK)
   3128 			continue;
   3129 
   3130 		/* we don't care about floppies... */
   3131 		if (device_is_a(dv, "fd")) {
   3132 			continue;
   3133 		}
   3134 
   3135 		/* we don't care about CD's... */
   3136 		if (device_is_a(dv, "cd")) {
   3137 			continue;
   3138 		}
   3139 
   3140 		/* we don't care about md's... */
   3141 		if (device_is_a(dv, "md")) {
   3142 			continue;
   3143 		}
   3144 
   3145 		/* hdfd is the Atari/Hades floppy driver */
   3146 		if (device_is_a(dv, "hdfd")) {
   3147 			continue;
   3148 		}
   3149 
   3150 		/* fdisa is the Atari/Milan floppy driver */
   3151 		if (device_is_a(dv, "fdisa")) {
   3152 			continue;
   3153 		}
   3154 
   3155 		/* need to find the device_name_to_block_device_major stuff */
   3156 		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
   3157 
   3158 		rf_part_found = 0; /*No raid partition as yet*/
   3159 
   3160 		/* get a vnode for the raw partition of this disk */
   3161 
   3162 		wedge = device_is_a(dv, "dk");
   3163 		bminor = minor(device_unit(dv));
   3164 		dev = wedge ? makedev(bmajor, bminor) :
   3165 		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
   3166 		if (bdevvp(dev, &vp))
   3167 			panic("RAID can't alloc vnode");
   3168 
   3169 		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
   3170 
   3171 		if (error) {
   3172 			/* "Who cares."  Continue looking
   3173 			   for something that exists*/
   3174 			vput(vp);
   3175 			continue;
   3176 		}
   3177 
   3178 		error = getdisksize(vp, &numsecs, &secsize);
   3179 		if (error) {
   3180 			vput(vp);
   3181 			continue;
   3182 		}
   3183 		if (wedge) {
   3184 			struct dkwedge_info dkw;
   3185 			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
   3186 			    NOCRED);
   3187 			if (error) {
   3188 				printf("RAIDframe: can't get wedge info for "
   3189 				    "dev %s (%d)\n", device_xname(dv), error);
   3190 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   3191 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   3192 				vput(vp);
   3193 				continue;
   3194 			}
   3195 
   3196 			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
   3197 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   3198 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   3199 				vput(vp);
   3200 				continue;
   3201 			}
   3202 
   3203 			ac_list = rf_get_component(ac_list, dev, vp,
   3204 			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
   3205 			rf_part_found = 1; /*There is a raid component on this disk*/
   3206 			continue;
   3207 		}
   3208 
   3209 		/* Ok, the disk exists.  Go get the disklabel. */
   3210 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
   3211 		if (error) {
   3212 			/*
   3213 			 * XXX can't happen - open() would
   3214 			 * have errored out (or faked up one)
   3215 			 */
   3216 			if (error != ENOTTY)
   3217 				printf("RAIDframe: can't get label for dev "
   3218 				    "%s (%d)\n", device_xname(dv), error);
   3219 		}
   3220 
   3221 		/* don't need this any more.  We'll allocate it again
   3222 		   a little later if we really do... */
   3223 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   3224 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   3225 		vput(vp);
   3226 
   3227 		if (error)
   3228 			continue;
   3229 
   3230 		rf_part_found = 0; /*No raid partitions yet*/
   3231 		for (i = 0; i < label.d_npartitions; i++) {
   3232 			char cname[sizeof(ac_list->devname)];
   3233 
   3234 			/* We only support partitions marked as RAID */
   3235 			if (label.d_partitions[i].p_fstype != FS_RAID)
   3236 				continue;
   3237 
   3238 			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
   3239 			if (bdevvp(dev, &vp))
   3240 				panic("RAID can't alloc vnode");
   3241 
   3242 			error = VOP_OPEN(vp, FREAD, NOCRED);
   3243 			if (error) {
   3244 				/* Whatever... */
   3245 				vput(vp);
   3246 				continue;
   3247 			}
   3248 			snprintf(cname, sizeof(cname), "%s%c",
   3249 			    device_xname(dv), 'a' + i);
   3250 			ac_list = rf_get_component(ac_list, dev, vp, cname,
   3251 				label.d_partitions[i].p_size, numsecs, secsize);
    3252 			rf_part_found = 1; /* There is at least one raid partition on this disk */
   3253 		}
   3254 
   3255 		/*
    3256 		 * If there is no raid component on this disk, either in a
    3257 		 * disklabel or inside a wedge, check the raw partition as well,
    3258 		 * as it is possible to configure raid components on raw disk
    3259 		 * devices.
   3260 		 */
   3261 
   3262 		if (!rf_part_found) {
   3263 			char cname[sizeof(ac_list->devname)];
   3264 
   3265 			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
   3266 			if (bdevvp(dev, &vp))
   3267 				panic("RAID can't alloc vnode");
   3268 
   3269 			error = VOP_OPEN(vp, FREAD, NOCRED);
   3270 			if (error) {
   3271 				/* Whatever... */
   3272 				vput(vp);
   3273 				continue;
   3274 			}
   3275 			snprintf(cname, sizeof(cname), "%s%c",
   3276 			    device_xname(dv), 'a' + RAW_PART);
   3277 			ac_list = rf_get_component(ac_list, dev, vp, cname,
   3278 				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
   3279 		}
   3280 	}
   3281 	deviter_release(&di);
   3282 	return ac_list;
   3283 }
   3284 
   3285 
   3286 int
   3287 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
   3288 {
   3289 
   3290 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   3291 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   3292 	    ((clabel->clean == RF_RAID_CLEAN) ||
   3293 	     (clabel->clean == RF_RAID_DIRTY)) &&
   3294 	    clabel->row >=0 &&
   3295 	    clabel->column >= 0 &&
   3296 	    clabel->num_rows > 0 &&
   3297 	    clabel->num_columns > 0 &&
   3298 	    clabel->row < clabel->num_rows &&
   3299 	    clabel->column < clabel->num_columns &&
   3300 	    clabel->blockSize > 0 &&
   3301 	    /*
   3302 	     * numBlocksHi may contain garbage, but it is ok since
   3303 	     * the type is unsigned.  If it is really garbage,
   3304 	     * rf_fix_old_label_size() will fix it.
   3305 	     */
   3306 	    rf_component_label_numblocks(clabel) > 0) {
   3307 		/*
   3308 		 * label looks reasonable enough...
   3309 		 * let's make sure it has no old garbage.
   3310 		 */
   3311 		if (numsecs)
   3312 			rf_fix_old_label_size(clabel, numsecs);
   3313 		return(1);
   3314 	}
   3315 	return(0);
   3316 }
   3317 
   3318 
   3319 /*
   3320  * For reasons yet unknown, some old component labels have garbage in
   3321  * the newer numBlocksHi region, and this causes lossage.  Since those
   3322  * disks will also have numsecs set to less than 32 bits of sectors,
   3323  * we can determine when this corruption has occurred, and fix it.
   3324  *
    3325  * The exact same problem, for the same unknown reason, happens to
   3326  * the partitionSizeHi member as well.
   3327  */
   3328 static void
   3329 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
   3330 {
   3331 
   3332 	if (numsecs < ((uint64_t)1 << 32)) {
   3333 		if (clabel->numBlocksHi) {
   3334 			printf("WARNING: total sectors < 32 bits, yet "
   3335 			       "numBlocksHi set\n"
   3336 			       "WARNING: resetting numBlocksHi to zero.\n");
   3337 			clabel->numBlocksHi = 0;
   3338 		}
   3339 
   3340 		if (clabel->partitionSizeHi) {
   3341 			printf("WARNING: total sectors < 32 bits, yet "
   3342 			       "partitionSizeHi set\n"
   3343 			       "WARNING: resetting partitionSizeHi to zero.\n");
   3344 			clabel->partitionSizeHi = 0;
   3345 		}
   3346 	}
   3347 }
   3348 
   3349 
   3350 #ifdef DEBUG
   3351 void
   3352 rf_print_component_label(RF_ComponentLabel_t *clabel)
   3353 {
   3354 	uint64_t numBlocks;
   3355 	static const char *rp[] = {
   3356 	    "No", "Force", "Soft", "*invalid*"
   3357 	};
   3358 
   3359 
   3360 	numBlocks = rf_component_label_numblocks(clabel);
   3361 
   3362 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   3363 	       clabel->row, clabel->column,
   3364 	       clabel->num_rows, clabel->num_columns);
   3365 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   3366 	       clabel->version, clabel->serial_number,
   3367 	       clabel->mod_counter);
   3368 	printf("   Clean: %s Status: %d\n",
   3369 	       clabel->clean ? "Yes" : "No", clabel->status);
   3370 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   3371 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   3372 	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
   3373 	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
   3374 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
   3375 	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
   3376 	printf("   Last configured as: raid%d\n", clabel->last_unit);
   3377 #if 0
   3378 	   printf("   Config order: %d\n", clabel->config_order);
   3379 #endif
   3380 
   3381 }
   3382 #endif
   3383 
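/*
 * rf_create_auto_sets() -- walk the list of auto-detected components and
 * partition it into configuration sets.  Each component is added to the
 * first existing set whose labels it matches (per rf_does_it_fit());
 * otherwise a new set is created for it.  Returns the list of sets.
 */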
   3384 RF_ConfigSet_t *
   3385 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   3386 {
   3387 	RF_AutoConfig_t *ac;
   3388 	RF_ConfigSet_t *config_sets;
   3389 	RF_ConfigSet_t *cset;
   3390 	RF_AutoConfig_t *ac_next;
   3391 
   3392 
   3393 	config_sets = NULL;
   3394 
   3395 	/* Go through the AutoConfig list, and figure out which components
   3396 	   belong to what sets.  */
   3397 	ac = ac_list;
   3398 	while(ac!=NULL) {
   3399 		/* we're going to putz with ac->next, so save it here
   3400 		   for use at the end of the loop */
   3401 		ac_next = ac->next;
   3402 
   3403 		if (config_sets == NULL) {
   3404 			/* will need at least this one... */
   3405 			config_sets = (RF_ConfigSet_t *)
   3406 				malloc(sizeof(RF_ConfigSet_t),
   3407 				       M_RAIDFRAME, M_NOWAIT);
   3408 			if (config_sets == NULL) {
   3409 				panic("rf_create_auto_sets: No memory!");
   3410 			}
   3411 			/* this one is easy :) */
   3412 			config_sets->ac = ac;
   3413 			config_sets->next = NULL;
   3414 			config_sets->rootable = 0;
   3415 			ac->next = NULL;
   3416 		} else {
   3417 			/* which set does this component fit into? */
   3418 			cset = config_sets;
   3419 			while(cset!=NULL) {
   3420 				if (rf_does_it_fit(cset, ac)) {
   3421 					/* looks like it matches... */
   3422 					ac->next = cset->ac;
   3423 					cset->ac = ac;
   3424 					break;
   3425 				}
   3426 				cset = cset->next;
   3427 			}
   3428 			if (cset==NULL) {
   3429 				/* didn't find a match above... new set..*/
   3430 				cset = (RF_ConfigSet_t *)
   3431 					malloc(sizeof(RF_ConfigSet_t),
   3432 					       M_RAIDFRAME, M_NOWAIT);
   3433 				if (cset == NULL) {
   3434 					panic("rf_create_auto_sets: No memory!");
   3435 				}
   3436 				cset->ac = ac;
   3437 				ac->next = NULL;
   3438 				cset->next = config_sets;
   3439 				cset->rootable = 0;
   3440 				config_sets = cset;
   3441 			}
   3442 		}
   3443 		ac = ac_next;
   3444 	}
   3445 
   3446 
   3447 	return(config_sets);
   3448 }
   3449 
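/*
 * rf_does_it_fit() -- decide whether component 'ac' belongs to the
 * configuration set 'cset' by comparing its label against the label of
 * the set's first member.  Returns 1 if it fits, 0 otherwise.
 */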
   3450 static int
   3451 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3452 {
   3453 	RF_ComponentLabel_t *clabel1, *clabel2;
   3454 
   3455 	/* If this one matches the *first* one in the set, that's good
   3456 	   enough, since the other members of the set would have been
   3457 	   through here too... */
   3458 	/* note that we are not checking partitionSize here..
   3459 
   3460 	   Note that we are also not checking the mod_counters here.
   3461 	   If everything else matches except the mod_counter, that's
   3462 	   good enough for this test.  We will deal with the mod_counters
   3463 	   a little later in the autoconfiguration process.
   3464 
   3465 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3466 
   3467 	   The reason we don't check for this is that failed disks
   3468 	   will have lower modification counts.  If those disks are
   3469 	   not added to the set they used to belong to, then they will
   3470 	   form their own set, which may result in 2 different sets,
   3471 	   for example, competing to be configured at raid0, and
   3472 	   perhaps competing to be the root filesystem set.  If the
   3473 	   wrong ones get configured, or both attempt to become /,
   3474 	   weird behaviour and or serious lossage will occur.  Thus we
   3475 	   need to bring them into the fold here, and kick them out at
   3476 	   a later point.
   3477 
   3478 	*/
   3479 
   3480 	clabel1 = cset->ac->clabel;
   3481 	clabel2 = ac->clabel;
   3482 	if ((clabel1->version == clabel2->version) &&
   3483 	    (clabel1->serial_number == clabel2->serial_number) &&
   3484 	    (clabel1->num_rows == clabel2->num_rows) &&
   3485 	    (clabel1->num_columns == clabel2->num_columns) &&
   3486 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3487 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3488 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3489 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3490 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3491 	    (clabel1->blockSize == clabel2->blockSize) &&
   3492 	    rf_component_label_numblocks(clabel1) ==
   3493 	    rf_component_label_numblocks(clabel2) &&
   3494 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3495 	    (clabel1->root_partition == clabel2->root_partition) &&
   3496 	    (clabel1->last_unit == clabel2->last_unit) &&
   3497 	    (clabel1->config_order == clabel2->config_order)) {
    3498 		/* if it gets here, it almost *has* to be a match */
   3499 	} else {
   3500 		/* it's not consistent with somebody in the set..
   3501 		   punt */
   3502 		return(0);
   3503 	}
   3504 	/* all was fine.. it must fit... */
   3505 	return(1);
   3506 }
   3507 
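/*
 * rf_have_enough_components() -- determine whether the set has enough
 * components carrying the newest mod_counter to be configured: RAID 1
 * survives as long as one component of each mirror pair is present,
 * RAID 4/5 tolerate a single missing component, RAID 0 tolerates none.
 * Returns 1 if the set is configurable, 0 otherwise.
 */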
   3508 int
   3509 rf_have_enough_components(RF_ConfigSet_t *cset)
   3510 {
   3511 	RF_AutoConfig_t *ac;
   3512 	RF_AutoConfig_t *auto_config;
   3513 	RF_ComponentLabel_t *clabel;
   3514 	int c;
   3515 	int num_cols;
   3516 	int num_missing;
   3517 	int mod_counter;
   3518 	int mod_counter_found;
   3519 	int even_pair_failed;
   3520 	char parity_type;
   3521 
   3522 
   3523 	/* check to see that we have enough 'live' components
   3524 	   of this set.  If so, we can configure it if necessary */
   3525 
   3526 	num_cols = cset->ac->clabel->num_columns;
   3527 	parity_type = cset->ac->clabel->parityConfig;
   3528 
   3529 	/* XXX Check for duplicate components!?!?!? */
   3530 
   3531 	/* Determine what the mod_counter is supposed to be for this set. */
   3532 
   3533 	mod_counter_found = 0;
   3534 	mod_counter = 0;
   3535 	ac = cset->ac;
   3536 	while(ac!=NULL) {
   3537 		if (mod_counter_found==0) {
   3538 			mod_counter = ac->clabel->mod_counter;
   3539 			mod_counter_found = 1;
   3540 		} else {
   3541 			if (ac->clabel->mod_counter > mod_counter) {
   3542 				mod_counter = ac->clabel->mod_counter;
   3543 			}
   3544 		}
   3545 		ac = ac->next;
   3546 	}
   3547 
   3548 	num_missing = 0;
   3549 	auto_config = cset->ac;
   3550 
   3551 	even_pair_failed = 0;
   3552 	for(c=0; c<num_cols; c++) {
   3553 		ac = auto_config;
   3554 		while(ac!=NULL) {
   3555 			if ((ac->clabel->column == c) &&
   3556 			    (ac->clabel->mod_counter == mod_counter)) {
   3557 				/* it's this one... */
   3558 #ifdef DEBUG
   3559 				printf("Found: %s at %d\n",
   3560 				       ac->devname,c);
   3561 #endif
   3562 				break;
   3563 			}
   3564 			ac=ac->next;
   3565 		}
   3566 		if (ac==NULL) {
   3567 				/* Didn't find one here! */
   3568 				/* special case for RAID 1, especially
   3569 				   where there are more than 2
   3570 				   components (where RAIDframe treats
   3571 				   things a little differently :( ) */
   3572 			if (parity_type == '1') {
   3573 				if (c%2 == 0) { /* even component */
   3574 					even_pair_failed = 1;
   3575 				} else { /* odd component.  If
   3576 					    we're failed, and
   3577 					    so is the even
   3578 					    component, it's
   3579 					    "Good Night, Charlie" */
   3580 					if (even_pair_failed == 1) {
   3581 						return(0);
   3582 					}
   3583 				}
   3584 			} else {
   3585 				/* normal accounting */
   3586 				num_missing++;
   3587 			}
   3588 		}
   3589 		if ((parity_type == '1') && (c%2 == 1)) {
    3590 				/* Just did the odd component of a pair, and we
    3591 				   didn't bail: reset the even_pair_failed flag,
    3592 				   and go on to the next pair... */
   3593 			even_pair_failed = 0;
   3594 		}
   3595 	}
   3596 
   3597 	clabel = cset->ac->clabel;
   3598 
   3599 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3600 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3601 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3602 		/* XXX this needs to be made *much* more general */
   3603 		/* Too many failures */
   3604 		return(0);
   3605 	}
   3606 	/* otherwise, all is well, and we've got enough to take a kick
   3607 	   at autoconfiguring this set */
   3608 	return(1);
   3609 }
   3610 
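/*
 * rf_create_configuration() -- build an RF_Config_t for an auto-detected
 * set: copy the geometry and layout parameters from the first component
 * label and record the device name of each column.
 */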
   3611 void
   3612 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
   3613 			RF_Raid_t *raidPtr)
   3614 {
   3615 	RF_ComponentLabel_t *clabel;
   3616 	int i;
   3617 
   3618 	clabel = ac->clabel;
   3619 
   3620 	/* 1. Fill in the common stuff */
   3621 	config->numRow = clabel->num_rows = 1;
   3622 	config->numCol = clabel->num_columns;
   3623 	config->numSpare = 0; /* XXX should this be set here? */
   3624 	config->sectPerSU = clabel->sectPerSU;
   3625 	config->SUsPerPU = clabel->SUsPerPU;
   3626 	config->SUsPerRU = clabel->SUsPerRU;
   3627 	config->parityConfig = clabel->parityConfig;
   3628 	/* XXX... */
   3629 	strcpy(config->diskQueueType,"fifo");
   3630 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3631 	config->layoutSpecificSize = 0; /* XXX ?? */
   3632 
   3633 	while(ac!=NULL) {
   3634 		/* row/col values will be in range due to the checks
   3635 		   in reasonable_label() */
   3636 		strcpy(config->devnames[0][ac->clabel->column],
   3637 		       ac->devname);
   3638 		ac = ac->next;
   3639 	}
   3640 
   3641 	for(i=0;i<RF_MAXDBGV;i++) {
   3642 		config->debugVars[i][0] = 0;
   3643 	}
   3644 }
   3645 
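/*
 * rf_set_autoconfig() -- store new_value in the 'autoconfigure' field of
 * the component label of every optimal component and every in-use spare,
 * writing each label back to disk.  Returns new_value.
 */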
   3646 int
   3647 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3648 {
   3649 	RF_ComponentLabel_t *clabel;
   3650 	int column;
   3651 	int sparecol;
   3652 
   3653 	raidPtr->autoconfigure = new_value;
   3654 
   3655 	for(column=0; column<raidPtr->numCol; column++) {
   3656 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3657 			clabel = raidget_component_label(raidPtr, column);
   3658 			clabel->autoconfigure = new_value;
   3659 			raidflush_component_label(raidPtr, column);
   3660 		}
   3661 	}
    3662 	for(column = 0; column < raidPtr->numSpare; column++) {
   3663 		sparecol = raidPtr->numCol + column;
   3664 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3665 			clabel = raidget_component_label(raidPtr, sparecol);
   3666 			clabel->autoconfigure = new_value;
   3667 			raidflush_component_label(raidPtr, sparecol);
   3668 		}
   3669 	}
   3670 	return(new_value);
   3671 }
   3672 
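/*
 * rf_set_rootpartition() -- store new_value in the 'root_partition' field
 * of the component label of every optimal component and every in-use
 * spare, writing each label back to disk.  Returns new_value.
 */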
   3673 int
   3674 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3675 {
   3676 	RF_ComponentLabel_t *clabel;
   3677 	int column;
   3678 	int sparecol;
   3679 
   3680 	raidPtr->root_partition = new_value;
   3681 	for(column=0; column<raidPtr->numCol; column++) {
   3682 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3683 			clabel = raidget_component_label(raidPtr, column);
   3684 			clabel->root_partition = new_value;
   3685 			raidflush_component_label(raidPtr, column);
   3686 		}
   3687 	}
    3688 	for(column = 0; column < raidPtr->numSpare; column++) {
   3689 		sparecol = raidPtr->numCol + column;
   3690 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3691 			clabel = raidget_component_label(raidPtr, sparecol);
   3692 			clabel->root_partition = new_value;
   3693 			raidflush_component_label(raidPtr, sparecol);
   3694 		}
   3695 	}
   3696 	return(new_value);
   3697 }
   3698 
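/*
 * rf_release_all_vps() -- close and release the component vnodes held
 * open by the autoconfiguration code for the given configuration set.
 */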
   3699 void
   3700 rf_release_all_vps(RF_ConfigSet_t *cset)
   3701 {
   3702 	RF_AutoConfig_t *ac;
   3703 
   3704 	ac = cset->ac;
   3705 	while(ac!=NULL) {
   3706 		/* Close the vp, and give it back */
   3707 		if (ac->vp) {
   3708 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3709 			VOP_CLOSE(ac->vp, FREAD, NOCRED);
   3710 			vput(ac->vp);
   3711 			ac->vp = NULL;
   3712 		}
   3713 		ac = ac->next;
   3714 	}
   3715 }
   3716 
   3717 
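/*
 * rf_cleanup_config_set() -- free the component labels and RF_AutoConfig_t
 * structures belonging to the given configuration set, then the set itself.
 */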
   3718 void
   3719 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3720 {
   3721 	RF_AutoConfig_t *ac;
   3722 	RF_AutoConfig_t *next_ac;
   3723 
   3724 	ac = cset->ac;
   3725 	while(ac!=NULL) {
   3726 		next_ac = ac->next;
   3727 		/* nuke the label */
   3728 		free(ac->clabel, M_RAIDFRAME);
   3729 		/* cleanup the config structure */
   3730 		free(ac, M_RAIDFRAME);
   3731 		/* "next.." */
   3732 		ac = next_ac;
   3733 	}
   3734 	/* and, finally, nuke the config set */
   3735 	free(cset, M_RAIDFRAME);
   3736 }
   3737 
   3738 
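/*
 * raid_init_component_label() -- fill in a component label from the
 * current state of the RAID set: version, serial number, mod counter,
 * geometry, layout parameters, and the autoconfigure/root settings.
 */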
   3739 void
   3740 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
   3741 {
   3742 	/* current version number */
   3743 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3744 	clabel->serial_number = raidPtr->serial_number;
   3745 	clabel->mod_counter = raidPtr->mod_counter;
   3746 
   3747 	clabel->num_rows = 1;
   3748 	clabel->num_columns = raidPtr->numCol;
   3749 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3750 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3751 
   3752 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3753 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3754 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3755 
   3756 	clabel->blockSize = raidPtr->bytesPerSector;
   3757 	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
   3758 
   3759 	/* XXX not portable */
   3760 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3761 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3762 	clabel->autoconfigure = raidPtr->autoconfigure;
   3763 	clabel->root_partition = raidPtr->root_partition;
   3764 	clabel->last_unit = raidPtr->raidid;
   3765 	clabel->config_order = raidPtr->config_order;
   3766 
   3767 #ifndef RF_NO_PARITY_MAP
   3768 	rf_paritymap_init_label(raidPtr->parity_map, clabel);
   3769 #endif
   3770 }
   3771 
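/*
 * rf_auto_config_set() -- configure a RAID set from an auto-detected
 * configuration set: pick a unit number (preferring the one recorded in
 * the component labels), build the configuration, and bring the set up.
 * On success, the set's root eligibility is noted in cset->rootable.
 * Returns the attached softc, or NULL on failure.
 */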
   3772 struct raid_softc *
   3773 rf_auto_config_set(RF_ConfigSet_t *cset)
   3774 {
   3775 	RF_Raid_t *raidPtr;
   3776 	RF_Config_t *config;
   3777 	int raidID;
   3778 	struct raid_softc *sc;
   3779 
   3780 #ifdef DEBUG
   3781 	printf("RAID autoconfigure\n");
   3782 #endif
   3783 
   3784 	/* 1. Create a config structure */
   3785 	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
   3786 	if (config == NULL) {
   3787 		printf("%s: Out of mem - config!?!?\n", __func__);
   3788 				/* XXX do something more intelligent here. */
   3789 		return NULL;
   3790 	}
   3791 
   3792 	/*
    3793 	   2. Figure out what RAID ID this one is supposed to live at.
    3794 	   See if we can get the same RAID dev that it was configured
    3795 	   on last time.
   3796 	*/
   3797 
   3798 	raidID = cset->ac->clabel->last_unit;
   3799 	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
   3800 	     sc = raidget(++raidID, false))
   3801 		continue;
   3802 #ifdef DEBUG
   3803 	printf("Configuring raid%d:\n",raidID);
   3804 #endif
   3805 
   3806 	if (sc == NULL)
   3807 		sc = raidget(raidID, true);
   3808 	if (sc == NULL) {
   3809 		printf("%s: Out of mem - softc!?!?\n", __func__);
   3810 				/* XXX do something more intelligent here. */
   3811 		free(config, M_RAIDFRAME);
   3812 		return NULL;
   3813 	}
   3814 
   3815 	raidPtr = &sc->sc_r;
   3816 
   3817 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3818 	raidPtr->softc = sc;
   3819 	raidPtr->raidid = raidID;
   3820 	raidPtr->openings = RAIDOUTSTANDING;
   3821 
   3822 	/* 3. Build the configuration structure */
   3823 	rf_create_configuration(cset->ac, config, raidPtr);
   3824 
   3825 	/* 4. Do the configuration */
   3826 	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
   3827 		raidinit(sc);
   3828 
   3829 		rf_markalldirty(raidPtr);
   3830 		raidPtr->autoconfigure = 1; /* XXX do this here? */
   3831 		switch (cset->ac->clabel->root_partition) {
   3832 		case 1:	/* Force Root */
   3833 		case 2:	/* Soft Root: root when boot partition part of raid */
   3834 			/*
   3835 			 * everything configured just fine.  Make a note
   3836 			 * that this set is eligible to be root,
   3837 			 * or forced to be root
   3838 			 */
   3839 			cset->rootable = cset->ac->clabel->root_partition;
   3840 			/* XXX do this here? */
   3841 			raidPtr->root_partition = cset->rootable;
   3842 			break;
   3843 		default:
   3844 			break;
   3845 		}
   3846 	} else {
   3847 		raidput(sc);
   3848 		sc = NULL;
   3849 	}
   3850 
   3851 	/* 5. Cleanup */
   3852 	free(config, M_RAIDFRAME);
   3853 	return sc;
   3854 }
   3855 
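/*
 * rf_disk_unbusy() -- record completion of the I/O described by 'desc'
 * in the disk(9) statistics of the owning RAID set.
 */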
   3856 void
   3857 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
   3858 {
   3859 	struct buf *bp;
   3860 	struct raid_softc *rs;
   3861 
   3862 	bp = (struct buf *)desc->bp;
   3863 	rs = desc->raidPtr->softc;
   3864 	disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
   3865 	    (bp->b_flags & B_READ));
   3866 }
   3867 
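/*
 * rf_pool_init() -- initialize one of RAIDframe's resource pools at
 * IPL_BIO, prime it with xmin items, and set the low and high watermarks
 * to xmin and xmax respectively.
 */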
   3868 void
   3869 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
   3870 	     size_t xmin, size_t xmax)
   3871 {
   3872 	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
   3873 	pool_sethiwat(p, xmax);
   3874 	pool_prime(p, xmin);
   3875 	pool_setlowat(p, xmin);
   3876 }
   3877 
   3878 /*
    3879  * rf_buf_queue_check(RF_Raid_t *raidPtr) -- looks into the buf_queue to see
   3880  * if there is IO pending and if that IO could possibly be done for a
   3881  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
   3882  * otherwise.
   3883  *
   3884  */
   3885 
   3886 int
   3887 rf_buf_queue_check(RF_Raid_t *raidPtr)
   3888 {
   3889 	struct raid_softc *rs = raidPtr->softc;
   3890 	if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
   3891 		/* there is work to do */
   3892 		return 0;
   3893 	}
   3894 	/* default is nothing to do */
   3895 	return 1;
   3896 }
   3897 
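/*
 * rf_getdisksize() -- query the size and sector size of the component
 * open on 'vp' and record them in 'diskPtr', reserving rf_protectedSectors
 * for the component label.  Returns 0 on success, or the error from
 * getdisksize().
 */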
   3898 int
   3899 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
   3900 {
   3901 	uint64_t numsecs;
   3902 	unsigned secsize;
   3903 	int error;
   3904 
   3905 	error = getdisksize(vp, &numsecs, &secsize);
   3906 	if (error == 0) {
   3907 		diskPtr->blockSize = secsize;
   3908 		diskPtr->numBlocks = numsecs - rf_protectedSectors;
   3909 		diskPtr->partitionSize = numsecs;
   3910 		return 0;
   3911 	}
   3912 	return error;
   3913 }
   3914 
   3915 static int
   3916 raid_match(device_t self, cfdata_t cfdata, void *aux)
   3917 {
   3918 	return 1;
   3919 }
   3920 
   3921 static void
   3922 raid_attach(device_t parent, device_t self, void *aux)
   3923 {
   3924 
   3925 }
   3926 
   3927 
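/*
 * raid_detach() -- detach the raid unit backing 'self': take the softc
 * lock and let raid_detach_unlocked() do the work, releasing the lock
 * again if the detach fails.
 */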
   3928 static int
   3929 raid_detach(device_t self, int flags)
   3930 {
   3931 	int error;
   3932 	struct raid_softc *rs = raidget(device_unit(self), false);
   3933 
   3934 	if (rs == NULL)
   3935 		return ENXIO;
   3936 
   3937 	if ((error = raidlock(rs)) != 0)
   3938 		return (error);
   3939 
   3940 	error = raid_detach_unlocked(rs);
   3941 
   3942 	if (error != 0)
   3943 		raidunlock(rs);
   3944 
   3945 	return error;
   3946 }
   3947 
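/*
 * rf_set_geometry() -- advertise a synthetic geometry for the RAID set,
 * derived from the array's size and layout, to the disk(9) layer.
 */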
   3948 static void
   3949 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
   3950 {
   3951 	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;
   3952 
   3953 	memset(dg, 0, sizeof(*dg));
   3954 
   3955 	dg->dg_secperunit = raidPtr->totalSectors;
   3956 	dg->dg_secsize = raidPtr->bytesPerSector;
   3957 	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   3958 	dg->dg_ntracks = 4 * raidPtr->numCol;
   3959 
   3960 	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
   3961 }
   3962 
   3963 /*
   3964  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
   3965  * We end up returning whatever error was returned by the first cache flush
   3966  * that fails.
   3967  */
   3968 
   3969 int
   3970 rf_sync_component_caches(RF_Raid_t *raidPtr)
   3971 {
   3972 	int c, sparecol;
    3973 	int e, error;
   3974 	int force = 1;
   3975 
   3976 	error = 0;
   3977 	for (c = 0; c < raidPtr->numCol; c++) {
   3978 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   3979 			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
   3980 					  &force, FWRITE, NOCRED);
   3981 			if (e) {
   3982 				if (e != ENODEV)
   3983 					printf("raid%d: cache flush to component %s failed.\n",
   3984 					       raidPtr->raidid, raidPtr->Disks[c].devname);
   3985 				if (error == 0) {
   3986 					error = e;
   3987 				}
   3988 			}
   3989 		}
   3990 	}
   3991 
    3992 	for (c = 0; c < raidPtr->numSpare; c++) {
   3993 		sparecol = raidPtr->numCol + c;
   3994 		/* Need to ensure that the reconstruct actually completed! */
   3995 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3996 			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
   3997 					  DIOCCACHESYNC, &force, FWRITE, NOCRED);
   3998 			if (e) {
   3999 				if (e != ENODEV)
   4000 					printf("raid%d: cache flush to component %s failed.\n",
   4001 					       raidPtr->raidid, raidPtr->Disks[sparecol].devname);
   4002 				if (error == 0) {
   4003 					error = e;
   4004 				}
   4005 			}
   4006 		}
   4007 	}
   4008 	return error;
   4009 }
   4010 
   4011 /*
   4012  * Module interface
   4013  */
   4014 
   4015 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr");
   4016 
   4017 #ifdef _MODULE
   4018 CFDRIVER_DECL(raid, DV_DISK, NULL);
   4019 #endif
   4020 
   4021 static int raid_modcmd(modcmd_t, void *);
   4022 static int raid_modcmd_init(void);
   4023 static int raid_modcmd_fini(void);
   4024 
   4025 static int
   4026 raid_modcmd(modcmd_t cmd, void *data)
   4027 {
   4028 	int error;
   4029 
   4030 	error = 0;
   4031 	switch (cmd) {
   4032 	case MODULE_CMD_INIT:
   4033 		error = raid_modcmd_init();
   4034 		break;
   4035 	case MODULE_CMD_FINI:
   4036 		error = raid_modcmd_fini();
   4037 		break;
   4038 	default:
   4039 		error = ENOTTY;
   4040 		break;
   4041 	}
   4042 	return error;
   4043 }
   4044 
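/*
 * raid_modcmd_init() -- attach the block/character devsw and the autoconf
 * glue, boot the RAIDframe core, and register a finalizer so RAID sets
 * are autoconfigured once all real devices have attached.
 */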
   4045 static int
   4046 raid_modcmd_init(void)
   4047 {
   4048 	int error;
   4049 	int bmajor, cmajor;
   4050 
   4051 	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
   4052 	mutex_enter(&raid_lock);
   4053 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   4054 	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
   4055 	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
   4056 	rf_init_cond2(rf_sparet_resp_cv, "rfgst");
   4057 
   4058 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
   4059 #endif
   4060 
   4061 	bmajor = cmajor = -1;
   4062 	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
   4063 	    &raid_cdevsw, &cmajor);
   4064 	if (error != 0 && error != EEXIST) {
   4065 		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
   4066 		mutex_exit(&raid_lock);
   4067 		return error;
   4068 	}
   4069 #ifdef _MODULE
   4070 	error = config_cfdriver_attach(&raid_cd);
   4071 	if (error != 0) {
   4072 		aprint_error("%s: config_cfdriver_attach failed %d\n",
   4073 		    __func__, error);
   4074 		devsw_detach(&raid_bdevsw, &raid_cdevsw);
   4075 		mutex_exit(&raid_lock);
   4076 		return error;
   4077 	}
   4078 #endif
   4079 	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
   4080 	if (error != 0) {
   4081 		aprint_error("%s: config_cfattach_attach failed %d\n",
   4082 		    __func__, error);
   4083 #ifdef _MODULE
   4084 		config_cfdriver_detach(&raid_cd);
   4085 #endif
   4086 		devsw_detach(&raid_bdevsw, &raid_cdevsw);
   4087 		mutex_exit(&raid_lock);
   4088 		return error;
   4089 	}
   4090 
   4091 	raidautoconfigdone = false;
   4092 
   4093 	mutex_exit(&raid_lock);
   4094 
   4095 	if (error == 0) {
   4096 		if (rf_BootRaidframe(true) == 0)
   4097 			aprint_verbose("Kernelized RAIDframe activated\n");
   4098 		else
   4099 			panic("Serious error activating RAID!!");
   4100 	}
   4101 
   4102 	/*
   4103 	 * Register a finalizer which will be used to auto-config RAID
   4104 	 * sets once all real hardware devices have been found.
   4105 	 */
   4106 	error = config_finalize_register(NULL, rf_autoconfig);
   4107 	if (error != 0) {
   4108 		aprint_error("WARNING: unable to register RAIDframe "
   4109 		    "finalizer\n");
   4110 	}
   4111 
   4112 	return error;
   4113 }
   4114 
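/*
 * raid_modcmd_fini() -- refuse to unload while any raid unit exists;
 * otherwise detach the autoconf glue and devsw (rolling back on error),
 * shut down the RAIDframe core, and destroy the global lock.
 */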
   4115 static int
   4116 raid_modcmd_fini(void)
   4117 {
   4118 	int error;
   4119 
   4120 	mutex_enter(&raid_lock);
   4121 
   4122 	/* Don't allow unload if raid device(s) exist.  */
   4123 	if (!LIST_EMPTY(&raids)) {
   4124 		mutex_exit(&raid_lock);
   4125 		return EBUSY;
   4126 	}
   4127 
   4128 	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
   4129 	if (error != 0) {
   4130 		mutex_exit(&raid_lock);
   4131 		return error;
   4132 	}
   4133 #ifdef _MODULE
   4134 	error = config_cfdriver_detach(&raid_cd);
   4135 	if (error != 0) {
   4136 		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
   4137 		mutex_exit(&raid_lock);
   4138 		return error;
   4139 	}
   4140 #endif
   4141 	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
   4142 	if (error != 0) {
   4143 #ifdef _MODULE
   4144 		config_cfdriver_attach(&raid_cd);
   4145 #endif
   4146 		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
   4147 		mutex_exit(&raid_lock);
   4148 		return error;
   4149 	}
   4150 	rf_BootRaidframe(false);
   4151 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   4152 	rf_destroy_mutex2(rf_sparet_wait_mutex);
   4153 	rf_destroy_cond2(rf_sparet_wait_cv);
   4154 	rf_destroy_cond2(rf_sparet_resp_cv);
   4155 #endif
   4156 	mutex_exit(&raid_lock);
   4157 	mutex_destroy(&raid_lock);
   4158 
   4159 	return error;
   4160 }
   4161