Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.331
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.331 2016/01/02 16:00:01 mlelstv Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Greg Oster; Jason R. Thorpe.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright (c) 1988 University of Utah.
     34  * Copyright (c) 1990, 1993
     35  *      The Regents of the University of California.  All rights reserved.
     36  *
     37  * This code is derived from software contributed to Berkeley by
     38  * the Systems Programming Group of the University of Utah Computer
     39  * Science Department.
     40  *
     41  * Redistribution and use in source and binary forms, with or without
     42  * modification, are permitted provided that the following conditions
     43  * are met:
     44  * 1. Redistributions of source code must retain the above copyright
     45  *    notice, this list of conditions and the following disclaimer.
     46  * 2. Redistributions in binary form must reproduce the above copyright
     47  *    notice, this list of conditions and the following disclaimer in the
     48  *    documentation and/or other materials provided with the distribution.
     49  * 3. Neither the name of the University nor the names of its contributors
     50  *    may be used to endorse or promote products derived from this software
     51  *    without specific prior written permission.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     63  * SUCH DAMAGE.
     64  *
     65  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     66  *
     67  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     68  */
     69 
     70 /*
     71  * Copyright (c) 1995 Carnegie-Mellon University.
     72  * All rights reserved.
     73  *
     74  * Authors: Mark Holland, Jim Zelenka
     75  *
     76  * Permission to use, copy, modify and distribute this software and
     77  * its documentation is hereby granted, provided that both the copyright
     78  * notice and this permission notice appear in all copies of the
     79  * software, derivative works or modified versions, and any portions
     80  * thereof, and that both notices appear in supporting documentation.
     81  *
     82  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     83  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     84  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     85  *
     86  * Carnegie Mellon requests users of this software to return to
     87  *
     88  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     89  *  School of Computer Science
     90  *  Carnegie Mellon University
     91  *  Pittsburgh PA 15213-3890
     92  *
     93  * any improvements or extensions that they make and grant Carnegie the
     94  * rights to redistribute these changes.
     95  */
     96 
     97 /***********************************************************
     98  *
     99  * rf_kintf.c -- the kernel interface routines for RAIDframe
    100  *
    101  ***********************************************************/
    102 
    103 #include <sys/cdefs.h>
    104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.331 2016/01/02 16:00:01 mlelstv Exp $");
    105 
    106 #ifdef _KERNEL_OPT
    107 #include "opt_compat_netbsd.h"
    108 #include "opt_raid_autoconfig.h"
    109 #endif
    110 
    111 #include <sys/param.h>
    112 #include <sys/errno.h>
    113 #include <sys/pool.h>
    114 #include <sys/proc.h>
    115 #include <sys/queue.h>
    116 #include <sys/disk.h>
    117 #include <sys/device.h>
    118 #include <sys/stat.h>
    119 #include <sys/ioctl.h>
    120 #include <sys/fcntl.h>
    121 #include <sys/systm.h>
    122 #include <sys/vnode.h>
    123 #include <sys/disklabel.h>
    124 #include <sys/conf.h>
    125 #include <sys/buf.h>
    126 #include <sys/bufq.h>
    127 #include <sys/reboot.h>
    128 #include <sys/kauth.h>
    129 #include <sys/module.h>
    130 
    131 #include <prop/proplib.h>
    132 
    133 #include <dev/raidframe/raidframevar.h>
    134 #include <dev/raidframe/raidframeio.h>
    135 #include <dev/raidframe/rf_paritymap.h>
    136 
    137 #include "rf_raid.h"
    138 #include "rf_copyback.h"
    139 #include "rf_dag.h"
    140 #include "rf_dagflags.h"
    141 #include "rf_desc.h"
    142 #include "rf_diskqueue.h"
    143 #include "rf_etimer.h"
    144 #include "rf_general.h"
    145 #include "rf_kintf.h"
    146 #include "rf_options.h"
    147 #include "rf_driver.h"
    148 #include "rf_parityscan.h"
    149 #include "rf_threadstuff.h"
    150 
    151 #ifdef COMPAT_50
    152 #include "rf_compat50.h"
    153 #endif
    154 
    155 #include "ioconf.h"
    156 
    157 #ifdef DEBUG
    158 int     rf_kdebug_level = 0;
    159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    160 #else				/* DEBUG */
    161 #define db1_printf(a) { }
    162 #endif				/* DEBUG */
    163 
    164 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
    165 static rf_declare_mutex2(rf_sparet_wait_mutex);
    166 static rf_declare_cond2(rf_sparet_wait_cv);
    167 static rf_declare_cond2(rf_sparet_resp_cv);
    168 
    169 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    170 						 * spare table */
    171 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    172 						 * installation process */
    173 #endif
    174 
    175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    176 
    177 /* prototypes */
    178 static void KernelWakeupFunc(struct buf *);
    179 static void InitBP(struct buf *, struct vnode *, unsigned,
    180     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    181     void *, int, struct proc *);
    182 struct raid_softc;
    183 static void raidinit(struct raid_softc *);
    184 
    185 static int raid_match(device_t, cfdata_t, void *);
    186 static void raid_attach(device_t, device_t, void *);
    187 static int raid_detach(device_t, int);
    188 
    189 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    190     daddr_t, daddr_t);
    191 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    192     daddr_t, daddr_t, int);
    193 
    194 static int raidwrite_component_label(unsigned,
    195     dev_t, struct vnode *, RF_ComponentLabel_t *);
    196 static int raidread_component_label(unsigned,
    197     dev_t, struct vnode *, RF_ComponentLabel_t *);
    198 
    199 
    200 static dev_type_open(raidopen);
    201 static dev_type_close(raidclose);
    202 static dev_type_read(raidread);
    203 static dev_type_write(raidwrite);
    204 static dev_type_ioctl(raidioctl);
    205 static dev_type_strategy(raidstrategy);
    206 static dev_type_dump(raiddump);
    207 static dev_type_size(raidsize);
    208 
/*
 * Block device entry points (raidN block devices).  D_DISK marks
 * this as a disk-like device to the rest of the kernel.
 */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* Character (raw) device entry points. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* Hooks handed to the generic disk(9) framework. */
static struct dkdriver rf_dkdriver = {
	.d_strategy = raidstrategy,
	.d_minphys = minphys
};
    239 
/* Per-unit software state for one RAID device. */
struct raid_softc {
	device_t sc_dev;	/* autoconf handle; NULL if not attached */
	int	sc_unit;	/* unit number ("raidN") */
	int     sc_flags;	/* flags (RAIDF_*, below) */
	int     sc_cflags;	/* configuration flags */
	kmutex_t sc_mutex;	/* interlock mutex */
	kcondvar_t sc_cv;	/* and the condvar */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;		/* RAIDframe per-array state */
	LIST_ENTRY(raid_softc) sc_link;	/* entry on the global "raids" list */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
    261 
    262 #define	raidunit(x)	DISKUNIT(x)
    263 
    264 extern struct cfdriver raid_cd;
    265 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    266     raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    267     DVF_DETACH_SHUTDOWN);
    268 
    269 /*
    270  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    271  * Be aware that large numbers can allow the driver to consume a lot of
    272  * kernel memory, especially on writes, and in degraded mode reads.
    273  *
    274  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    275  * a single 64K write will typically require 64K for the old data,
    276  * 64K for the old parity, and 64K for the new parity, for a total
    277  * of 192K (if the parity buffer is not re-used immediately).
    278  * Even it if is used immediately, that's still 128K, which when multiplied
    279  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    280  *
    281  * Now in degraded mode, for example, a 64K read on the above setup may
    282  * require data reconstruction, which will require *all* of the 4 remaining
    283  * disks to participate -- 4 * 32K/disk == 128K again.
    284  */
    285 
    286 #ifndef RAIDOUTSTANDING
    287 #define RAIDOUTSTANDING   6
    288 #endif
    289 
    290 #define RAIDLABELDEV(dev)	\
    291 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    292 
    293 /* declared here, and made public, for the benefit of KVM stuff.. */
    294 
    295 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    296 				     struct disklabel *);
    297 static void raidgetdisklabel(dev_t);
    298 static void raidmakedisklabel(struct raid_softc *);
    299 
    300 static int raidlock(struct raid_softc *);
    301 static void raidunlock(struct raid_softc *);
    302 
    303 static int raid_detach_unlocked(struct raid_softc *);
    304 
    305 static void rf_markalldirty(RF_Raid_t *);
    306 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
    307 
    308 void rf_ReconThread(struct rf_recon_req *);
    309 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    310 void rf_CopybackThread(RF_Raid_t *raidPtr);
    311 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    312 int rf_autoconfig(device_t);
    313 void rf_buildroothack(RF_ConfigSet_t *);
    314 
    315 RF_AutoConfig_t *rf_find_raid_components(void);
    316 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    317 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    318 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
    319 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    320 int rf_set_autoconfig(RF_Raid_t *, int);
    321 int rf_set_rootpartition(RF_Raid_t *, int);
    322 void rf_release_all_vps(RF_ConfigSet_t *);
    323 void rf_cleanup_config_set(RF_ConfigSet_t *);
    324 int rf_have_enough_components(RF_ConfigSet_t *);
    325 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
    326 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
    327 
    328 /*
    329  * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
    330  * Note that this is overridden by having RAID_AUTOCONFIG as an option
    331  * in the kernel config file.
    332  */
    333 #ifdef RAID_AUTOCONFIG
    334 int raidautoconfig = 1;
    335 #else
    336 int raidautoconfig = 0;
    337 #endif
    338 static bool raidautoconfigdone = false;
    339 
    340 struct RF_Pools_s rf_pools;
    341 
    342 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
    343 static kmutex_t raid_lock;
    344 
    345 static struct raid_softc *
    346 raidcreate(int unit) {
    347 	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
    348 	if (sc == NULL) {
    349 #ifdef DIAGNOSTIC
    350 		printf("%s: out of memory\n", __func__);
    351 #endif
    352 		return NULL;
    353 	}
    354 	sc->sc_unit = unit;
    355 	bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
    356 	cv_init(&sc->sc_cv, "raidunit");
    357 	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
    358 	return sc;
    359 }
    360 
    361 static void
    362 raiddestroy(struct raid_softc *sc) {
    363 	cv_destroy(&sc->sc_cv);
    364 	mutex_destroy(&sc->sc_mutex);
    365 	bufq_free(sc->buf_queue);
    366 	kmem_free(sc, sizeof(*sc));
    367 }
    368 
    369 static struct raid_softc *
    370 raidget(int unit, bool create) {
    371 	struct raid_softc *sc;
    372 	if (unit < 0) {
    373 #ifdef DIAGNOSTIC
    374 		panic("%s: unit %d!", __func__, unit);
    375 #endif
    376 		return NULL;
    377 	}
    378 	mutex_enter(&raid_lock);
    379 	LIST_FOREACH(sc, &raids, sc_link) {
    380 		if (sc->sc_unit == unit) {
    381 			mutex_exit(&raid_lock);
    382 			return sc;
    383 		}
    384 	}
    385 	mutex_exit(&raid_lock);
    386 	if (!create)
    387 		return NULL;
    388 	if ((sc = raidcreate(unit)) == NULL)
    389 		return NULL;
    390 	mutex_enter(&raid_lock);
    391 	LIST_INSERT_HEAD(&raids, sc, sc_link);
    392 	mutex_exit(&raid_lock);
    393 	return sc;
    394 }
    395 
    396 static void
    397 raidput(struct raid_softc *sc) {
    398 	mutex_enter(&raid_lock);
    399 	LIST_REMOVE(sc, sc_link);
    400 	mutex_exit(&raid_lock);
    401 	raiddestroy(sc);
    402 }
    403 
void
raidattach(int num)
{

	/*
	 * Intentionally empty; "num" is ignored.  Device attachment
	 * and associated initialization now occurs as part of the
	 * module initialization.
	 */
}
    413 
    414 int
    415 rf_autoconfig(device_t self)
    416 {
    417 	RF_AutoConfig_t *ac_list;
    418 	RF_ConfigSet_t *config_sets;
    419 
    420 	if (!raidautoconfig || raidautoconfigdone == true)
    421 		return (0);
    422 
    423 	/* XXX This code can only be run once. */
    424 	raidautoconfigdone = true;
    425 
    426 #ifdef __HAVE_CPU_BOOTCONF
    427 	/*
    428 	 * 0. find the boot device if needed first so we can use it later
    429 	 * this needs to be done before we autoconfigure any raid sets,
    430 	 * because if we use wedges we are not going to be able to open
    431 	 * the boot device later
    432 	 */
    433 	if (booted_device == NULL)
    434 		cpu_bootconf();
    435 #endif
    436 	/* 1. locate all RAID components on the system */
    437 	aprint_debug("Searching for RAID components...\n");
    438 	ac_list = rf_find_raid_components();
    439 
    440 	/* 2. Sort them into their respective sets. */
    441 	config_sets = rf_create_auto_sets(ac_list);
    442 
    443 	/*
    444 	 * 3. Evaluate each set and configure the valid ones.
    445 	 * This gets done in rf_buildroothack().
    446 	 */
    447 	rf_buildroothack(config_sets);
    448 
    449 	return 1;
    450 }
    451 
/*
 * Return non-zero if the RAID set "r" has the boot device "bdv" as
 * one of its components, 0 otherwise.
 *
 * Matching strips the leading "/dev/" from each component name and
 * compares the first strlen(bootname) characters against the boot
 * device's xname, so "/dev/wd0e" matches boot device "wd0".
 * NOTE(review): this is a prefix compare, so bootname "wd1" would
 * also match a component on "wd10" -- confirm this is acceptable.
 * Wedge components ("dk*") are translated to their parent disk's
 * name before comparing.
 */
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname = device_xname(bdv);
	size_t len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		/* skip the leading "/dev/" */
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			/* wedge: compare against the parent disk's name */
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}
    475 
/*
 * Walk the list of discovered configuration sets, auto-configure the
 * eligible ones, and then -- unless the user hardwired a root device
 * via rootspec -- try to select booted_device from among the
 * configured, rootable sets.  The config_sets list is consumed and
 * cleaned up here.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;		/* count of rootable sets configured */
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					/* remember the rootable set */
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		/* take over root if the set claims the root partition
		   or actually contains the current boot device */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* narrow the candidates to sets that both claim the
		   root partition and contain the boot device */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
    586 
    587 static int
    588 raidsize(dev_t dev)
    589 {
    590 	struct raid_softc *rs;
    591 	struct disklabel *lp;
    592 	int     part, unit, omask, size;
    593 
    594 	unit = raidunit(dev);
    595 	if ((rs = raidget(unit, false)) == NULL)
    596 		return -1;
    597 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    598 		return (-1);
    599 
    600 	part = DISKPART(dev);
    601 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    602 	lp = rs->sc_dkdev.dk_label;
    603 
    604 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
    605 		return (-1);
    606 
    607 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    608 		size = -1;
    609 	else
    610 		size = lp->d_partitions[part].p_size *
    611 		    (lp->d_secsize / DEV_BSIZE);
    612 
    613 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
    614 		return (-1);
    615 
    616 	return (size);
    617 
    618 }
    619 
/*
 * Dump kernel memory to the RAID device during a crash dump.  Only
 * RAID 1 sets (one data column, one parity column) are supported,
 * since each component then holds a complete copy of the data.
 * Returns 0 on success or an errno on failure.
 */
static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int     part, c, sparecol, j, scol, dumpto;
	int     error = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* the dump must be a whole number of DEV_BSIZE blocks... */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* ...and must fit entirely within the device */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one?  Find which column this
			   spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	*/

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
    755 
/*
 * Open a partition of the RAID device.  The softc for the unit is
 * created on first reference (raidget with create == true).  Opens
 * of non-raw partitions fail while wedges exist or while the unit is
 * shutting down.  Returns an errno.
 */
/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* refuse new opens once a shutdown has been requested */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* read the disklabel on the first open of a configured unit */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_nwedges == 0) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
    841 
/*
 * Close a partition of the RAID device.  On the last close of a
 * configured unit the component labels are marked clean; if a
 * shutdown was requested (RAIDF_SHUTDOWN) the unit is detached or,
 * if it was never attached, its softc is discarded.
 */
/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
						 RF_FINAL_COMPONENT_UPDATE);
	}
	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)) {
		/*
		 * Detach this raid unit
		 */
		cfdata_t cf = NULL;
		int retcode = 0;

		if (rs->sc_dev != NULL) {
			cf = device_cfdata(rs->sc_dev);

			/* must drop the unit lock before detaching */
			raidunlock(rs);
			retcode = config_detach(rs->sc_dev, DETACH_QUIET);
			if (retcode == 0)
				/* free the pseudo device attach bits */
				free(cf, M_RAIDFRAME);
		} else {
			/* never attached: just discard the softc */
			raidput(rs);
		}
		return retcode;
	}

	raidunlock(rs);
	return (0);
}
    907 
/*
 * raidstrategy: block-I/O entry point.
 *
 * Validates the unit and the transfer, bounds-checks the request
 * against either the raw device size (RAW_PART) or the disklabel,
 * then queues the buffer for the RAID I/O thread and wakes it.  On
 * any validation failure the buffer is completed immediately with
 * b_resid set to the full byte count.
 */
static void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int     wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	if (bp->b_bcount == 0) {
		/* Zero-length transfer: nothing to do, complete with
		   no error. */
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	/* Label writes are allowed while labelling or when explicitly
	   enabled. */
	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size; /* device size in DEV_BSIZE unit */

		/* Convert totalSectors (native sector units) to
		   DEV_BSIZE units, shifting in whichever direction the
		   sector size requires. */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	/* Error or no-op: complete the buffer with nothing transferred. */
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
    978 
    979 /* ARGSUSED */
    980 static int
    981 raidread(dev_t dev, struct uio *uio, int flags)
    982 {
    983 	int     unit = raidunit(dev);
    984 	struct raid_softc *rs;
    985 
    986 	if ((rs = raidget(unit, false)) == NULL)
    987 		return ENXIO;
    988 
    989 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    990 		return (ENXIO);
    991 
    992 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    993 
    994 }
    995 
    996 /* ARGSUSED */
    997 static int
    998 raidwrite(dev_t dev, struct uio *uio, int flags)
    999 {
   1000 	int     unit = raidunit(dev);
   1001 	struct raid_softc *rs;
   1002 
   1003 	if ((rs = raidget(unit, false)) == NULL)
   1004 		return ENXIO;
   1005 
   1006 	if ((rs->sc_flags & RAIDF_INITED) == 0)
   1007 		return (ENXIO);
   1008 
   1009 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
   1010 
   1011 }
   1012 
/*
 * raid_detach_unlocked: tear down a RAID set.  Despite the name, the
 * caller is expected to hold the unit lock on entry; "unlocked" here
 * means this function does not take the lock itself.
 *
 * Fails with EBUSY while any partition is still open.  Otherwise shuts
 * down the RAID machinery (unless the set was never initialized),
 * detaches and destroys the disk, and releases both the unit lock and
 * the softc reference.
 *
 * NOTE(review): only the success path calls raidunlock()/raidput();
 * the EBUSY and rf_Shutdown() error returns leave the lock held for
 * the caller to release.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	/* Free the softc */
	aprint_normal_dev(rs->sc_dev, "detached\n");
	raidunlock(rs);
	raidput(rs);

	return 0;
}
   1047 
   1048 static int
   1049 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
   1050 {
   1051 	int     unit = raidunit(dev);
   1052 	int     error = 0;
   1053 	int     part, pmask, s;
   1054 	cfdata_t cf;
   1055 	struct raid_softc *rs;
   1056 	RF_Config_t *k_cfg, *u_cfg;
   1057 	RF_Raid_t *raidPtr;
   1058 	RF_RaidDisk_t *diskPtr;
   1059 	RF_AccTotals_t *totals;
   1060 	RF_DeviceConfig_t *d_cfg, **ucfgp;
   1061 	u_char *specific_buf;
   1062 	int retcode = 0;
   1063 	int column;
   1064 /*	int raidid; */
   1065 	struct rf_recon_req *rrcopy, *rr;
   1066 	RF_ComponentLabel_t *clabel;
   1067 	RF_ComponentLabel_t *ci_label;
   1068 	RF_ComponentLabel_t **clabel_ptr;
   1069 	RF_SingleComponent_t *sparePtr,*componentPtr;
   1070 	RF_SingleComponent_t component;
   1071 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
   1072 	int i, j, d;
   1073 #ifdef __HAVE_OLD_DISKLABEL
   1074 	struct disklabel newlabel;
   1075 #endif
   1076 
   1077 	if ((rs = raidget(unit, false)) == NULL)
   1078 		return ENXIO;
   1079 	raidPtr = &rs->sc_r;
   1080 
   1081 	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
   1082 		(int) DISKPART(dev), (int) unit, cmd));
   1083 
   1084 	/* Must be open for writes for these commands... */
   1085 	switch (cmd) {
   1086 #ifdef DIOCGSECTORSIZE
   1087 	case DIOCGSECTORSIZE:
   1088 		*(u_int *)data = raidPtr->bytesPerSector;
   1089 		return 0;
   1090 	case DIOCGMEDIASIZE:
   1091 		*(off_t *)data =
   1092 		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
   1093 		return 0;
   1094 #endif
   1095 	case DIOCSDINFO:
   1096 	case DIOCWDINFO:
   1097 #ifdef __HAVE_OLD_DISKLABEL
   1098 	case ODIOCWDINFO:
   1099 	case ODIOCSDINFO:
   1100 #endif
   1101 	case DIOCWLABEL:
   1102 	case DIOCAWEDGE:
   1103 	case DIOCDWEDGE:
   1104 	case DIOCMWEDGES:
   1105 	case DIOCSSTRATEGY:
   1106 		if ((flag & FWRITE) == 0)
   1107 			return (EBADF);
   1108 	}
   1109 
   1110 	/* Must be initialized for these... */
   1111 	switch (cmd) {
   1112 	case DIOCGDINFO:
   1113 	case DIOCSDINFO:
   1114 	case DIOCWDINFO:
   1115 #ifdef __HAVE_OLD_DISKLABEL
   1116 	case ODIOCGDINFO:
   1117 	case ODIOCWDINFO:
   1118 	case ODIOCSDINFO:
   1119 	case ODIOCGDEFLABEL:
   1120 #endif
   1121 	case DIOCGPARTINFO:
   1122 	case DIOCWLABEL:
   1123 	case DIOCGDEFLABEL:
   1124 	case DIOCAWEDGE:
   1125 	case DIOCDWEDGE:
   1126 	case DIOCLWEDGES:
   1127 	case DIOCMWEDGES:
   1128 	case DIOCCACHESYNC:
   1129 	case RAIDFRAME_SHUTDOWN:
   1130 	case RAIDFRAME_REWRITEPARITY:
   1131 	case RAIDFRAME_GET_INFO:
   1132 	case RAIDFRAME_RESET_ACCTOTALS:
   1133 	case RAIDFRAME_GET_ACCTOTALS:
   1134 	case RAIDFRAME_KEEP_ACCTOTALS:
   1135 	case RAIDFRAME_GET_SIZE:
   1136 	case RAIDFRAME_FAIL_DISK:
   1137 	case RAIDFRAME_COPYBACK:
   1138 	case RAIDFRAME_CHECK_RECON_STATUS:
   1139 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1140 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1141 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1142 	case RAIDFRAME_ADD_HOT_SPARE:
   1143 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1144 	case RAIDFRAME_INIT_LABELS:
   1145 	case RAIDFRAME_REBUILD_IN_PLACE:
   1146 	case RAIDFRAME_CHECK_PARITY:
   1147 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1148 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1149 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1150 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1151 	case RAIDFRAME_SET_AUTOCONFIG:
   1152 	case RAIDFRAME_SET_ROOT:
   1153 	case RAIDFRAME_DELETE_COMPONENT:
   1154 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1155 	case RAIDFRAME_PARITYMAP_STATUS:
   1156 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
   1157 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
   1158 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
   1159 	case DIOCGSTRATEGY:
   1160 	case DIOCSSTRATEGY:
   1161 		if ((rs->sc_flags & RAIDF_INITED) == 0)
   1162 			return (ENXIO);
   1163 	}
   1164 
   1165 	switch (cmd) {
   1166 #ifdef COMPAT_50
   1167 	case RAIDFRAME_GET_INFO50:
   1168 		return rf_get_info50(raidPtr, data);
   1169 
   1170 	case RAIDFRAME_CONFIGURE50:
   1171 		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
   1172 			return retcode;
   1173 		goto config;
   1174 #endif
   1175 		/* configure the system */
   1176 	case RAIDFRAME_CONFIGURE:
   1177 
   1178 		if (raidPtr->valid) {
   1179 			/* There is a valid RAID set running on this unit! */
   1180 			printf("raid%d: Device already configured!\n",unit);
   1181 			return(EINVAL);
   1182 		}
   1183 
   1184 		/* copy-in the configuration information */
   1185 		/* data points to a pointer to the configuration structure */
   1186 
   1187 		u_cfg = *((RF_Config_t **) data);
   1188 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
   1189 		if (k_cfg == NULL) {
   1190 			return (ENOMEM);
   1191 		}
   1192 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
   1193 		if (retcode) {
   1194 			RF_Free(k_cfg, sizeof(RF_Config_t));
   1195 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
   1196 				retcode));
   1197 			goto no_config;
   1198 		}
   1199 		goto config;
   1200 	config:
   1201 		rs->sc_flags &= ~RAIDF_SHUTDOWN;
   1202 
   1203 		/* allocate a buffer for the layout-specific data, and copy it
   1204 		 * in */
   1205 		if (k_cfg->layoutSpecificSize) {
   1206 			if (k_cfg->layoutSpecificSize > 10000) {
   1207 				/* sanity check */
   1208 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1209 				retcode = EINVAL;
   1210 				goto no_config;
   1211 			}
   1212 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
   1213 			    (u_char *));
   1214 			if (specific_buf == NULL) {
   1215 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1216 				retcode = ENOMEM;
   1217 				goto no_config;
   1218 			}
   1219 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1220 			    k_cfg->layoutSpecificSize);
   1221 			if (retcode) {
   1222 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1223 				RF_Free(specific_buf,
   1224 					k_cfg->layoutSpecificSize);
   1225 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
   1226 					retcode));
   1227 				goto no_config;
   1228 			}
   1229 		} else
   1230 			specific_buf = NULL;
   1231 		k_cfg->layoutSpecific = specific_buf;
   1232 
   1233 		/* should do some kind of sanity check on the configuration.
   1234 		 * Store the sum of all the bytes in the last byte? */
   1235 
   1236 		/* configure the system */
   1237 
   1238 		/*
   1239 		 * Clear the entire RAID descriptor, just to make sure
   1240 		 *  there is no stale data left in the case of a
   1241 		 *  reconfiguration
   1242 		 */
   1243 		memset(raidPtr, 0, sizeof(*raidPtr));
   1244 		raidPtr->softc = rs;
   1245 		raidPtr->raidid = unit;
   1246 
   1247 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
   1248 
   1249 		if (retcode == 0) {
   1250 
   1251 			/* allow this many simultaneous IO's to
   1252 			   this RAID device */
   1253 			raidPtr->openings = RAIDOUTSTANDING;
   1254 
   1255 			raidinit(rs);
   1256 			rf_markalldirty(raidPtr);
   1257 		}
   1258 		/* free the buffers.  No return code here. */
   1259 		if (k_cfg->layoutSpecificSize) {
   1260 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1261 		}
   1262 		RF_Free(k_cfg, sizeof(RF_Config_t));
   1263 
   1264 	no_config:
   1265 		/*
   1266 		 * If configuration failed, set sc_flags so that we
   1267 		 * will detach the device when we close it.
   1268 		 */
   1269 		if (retcode != 0)
   1270 			rs->sc_flags |= RAIDF_SHUTDOWN;
   1271 		return (retcode);
   1272 
   1273 		/* shutdown the system */
   1274 	case RAIDFRAME_SHUTDOWN:
   1275 
   1276 		part = DISKPART(dev);
   1277 		pmask = (1 << part);
   1278 
   1279 		if ((error = raidlock(rs)) != 0)
   1280 			return (error);
   1281 
   1282 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
   1283 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
   1284 			(rs->sc_dkdev.dk_copenmask & pmask)))
   1285 			retcode = EBUSY;
   1286 		else {
   1287 			rs->sc_flags |= RAIDF_SHUTDOWN;
   1288 			rs->sc_dkdev.dk_copenmask &= ~pmask;
   1289 			rs->sc_dkdev.dk_bopenmask &= ~pmask;
   1290 			rs->sc_dkdev.dk_openmask &= ~pmask;
   1291 			retcode = 0;
   1292 		}
   1293 
   1294 		raidunlock(rs);
   1295 
   1296 		if (retcode != 0)
   1297 			return retcode;
   1298 
   1299 		/* free the pseudo device attach bits */
   1300 
   1301 		cf = device_cfdata(rs->sc_dev);
   1302 		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
   1303 			free(cf, M_RAIDFRAME);
   1304 
   1305 		return (retcode);
   1306 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1307 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1308 		/* need to read the component label for the disk indicated
   1309 		   by row,column in clabel */
   1310 
   1311 		/*
   1312 		 * Perhaps there should be an option to skip the in-core
   1313 		 * copy and hit the disk, as with disklabel(8).
   1314 		 */
   1315 		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
   1316 
   1317 		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
   1318 
   1319 		if (retcode) {
   1320 			RF_Free(clabel, sizeof(*clabel));
   1321 			return retcode;
   1322 		}
   1323 
   1324 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1325 
   1326 		column = clabel->column;
   1327 
   1328 		if ((column < 0) || (column >= raidPtr->numCol +
   1329 		    raidPtr->numSpare)) {
   1330 			RF_Free(clabel, sizeof(*clabel));
   1331 			return EINVAL;
   1332 		}
   1333 
   1334 		RF_Free(clabel, sizeof(*clabel));
   1335 
   1336 		clabel = raidget_component_label(raidPtr, column);
   1337 
   1338 		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
   1339 
   1340 #if 0
   1341 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1342 		clabel = (RF_ComponentLabel_t *) data;
   1343 
   1344 		/* XXX check the label for valid stuff... */
   1345 		/* Note that some things *should not* get modified --
   1346 		   the user should be re-initing the labels instead of
   1347 		   trying to patch things.
   1348 		   */
   1349 
   1350 		raidid = raidPtr->raidid;
   1351 #ifdef DEBUG
   1352 		printf("raid%d: Got component label:\n", raidid);
   1353 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1354 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1355 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1356 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1357 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1358 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1359 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1360 #endif
   1361 		clabel->row = 0;
   1362 		column = clabel->column;
   1363 
   1364 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1365 			return(EINVAL);
   1366 		}
   1367 
   1368 		/* XXX this isn't allowed to do anything for now :-) */
   1369 
   1370 		/* XXX and before it is, we need to fill in the rest
   1371 		   of the fields!?!?!?! */
   1372 		memcpy(raidget_component_label(raidPtr, column),
   1373 		    clabel, sizeof(*clabel));
   1374 		raidflush_component_label(raidPtr, column);
   1375 		return (0);
   1376 #endif
   1377 
   1378 	case RAIDFRAME_INIT_LABELS:
   1379 		clabel = (RF_ComponentLabel_t *) data;
   1380 		/*
   1381 		   we only want the serial number from
   1382 		   the above.  We get all the rest of the information
   1383 		   from the config that was used to create this RAID
   1384 		   set.
   1385 		   */
   1386 
   1387 		raidPtr->serial_number = clabel->serial_number;
   1388 
   1389 		for(column=0;column<raidPtr->numCol;column++) {
   1390 			diskPtr = &raidPtr->Disks[column];
   1391 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1392 				ci_label = raidget_component_label(raidPtr,
   1393 				    column);
   1394 				/* Zeroing this is important. */
   1395 				memset(ci_label, 0, sizeof(*ci_label));
   1396 				raid_init_component_label(raidPtr, ci_label);
   1397 				ci_label->serial_number =
   1398 				    raidPtr->serial_number;
   1399 				ci_label->row = 0; /* we dont' pretend to support more */
   1400 				rf_component_label_set_partitionsize(ci_label,
   1401 				    diskPtr->partitionSize);
   1402 				ci_label->column = column;
   1403 				raidflush_component_label(raidPtr, column);
   1404 			}
   1405 			/* XXXjld what about the spares? */
   1406 		}
   1407 
   1408 		return (retcode);
   1409 	case RAIDFRAME_SET_AUTOCONFIG:
   1410 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1411 		printf("raid%d: New autoconfig value is: %d\n",
   1412 		       raidPtr->raidid, d);
   1413 		*(int *) data = d;
   1414 		return (retcode);
   1415 
   1416 	case RAIDFRAME_SET_ROOT:
   1417 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1418 		printf("raid%d: New rootpartition value is: %d\n",
   1419 		       raidPtr->raidid, d);
   1420 		*(int *) data = d;
   1421 		return (retcode);
   1422 
   1423 		/* initialize all parity */
   1424 	case RAIDFRAME_REWRITEPARITY:
   1425 
   1426 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1427 			/* Parity for RAID 0 is trivially correct */
   1428 			raidPtr->parity_good = RF_RAID_CLEAN;
   1429 			return(0);
   1430 		}
   1431 
   1432 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1433 			/* Re-write is already in progress! */
   1434 			return(EINVAL);
   1435 		}
   1436 
   1437 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1438 					   rf_RewriteParityThread,
   1439 					   raidPtr,"raid_parity");
   1440 		return (retcode);
   1441 
   1442 
   1443 	case RAIDFRAME_ADD_HOT_SPARE:
   1444 		sparePtr = (RF_SingleComponent_t *) data;
   1445 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
   1446 		retcode = rf_add_hot_spare(raidPtr, &component);
   1447 		return(retcode);
   1448 
   1449 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1450 		return(retcode);
   1451 
   1452 	case RAIDFRAME_DELETE_COMPONENT:
   1453 		componentPtr = (RF_SingleComponent_t *)data;
   1454 		memcpy( &component, componentPtr,
   1455 			sizeof(RF_SingleComponent_t));
   1456 		retcode = rf_delete_component(raidPtr, &component);
   1457 		return(retcode);
   1458 
   1459 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1460 		componentPtr = (RF_SingleComponent_t *)data;
   1461 		memcpy( &component, componentPtr,
   1462 			sizeof(RF_SingleComponent_t));
   1463 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1464 		return(retcode);
   1465 
   1466 	case RAIDFRAME_REBUILD_IN_PLACE:
   1467 
   1468 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1469 			/* Can't do this on a RAID 0!! */
   1470 			return(EINVAL);
   1471 		}
   1472 
   1473 		if (raidPtr->recon_in_progress == 1) {
   1474 			/* a reconstruct is already in progress! */
   1475 			return(EINVAL);
   1476 		}
   1477 
   1478 		componentPtr = (RF_SingleComponent_t *) data;
   1479 		memcpy( &component, componentPtr,
   1480 			sizeof(RF_SingleComponent_t));
   1481 		component.row = 0; /* we don't support any more */
   1482 		column = component.column;
   1483 
   1484 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1485 			return(EINVAL);
   1486 		}
   1487 
   1488 		rf_lock_mutex2(raidPtr->mutex);
   1489 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1490 		    (raidPtr->numFailures > 0)) {
   1491 			/* XXX 0 above shouldn't be constant!!! */
   1492 			/* some component other than this has failed.
   1493 			   Let's not make things worse than they already
   1494 			   are... */
   1495 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1496 			       raidPtr->raidid);
   1497 			printf("raid%d:     Col: %d   Too many failures.\n",
   1498 			       raidPtr->raidid, column);
   1499 			rf_unlock_mutex2(raidPtr->mutex);
   1500 			return (EINVAL);
   1501 		}
   1502 		if (raidPtr->Disks[column].status ==
   1503 		    rf_ds_reconstructing) {
   1504 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1505 			       raidPtr->raidid);
   1506 			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);
   1507 
   1508 			rf_unlock_mutex2(raidPtr->mutex);
   1509 			return (EINVAL);
   1510 		}
   1511 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1512 			rf_unlock_mutex2(raidPtr->mutex);
   1513 			return (EINVAL);
   1514 		}
   1515 		rf_unlock_mutex2(raidPtr->mutex);
   1516 
   1517 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1518 		if (rrcopy == NULL)
   1519 			return(ENOMEM);
   1520 
   1521 		rrcopy->raidPtr = (void *) raidPtr;
   1522 		rrcopy->col = column;
   1523 
   1524 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1525 					   rf_ReconstructInPlaceThread,
   1526 					   rrcopy,"raid_reconip");
   1527 		return(retcode);
   1528 
   1529 	case RAIDFRAME_GET_INFO:
   1530 		if (!raidPtr->valid)
   1531 			return (ENODEV);
   1532 		ucfgp = (RF_DeviceConfig_t **) data;
   1533 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1534 			  (RF_DeviceConfig_t *));
   1535 		if (d_cfg == NULL)
   1536 			return (ENOMEM);
   1537 		d_cfg->rows = 1; /* there is only 1 row now */
   1538 		d_cfg->cols = raidPtr->numCol;
   1539 		d_cfg->ndevs = raidPtr->numCol;
   1540 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1541 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1542 			return (ENOMEM);
   1543 		}
   1544 		d_cfg->nspares = raidPtr->numSpare;
   1545 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1546 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1547 			return (ENOMEM);
   1548 		}
   1549 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1550 		d = 0;
   1551 		for (j = 0; j < d_cfg->cols; j++) {
   1552 			d_cfg->devs[d] = raidPtr->Disks[j];
   1553 			d++;
   1554 		}
   1555 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1556 			d_cfg->spares[i] = raidPtr->Disks[j];
   1557 			if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
   1558 				/* XXX: raidctl(8) expects to see this as a used spare */
   1559 				d_cfg->spares[i].status = rf_ds_used_spare;
   1560 			}
   1561 		}
   1562 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1563 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1564 
   1565 		return (retcode);
   1566 
   1567 	case RAIDFRAME_CHECK_PARITY:
   1568 		*(int *) data = raidPtr->parity_good;
   1569 		return (0);
   1570 
   1571 	case RAIDFRAME_PARITYMAP_STATUS:
   1572 		if (rf_paritymap_ineligible(raidPtr))
   1573 			return EINVAL;
   1574 		rf_paritymap_status(raidPtr->parity_map,
   1575 		    (struct rf_pmstat *)data);
   1576 		return 0;
   1577 
   1578 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
   1579 		if (rf_paritymap_ineligible(raidPtr))
   1580 			return EINVAL;
   1581 		if (raidPtr->parity_map == NULL)
   1582 			return ENOENT; /* ??? */
   1583 		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
   1584 			(struct rf_pmparams *)data, 1))
   1585 			return EINVAL;
   1586 		return 0;
   1587 
   1588 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
   1589 		if (rf_paritymap_ineligible(raidPtr))
   1590 			return EINVAL;
   1591 		*(int *) data = rf_paritymap_get_disable(raidPtr);
   1592 		return 0;
   1593 
   1594 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
   1595 		if (rf_paritymap_ineligible(raidPtr))
   1596 			return EINVAL;
   1597 		rf_paritymap_set_disable(raidPtr, *(int *)data);
   1598 		/* XXX should errors be passed up? */
   1599 		return 0;
   1600 
   1601 	case RAIDFRAME_RESET_ACCTOTALS:
   1602 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1603 		return (0);
   1604 
   1605 	case RAIDFRAME_GET_ACCTOTALS:
   1606 		totals = (RF_AccTotals_t *) data;
   1607 		*totals = raidPtr->acc_totals;
   1608 		return (0);
   1609 
   1610 	case RAIDFRAME_KEEP_ACCTOTALS:
   1611 		raidPtr->keep_acc_totals = *(int *)data;
   1612 		return (0);
   1613 
   1614 	case RAIDFRAME_GET_SIZE:
   1615 		*(int *) data = raidPtr->totalSectors;
   1616 		return (0);
   1617 
   1618 		/* fail a disk & optionally start reconstruction */
   1619 	case RAIDFRAME_FAIL_DISK:
   1620 
   1621 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1622 			/* Can't do this on a RAID 0!! */
   1623 			return(EINVAL);
   1624 		}
   1625 
   1626 		rr = (struct rf_recon_req *) data;
   1627 		rr->row = 0;
   1628 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1629 			return (EINVAL);
   1630 
   1631 
   1632 		rf_lock_mutex2(raidPtr->mutex);
   1633 		if (raidPtr->status == rf_rs_reconstructing) {
   1634 			/* you can't fail a disk while we're reconstructing! */
   1635 			/* XXX wrong for RAID6 */
   1636 			rf_unlock_mutex2(raidPtr->mutex);
   1637 			return (EINVAL);
   1638 		}
   1639 		if ((raidPtr->Disks[rr->col].status ==
   1640 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1641 			/* some other component has failed.  Let's not make
   1642 			   things worse. XXX wrong for RAID6 */
   1643 			rf_unlock_mutex2(raidPtr->mutex);
   1644 			return (EINVAL);
   1645 		}
   1646 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1647 			/* Can't fail a spared disk! */
   1648 			rf_unlock_mutex2(raidPtr->mutex);
   1649 			return (EINVAL);
   1650 		}
   1651 		rf_unlock_mutex2(raidPtr->mutex);
   1652 
   1653 		/* make a copy of the recon request so that we don't rely on
   1654 		 * the user's buffer */
   1655 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1656 		if (rrcopy == NULL)
   1657 			return(ENOMEM);
   1658 		memcpy(rrcopy, rr, sizeof(*rr));
   1659 		rrcopy->raidPtr = (void *) raidPtr;
   1660 
   1661 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1662 					   rf_ReconThread,
   1663 					   rrcopy,"raid_recon");
   1664 		return (0);
   1665 
   1666 		/* invoke a copyback operation after recon on whatever disk
   1667 		 * needs it, if any */
   1668 	case RAIDFRAME_COPYBACK:
   1669 
   1670 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1671 			/* This makes no sense on a RAID 0!! */
   1672 			return(EINVAL);
   1673 		}
   1674 
   1675 		if (raidPtr->copyback_in_progress == 1) {
   1676 			/* Copyback is already in progress! */
   1677 			return(EINVAL);
   1678 		}
   1679 
   1680 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1681 					   rf_CopybackThread,
   1682 					   raidPtr,"raid_copyback");
   1683 		return (retcode);
   1684 
   1685 		/* return the percentage completion of reconstruction */
   1686 	case RAIDFRAME_CHECK_RECON_STATUS:
   1687 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1688 			/* This makes no sense on a RAID 0, so tell the
   1689 			   user it's done. */
   1690 			*(int *) data = 100;
   1691 			return(0);
   1692 		}
   1693 		if (raidPtr->status != rf_rs_reconstructing)
   1694 			*(int *) data = 100;
   1695 		else {
   1696 			if (raidPtr->reconControl->numRUsTotal > 0) {
   1697 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
   1698 			} else {
   1699 				*(int *) data = 0;
   1700 			}
   1701 		}
   1702 		return (0);
   1703 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1704 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1705 		if (raidPtr->status != rf_rs_reconstructing) {
   1706 			progressInfo.remaining = 0;
   1707 			progressInfo.completed = 100;
   1708 			progressInfo.total = 100;
   1709 		} else {
   1710 			progressInfo.total =
   1711 				raidPtr->reconControl->numRUsTotal;
   1712 			progressInfo.completed =
   1713 				raidPtr->reconControl->numRUsComplete;
   1714 			progressInfo.remaining = progressInfo.total -
   1715 				progressInfo.completed;
   1716 		}
   1717 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1718 				  sizeof(RF_ProgressInfo_t));
   1719 		return (retcode);
   1720 
   1721 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1722 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1723 			/* This makes no sense on a RAID 0, so tell the
   1724 			   user it's done. */
   1725 			*(int *) data = 100;
   1726 			return(0);
   1727 		}
   1728 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1729 			*(int *) data = 100 *
   1730 				raidPtr->parity_rewrite_stripes_done /
   1731 				raidPtr->Layout.numStripe;
   1732 		} else {
   1733 			*(int *) data = 100;
   1734 		}
   1735 		return (0);
   1736 
   1737 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1738 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1739 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1740 			progressInfo.total = raidPtr->Layout.numStripe;
   1741 			progressInfo.completed =
   1742 				raidPtr->parity_rewrite_stripes_done;
   1743 			progressInfo.remaining = progressInfo.total -
   1744 				progressInfo.completed;
   1745 		} else {
   1746 			progressInfo.remaining = 0;
   1747 			progressInfo.completed = 100;
   1748 			progressInfo.total = 100;
   1749 		}
   1750 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1751 				  sizeof(RF_ProgressInfo_t));
   1752 		return (retcode);
   1753 
   1754 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1755 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1756 			/* This makes no sense on a RAID 0 */
   1757 			*(int *) data = 100;
   1758 			return(0);
   1759 		}
   1760 		if (raidPtr->copyback_in_progress == 1) {
   1761 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1762 				raidPtr->Layout.numStripe;
   1763 		} else {
   1764 			*(int *) data = 100;
   1765 		}
   1766 		return (0);
   1767 
   1768 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1769 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1770 		if (raidPtr->copyback_in_progress == 1) {
   1771 			progressInfo.total = raidPtr->Layout.numStripe;
   1772 			progressInfo.completed =
   1773 				raidPtr->copyback_stripes_done;
   1774 			progressInfo.remaining = progressInfo.total -
   1775 				progressInfo.completed;
   1776 		} else {
   1777 			progressInfo.remaining = 0;
   1778 			progressInfo.completed = 100;
   1779 			progressInfo.total = 100;
   1780 		}
   1781 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1782 				  sizeof(RF_ProgressInfo_t));
   1783 		return (retcode);
   1784 
   1785 		/* the sparetable daemon calls this to wait for the kernel to
   1786 		 * need a spare table. this ioctl does not return until a
   1787 		 * spare table is needed. XXX -- calling mpsleep here in the
   1788 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1789 		 * -- I should either compute the spare table in the kernel,
   1790 		 * or have a different -- XXX XXX -- interface (a different
   1791 		 * character device) for delivering the table     -- XXX */
   1792 #if 0
   1793 	case RAIDFRAME_SPARET_WAIT:
   1794 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1795 		while (!rf_sparet_wait_queue)
   1796 			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
   1797 		waitreq = rf_sparet_wait_queue;
   1798 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1799 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1800 
   1801 		/* structure assignment */
   1802 		*((RF_SparetWait_t *) data) = *waitreq;
   1803 
   1804 		RF_Free(waitreq, sizeof(*waitreq));
   1805 		return (0);
   1806 
   1807 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1808 		 * code in it that will cause the dameon to exit */
   1809 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1810 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1811 		waitreq->fcol = -1;
   1812 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1813 		waitreq->next = rf_sparet_wait_queue;
   1814 		rf_sparet_wait_queue = waitreq;
   1815 		rf_broadcast_conf2(rf_sparet_wait_cv);
   1816 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1817 		return (0);
   1818 
   1819 		/* used by the spare table daemon to deliver a spare table
   1820 		 * into the kernel */
   1821 	case RAIDFRAME_SEND_SPARET:
   1822 
   1823 		/* install the spare table */
   1824 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1825 
   1826 		/* respond to the requestor.  the return status of the spare
   1827 		 * table installation is passed in the "fcol" field */
   1828 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1829 		waitreq->fcol = retcode;
   1830 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1831 		waitreq->next = rf_sparet_resp_queue;
   1832 		rf_sparet_resp_queue = waitreq;
   1833 		rf_broadcast_cond2(rf_sparet_resp_cv);
   1834 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1835 
   1836 		return (retcode);
   1837 #endif
   1838 
   1839 	default:
   1840 		break; /* fall through to the os-specific code below */
   1841 
   1842 	}
   1843 
   1844 	if (!raidPtr->valid)
   1845 		return (EINVAL);
   1846 
   1847 	/*
   1848 	 * Add support for "regular" device ioctls here.
   1849 	 */
   1850 
   1851 	error = disk_ioctl(&rs->sc_dkdev, dev, cmd, data, flag, l);
   1852 	if (error != EPASSTHROUGH)
   1853 		return (error);
   1854 
   1855 	switch (cmd) {
   1856 	case DIOCWDINFO:
   1857 	case DIOCSDINFO:
   1858 #ifdef __HAVE_OLD_DISKLABEL
   1859 	case ODIOCWDINFO:
   1860 	case ODIOCSDINFO:
   1861 #endif
   1862 	{
   1863 		struct disklabel *lp;
   1864 #ifdef __HAVE_OLD_DISKLABEL
   1865 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1866 			memset(&newlabel, 0, sizeof newlabel);
   1867 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1868 			lp = &newlabel;
   1869 		} else
   1870 #endif
   1871 		lp = (struct disklabel *)data;
   1872 
   1873 		if ((error = raidlock(rs)) != 0)
   1874 			return (error);
   1875 
   1876 		rs->sc_flags |= RAIDF_LABELLING;
   1877 
   1878 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1879 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1880 		if (error == 0) {
   1881 			if (cmd == DIOCWDINFO
   1882 #ifdef __HAVE_OLD_DISKLABEL
   1883 			    || cmd == ODIOCWDINFO
   1884 #endif
   1885 			   )
   1886 				error = writedisklabel(RAIDLABELDEV(dev),
   1887 				    raidstrategy, rs->sc_dkdev.dk_label,
   1888 				    rs->sc_dkdev.dk_cpulabel);
   1889 		}
   1890 		rs->sc_flags &= ~RAIDF_LABELLING;
   1891 
   1892 		raidunlock(rs);
   1893 
   1894 		if (error)
   1895 			return (error);
   1896 		break;
   1897 	}
   1898 
   1899 	case DIOCWLABEL:
   1900 		if (*(int *) data != 0)
   1901 			rs->sc_flags |= RAIDF_WLABEL;
   1902 		else
   1903 			rs->sc_flags &= ~RAIDF_WLABEL;
   1904 		break;
   1905 
   1906 	case DIOCGDEFLABEL:
   1907 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1908 		break;
   1909 
   1910 #ifdef __HAVE_OLD_DISKLABEL
   1911 	case ODIOCGDEFLABEL:
   1912 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1913 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1914 			return ENOTTY;
   1915 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1916 		break;
   1917 #endif
   1918 
   1919 	case DIOCCACHESYNC:
   1920 		return rf_sync_component_caches(raidPtr);
   1921 
   1922 	case DIOCGSTRATEGY:
   1923 	    {
   1924 		struct disk_strategy *dks = (void *)data;
   1925 
   1926 		s = splbio();
   1927 		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
   1928 		    sizeof(dks->dks_name));
   1929 		splx(s);
   1930 		dks->dks_paramlen = 0;
   1931 
   1932 		return 0;
   1933 	    }
   1934 
   1935 	case DIOCSSTRATEGY:
   1936 	    {
   1937 		struct disk_strategy *dks = (void *)data;
   1938 		struct bufq_state *new;
   1939 		struct bufq_state *old;
   1940 
   1941 		if (dks->dks_param != NULL) {
   1942 			return EINVAL;
   1943 		}
   1944 		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
   1945 		error = bufq_alloc(&new, dks->dks_name,
   1946 		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
   1947 		if (error) {
   1948 			return error;
   1949 		}
   1950 		s = splbio();
   1951 		old = rs->buf_queue;
   1952 		bufq_move(new, old);
   1953 		rs->buf_queue = new;
   1954 		splx(s);
   1955 		bufq_free(old);
   1956 
   1957 		return 0;
   1958 	    }
   1959 
   1960 	default:
   1961 		retcode = ENOTTY;
   1962 	}
   1963 	return (retcode);
   1964 
   1965 }
   1966 
   1967 
   1968 /* raidinit -- complete the rest of the initialization for the
   1969    RAIDframe device.  */
   1970 
   1971 
   1972 static void
   1973 raidinit(struct raid_softc *rs)
   1974 {
   1975 	cfdata_t cf;
   1976 	int     unit;
   1977 	RF_Raid_t *raidPtr = &rs->sc_r;
   1978 
   1979 	unit = raidPtr->raidid;
   1980 
   1981 
   1982 	/* XXX should check return code first... */
   1983 	rs->sc_flags |= RAIDF_INITED;
   1984 
   1985 	/* XXX doesn't check bounds. */
   1986 	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
   1987 
   1988 	/* attach the pseudo device */
   1989 	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
   1990 	cf->cf_name = raid_cd.cd_name;
   1991 	cf->cf_atname = raid_cd.cd_name;
   1992 	cf->cf_unit = unit;
   1993 	cf->cf_fstate = FSTATE_STAR;
   1994 
   1995 	rs->sc_dev = config_attach_pseudo(cf);
   1996 
   1997 	if (rs->sc_dev == NULL) {
   1998 		printf("raid%d: config_attach_pseudo failed\n",
   1999 		    raidPtr->raidid);
   2000 		rs->sc_flags &= ~RAIDF_INITED;
   2001 		free(cf, M_RAIDFRAME);
   2002 		return;
   2003 	}
   2004 
   2005 	/* disk_attach actually creates space for the CPU disklabel, among
   2006 	 * other things, so it's critical to call this *BEFORE* we try putzing
   2007 	 * with disklabels. */
   2008 
   2009 	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
   2010 	disk_attach(&rs->sc_dkdev);
   2011 
   2012 	/* XXX There may be a weird interaction here between this, and
   2013 	 * protectedSectors, as used in RAIDframe.  */
   2014 
   2015 	rs->sc_size = raidPtr->totalSectors;
   2016 
   2017 	rf_set_geometry(rs, raidPtr);
   2018 
   2019 	dkwedge_discover(&rs->sc_dkdev);
   2020 
   2021 }
   2022 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 *
 * The request is queued on rf_sparet_wait_queue (consumed by the
 * daemon via RAIDFRAME_SPARET_WAIT) and the reply comes back on
 * rf_sparet_resp_queue (posted by RAIDFRAME_SEND_SPARET).  Returns
 * the status the daemon put in the response's "fcol" field.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* rf_wait_cond2() drops the mutex while asleep and reacquires
	 * it before returning */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
   2055 #endif
   2056 
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* must drop the mutex across the label update, which
		 * takes it itself */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Loop invariant: raidPtr->mutex is held at the top of each
	 * iteration and released while a request is dequeued, validated
	 * and dispatched. */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		/* convert from DEV_BSIZE units to RAID sector units */
		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* reject I/O past the end of the array; the "sum <"
		 * comparisons catch unsigned wraparound in the addition */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that aren't a multiple of the sector
		 * size */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this request */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* submission failed: complete the buf with the
			 * error here, since no callback will */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
   2182 
   2183 
   2184 
   2185 
   2186 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   2187 
   2188 int
   2189 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
   2190 {
   2191 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   2192 	struct buf *bp;
   2193 
   2194 	req->queue = queue;
   2195 	bp = req->bp;
   2196 
   2197 	switch (req->type) {
   2198 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   2199 		/* XXX need to do something extra here.. */
   2200 		/* I'm leaving this in, as I've never actually seen it used,
   2201 		 * and I'd like folks to report it... GO */
   2202 		printf(("WAKEUP CALLED\n"));
   2203 		queue->numOutstanding++;
   2204 
   2205 		bp->b_flags = 0;
   2206 		bp->b_private = req;
   2207 
   2208 		KernelWakeupFunc(bp);
   2209 		break;
   2210 
   2211 	case RF_IO_TYPE_READ:
   2212 	case RF_IO_TYPE_WRITE:
   2213 #if RF_ACC_TRACE > 0
   2214 		if (req->tracerec) {
   2215 			RF_ETIMER_START(req->tracerec->timer);
   2216 		}
   2217 #endif
   2218 		InitBP(bp, queue->rf_cinfo->ci_vp,
   2219 		    op, queue->rf_cinfo->ci_dev,
   2220 		    req->sectorOffset, req->numSector,
   2221 		    req->buf, KernelWakeupFunc, (void *) req,
   2222 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   2223 
   2224 		if (rf_debugKernelAccess) {
   2225 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   2226 				(long) bp->b_blkno));
   2227 		}
   2228 		queue->numOutstanding++;
   2229 		queue->last_deq_sector = req->sectorOffset;
   2230 		/* acc wouldn't have been let in if there were any pending
   2231 		 * reqs at any other priority */
   2232 		queue->curPriority = req->priority;
   2233 
   2234 		db1_printf(("Going for %c to unit %d col %d\n",
   2235 			    req->type, queue->raidPtr->raidid,
   2236 			    queue->col));
   2237 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   2238 			(int) req->sectorOffset, (int) req->numSector,
   2239 			(int) (req->numSector <<
   2240 			    queue->raidPtr->logBytesPerSector),
   2241 			(int) queue->raidPtr->logBytesPerSector));
   2242 
   2243 		/*
   2244 		 * XXX: drop lock here since this can block at
   2245 		 * least with backing SCSI devices.  Retake it
   2246 		 * to minimize fuss with calling interfaces.
   2247 		 */
   2248 
   2249 		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
   2250 		bdev_strategy(bp);
   2251 		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
   2252 		break;
   2253 
   2254 	default:
   2255 		panic("bad req->type in rf_DispatchKernelIO");
   2256 	}
   2257 	db1_printf(("Exiting from DispatchKernelIO\n"));
   2258 
   2259 	return (0);
   2260 }
/* this is the callback function associated with an I/O invoked from
   kernel code (via InitBP()/bdev_strategy()).  Runs at biodone time:
   records the error, optionally fails the component, and hands the
   request to the raidio thread.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the request was stashed in b_private by InitBP() */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* raidstart() notices numNewFailures and runs a
			 * component label update */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
   2330 
   2331 
   2332 /*
   2333  * initialize a buf structure for doing an I/O in the kernel.
   2334  */
   2335 static void
   2336 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2337        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
   2338        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
   2339        struct proc *b_proc)
   2340 {
   2341 	/* bp->b_flags       = B_PHYS | rw_flag; */
   2342 	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
   2343 	bp->b_oflags = 0;
   2344 	bp->b_cflags = 0;
   2345 	bp->b_bcount = numSect << logBytesPerSector;
   2346 	bp->b_bufsize = bp->b_bcount;
   2347 	bp->b_error = 0;
   2348 	bp->b_dev = dev;
   2349 	bp->b_data = bf;
   2350 	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
   2351 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2352 	if (bp->b_bcount == 0) {
   2353 		panic("bp->b_bcount is zero in InitBP!!");
   2354 	}
   2355 	bp->b_proc = b_proc;
   2356 	bp->b_iodone = cbFunc;
   2357 	bp->b_private = cbArg;
   2358 }
   2359 
   2360 static void
   2361 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
   2362 		    struct disklabel *lp)
   2363 {
   2364 	memset(lp, 0, sizeof(*lp));
   2365 
   2366 	/* fabricate a label... */
   2367 	if (raidPtr->totalSectors > UINT32_MAX)
   2368 		lp->d_secperunit = UINT32_MAX;
   2369 	else
   2370 		lp->d_secperunit = raidPtr->totalSectors;
   2371 	lp->d_secsize = raidPtr->bytesPerSector;
   2372 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2373 	lp->d_ntracks = 4 * raidPtr->numCol;
   2374 	lp->d_ncylinders = raidPtr->totalSectors /
   2375 		(lp->d_nsectors * lp->d_ntracks);
   2376 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2377 
   2378 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2379 	lp->d_type = DKTYPE_RAID;
   2380 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2381 	lp->d_rpm = 3600;
   2382 	lp->d_interleave = 1;
   2383 	lp->d_flags = 0;
   2384 
   2385 	lp->d_partitions[RAW_PART].p_offset = 0;
   2386 	lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
   2387 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2388 	lp->d_npartitions = RAW_PART + 1;
   2389 
   2390 	lp->d_magic = DISKMAGIC;
   2391 	lp->d_magic2 = DISKMAGIC;
   2392 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2393 
   2394 }
   2395 /*
   2396  * Read the disklabel from the raid device.  If one is not present, fake one
   2397  * up.
   2398  */
   2399 static void
   2400 raidgetdisklabel(dev_t dev)
   2401 {
   2402 	int     unit = raidunit(dev);
   2403 	struct raid_softc *rs;
   2404 	const char   *errstring;
   2405 	struct disklabel *lp;
   2406 	struct cpu_disklabel *clp;
   2407 	RF_Raid_t *raidPtr;
   2408 
   2409 	if ((rs = raidget(unit, false)) == NULL)
   2410 		return;
   2411 
   2412 	lp = rs->sc_dkdev.dk_label;
   2413 	clp = rs->sc_dkdev.dk_cpulabel;
   2414 
   2415 	db1_printf(("Getting the disklabel...\n"));
   2416 
   2417 	memset(clp, 0, sizeof(*clp));
   2418 
   2419 	raidPtr = &rs->sc_r;
   2420 
   2421 	raidgetdefaultlabel(raidPtr, rs, lp);
   2422 
   2423 	/*
   2424 	 * Call the generic disklabel extraction routine.
   2425 	 */
   2426 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2427 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2428 	if (errstring)
   2429 		raidmakedisklabel(rs);
   2430 	else {
   2431 		int     i;
   2432 		struct partition *pp;
   2433 
   2434 		/*
   2435 		 * Sanity check whether the found disklabel is valid.
   2436 		 *
   2437 		 * This is necessary since total size of the raid device
   2438 		 * may vary when an interleave is changed even though exactly
   2439 		 * same components are used, and old disklabel may used
   2440 		 * if that is found.
   2441 		 */
   2442 		if (lp->d_secperunit < UINT32_MAX ?
   2443 		    lp->d_secperunit != rs->sc_size :
   2444 		    lp->d_secperunit > rs->sc_size)
   2445 			printf("raid%d: WARNING: %s: "
   2446 			    "total sector size in disklabel (%ju) != "
   2447 			    "the size of raid (%ju)\n", unit, rs->sc_xname,
   2448 			    (uintmax_t)lp->d_secperunit,
   2449 			    (uintmax_t)rs->sc_size);
   2450 		for (i = 0; i < lp->d_npartitions; i++) {
   2451 			pp = &lp->d_partitions[i];
   2452 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2453 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2454 				       "exceeds the size of raid (%ju)\n",
   2455 				       unit, rs->sc_xname, 'a' + i,
   2456 				       (uintmax_t)rs->sc_size);
   2457 		}
   2458 	}
   2459 
   2460 }
   2461 /*
   2462  * Take care of things one might want to take care of in the event
   2463  * that a disklabel isn't present.
   2464  */
   2465 static void
   2466 raidmakedisklabel(struct raid_softc *rs)
   2467 {
   2468 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2469 	db1_printf(("Making a label..\n"));
   2470 
   2471 	/*
   2472 	 * For historical reasons, if there's no disklabel present
   2473 	 * the raw partition must be marked FS_BSDFFS.
   2474 	 */
   2475 
   2476 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2477 
   2478 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2479 
   2480 	lp->d_checksum = dkcksum(lp);
   2481 }
   2482 /*
   2483  * Wait interruptibly for an exclusive lock.
   2484  *
   2485  * XXX
   2486  * Several drivers do this; it should be abstracted and made MP-safe.
   2487  * (Hmm... where have we seen this warning before :->  GO )
   2488  */
   2489 static int
   2490 raidlock(struct raid_softc *rs)
   2491 {
   2492 	int     error;
   2493 
   2494 	mutex_enter(&rs->sc_mutex);
   2495 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2496 		rs->sc_flags |= RAIDF_WANTED;
   2497 		error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
   2498 		if (error != 0)
   2499 			return (error);
   2500 	}
   2501 	rs->sc_flags |= RAIDF_LOCKED;
   2502 	mutex_exit(&rs->sc_mutex);
   2503 	return (0);
   2504 }
   2505 /*
   2506  * Unlock and wake up any waiters.
   2507  */
   2508 static void
   2509 raidunlock(struct raid_softc *rs)
   2510 {
   2511 
   2512 	mutex_enter(&rs->sc_mutex);
   2513 	rs->sc_flags &= ~RAIDF_LOCKED;
   2514 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2515 		rs->sc_flags &= ~RAIDF_WANTED;
   2516 		cv_broadcast(&rs->sc_cv);
   2517 	}
   2518 	mutex_exit(&rs->sc_mutex);
   2519 }
   2520 
   2521 
/* On-disk layout of RAIDframe metadata at the start of each component:
 * the component label area begins RF_COMPONENT_INFO_OFFSET bytes in and
 * is at least RF_COMPONENT_INFO_SIZE bytes; the parity map follows it
 * (see rf_parity_map_offset()/rf_parity_map_size() below). */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
#define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
   2525 
/* Byte offset of the component label area within a component. */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
   2532 
   2533 static daddr_t
   2534 rf_component_info_size(unsigned secsize)
   2535 {
   2536 	daddr_t info_size;
   2537 
   2538 	KASSERT(secsize);
   2539 	if (secsize > RF_COMPONENT_INFO_SIZE)
   2540 		info_size = secsize;
   2541 	else
   2542 		info_size = RF_COMPONENT_INFO_SIZE;
   2543 
   2544 	return info_size;
   2545 }
   2546 
   2547 static daddr_t
   2548 rf_parity_map_offset(RF_Raid_t *raidPtr)
   2549 {
   2550 	daddr_t map_offset;
   2551 
   2552 	KASSERT(raidPtr->bytesPerSector);
   2553 	if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
   2554 		map_offset = raidPtr->bytesPerSector;
   2555 	else
   2556 		map_offset = RF_COMPONENT_INFO_SIZE;
   2557 	map_offset += rf_component_info_offset();
   2558 
   2559 	return map_offset;
   2560 }
   2561 
   2562 static daddr_t
   2563 rf_parity_map_size(RF_Raid_t *raidPtr)
   2564 {
   2565 	daddr_t map_size;
   2566 
   2567 	if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
   2568 		map_size = raidPtr->bytesPerSector;
   2569 	else
   2570 		map_size = RF_PARITY_MAP_SIZE;
   2571 
   2572 	return map_size;
   2573 }
   2574 
   2575 int
   2576 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2577 {
   2578 	RF_ComponentLabel_t *clabel;
   2579 
   2580 	clabel = raidget_component_label(raidPtr, col);
   2581 	clabel->clean = RF_RAID_CLEAN;
   2582 	raidflush_component_label(raidPtr, col);
   2583 	return(0);
   2584 }
   2585 
   2586 
   2587 int
   2588 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2589 {
   2590 	RF_ComponentLabel_t *clabel;
   2591 
   2592 	clabel = raidget_component_label(raidPtr, col);
   2593 	clabel->clean = RF_RAID_DIRTY;
   2594 	raidflush_component_label(raidPtr, col);
   2595 	return(0);
   2596 }
   2597 
/*
 * Re-read the component label for column `col' from disk into the
 * in-core copy (raid_cinfo[col].ci_label).  Returns the error from
 * raidread_component_label(), 0 on success.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
   2607 
/*
 * Return a pointer to the in-core component label for column `col'.
 * Callers modify it in place and then write it out with
 * raidflush_component_label().
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
   2613 
   2614 int
   2615 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2616 {
   2617 	RF_ComponentLabel_t *label;
   2618 
   2619 	label = &raidPtr->raid_cinfo[col].ci_label;
   2620 	label->mod_counter = raidPtr->mod_counter;
   2621 #ifndef RF_NO_PARITY_MAP
   2622 	label->parity_map_modcount = label->mod_counter;
   2623 #endif
   2624 	return raidwrite_component_label(raidPtr->bytesPerSector,
   2625 	    raidPtr->Disks[col].dev,
   2626 	    raidPtr->raid_cinfo[col].ci_vp, label);
   2627 }
   2628 
   2629 
/*
 * Read a component label from `dev' into *clabel.  The label occupies
 * only the leading sizeof(RF_ComponentLabel_t) bytes of the component
 * info area; the full (sector-rounded) area is read.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
   2639 
   2640 /* ARGSUSED */
   2641 static int
   2642 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
   2643     size_t msize, daddr_t offset, daddr_t dsize)
   2644 {
   2645 	struct buf *bp;
   2646 	int error;
   2647 
   2648 	/* XXX should probably ensure that we don't try to do this if
   2649 	   someone has changed rf_protected_sectors. */
   2650 
   2651 	if (b_vp == NULL) {
   2652 		/* For whatever reason, this component is not valid.
   2653 		   Don't try to read a component label from it. */
   2654 		return(EINVAL);
   2655 	}
   2656 
   2657 	/* get a block of the appropriate size... */
   2658 	bp = geteblk((int)dsize);
   2659 	bp->b_dev = dev;
   2660 
   2661 	/* get our ducks in a row for the read */
   2662 	bp->b_blkno = offset / DEV_BSIZE;
   2663 	bp->b_bcount = dsize;
   2664 	bp->b_flags |= B_READ;
   2665  	bp->b_resid = dsize;
   2666 
   2667 	bdev_strategy(bp);
   2668 	error = biowait(bp);
   2669 
   2670 	if (!error) {
   2671 		memcpy(data, bp->b_data, msize);
   2672 	}
   2673 
   2674 	brelse(bp, 0);
   2675 	return(error);
   2676 }
   2677 
   2678 
/*
 * Write *clabel out as the component label on `dev'.  Always a
 * synchronous write (asyncp = 0); the label is padded out to the full
 * (sector-rounded) component info area.
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
   2688 
   2689 /* ARGSUSED */
   2690 static int
   2691 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
   2692     size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
   2693 {
   2694 	struct buf *bp;
   2695 	int error;
   2696 
   2697 	/* get a block of the appropriate size... */
   2698 	bp = geteblk((int)dsize);
   2699 	bp->b_dev = dev;
   2700 
   2701 	/* get our ducks in a row for the write */
   2702 	bp->b_blkno = offset / DEV_BSIZE;
   2703 	bp->b_bcount = dsize;
   2704 	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
   2705  	bp->b_resid = dsize;
   2706 
   2707 	memset(bp->b_data, 0, dsize);
   2708 	memcpy(bp->b_data, data, msize);
   2709 
   2710 	bdev_strategy(bp);
   2711 	if (asyncp)
   2712 		return 0;
   2713 	error = biowait(bp);
   2714 	brelse(bp, 0);
   2715 	if (error) {
   2716 #if 1
   2717 		printf("Failed to write RAID component info!\n");
   2718 #endif
   2719 	}
   2720 
   2721 	return(error);
   2722 }
   2723 
   2724 void
   2725 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
   2726 {
   2727 	int c;
   2728 
   2729 	for (c = 0; c < raidPtr->numCol; c++) {
   2730 		/* Skip dead disks. */
   2731 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
   2732 			continue;
   2733 		/* XXXjld: what if an error occurs here? */
   2734 		raidwrite_component_area(raidPtr->Disks[c].dev,
   2735 		    raidPtr->raid_cinfo[c].ci_vp, map,
   2736 		    RF_PARITYMAP_NBYTE,
   2737 		    rf_parity_map_offset(raidPtr),
   2738 		    rf_parity_map_size(raidPtr), 0);
   2739 	}
   2740 }
   2741 
   2742 void
   2743 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
   2744 {
   2745 	struct rf_paritymap_ondisk tmp;
   2746 	int c,first;
   2747 
   2748 	first=1;
   2749 	for (c = 0; c < raidPtr->numCol; c++) {
   2750 		/* Skip dead disks. */
   2751 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
   2752 			continue;
   2753 		raidread_component_area(raidPtr->Disks[c].dev,
   2754 		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
   2755 		    RF_PARITYMAP_NBYTE,
   2756 		    rf_parity_map_offset(raidPtr),
   2757 		    rf_parity_map_size(raidPtr));
   2758 		if (first) {
   2759 			memcpy(map, &tmp, sizeof(*map));
   2760 			first = 0;
   2761 		} else {
   2762 			rf_paritymap_merge(map, &tmp);
   2763 		}
   2764 	}
   2765 }
   2766 
/*
 * Bump the modification counter and mark the component labels of all
 * non-failed components (and in-use spares) dirty, so an unclean
 * shutdown can be detected later.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column matches, scol keeps its
			 * previous value (-1 initially) and is stored in
			 * clabel->column below -- confirm this cannot occur
			 * for a rf_ds_used_spare disk. */

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
   2826 
   2827 
/*
 * Rewrite the component labels of all optimal components and in-use
 * spares with the current mod_counter and unit number.  If this is
 * the final update (RF_FINAL_COMPONENT_UPDATE) and parity is known
 * good, the labels are additionally marked clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which data column this spare replaces. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			/* NOTE(review): scol stays -1 (or stale) if no
			   column claims this spare — verify spareCol
			   bookkeeping guarantees a match here. */
			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
   2902 
   2903 void
   2904 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2905 {
   2906 
   2907 	if (vp != NULL) {
   2908 		if (auto_configured == 1) {
   2909 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2910 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2911 			vput(vp);
   2912 
   2913 		} else {
   2914 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
   2915 		}
   2916 	}
   2917 }
   2918 
   2919 
   2920 void
   2921 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2922 {
   2923 	int r,c;
   2924 	struct vnode *vp;
   2925 	int acd;
   2926 
   2927 
   2928 	/* We take this opportunity to close the vnodes like we should.. */
   2929 
   2930 	for (c = 0; c < raidPtr->numCol; c++) {
   2931 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2932 		acd = raidPtr->Disks[c].auto_configured;
   2933 		rf_close_component(raidPtr, vp, acd);
   2934 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2935 		raidPtr->Disks[c].auto_configured = 0;
   2936 	}
   2937 
   2938 	for (r = 0; r < raidPtr->numSpare; r++) {
   2939 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2940 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2941 		rf_close_component(raidPtr, vp, acd);
   2942 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2943 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2944 	}
   2945 }
   2946 
   2947 
   2948 void
   2949 rf_ReconThread(struct rf_recon_req *req)
   2950 {
   2951 	int     s;
   2952 	RF_Raid_t *raidPtr;
   2953 
   2954 	s = splbio();
   2955 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2956 	raidPtr->recon_in_progress = 1;
   2957 
   2958 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2959 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2960 
   2961 	RF_Free(req, sizeof(*req));
   2962 
   2963 	raidPtr->recon_in_progress = 0;
   2964 	splx(s);
   2965 
   2966 	/* That's all... */
   2967 	kthread_exit(0);	/* does not return */
   2968 }
   2969 
   2970 void
   2971 rf_RewriteParityThread(RF_Raid_t *raidPtr)
   2972 {
   2973 	int retcode;
   2974 	int s;
   2975 
   2976 	raidPtr->parity_rewrite_stripes_done = 0;
   2977 	raidPtr->parity_rewrite_in_progress = 1;
   2978 	s = splbio();
   2979 	retcode = rf_RewriteParity(raidPtr);
   2980 	splx(s);
   2981 	if (retcode) {
   2982 		printf("raid%d: Error re-writing parity (%d)!\n",
   2983 		    raidPtr->raidid, retcode);
   2984 	} else {
   2985 		/* set the clean bit!  If we shutdown correctly,
   2986 		   the clean bit on each component label will get
   2987 		   set */
   2988 		raidPtr->parity_good = RF_RAID_CLEAN;
   2989 	}
   2990 	raidPtr->parity_rewrite_in_progress = 0;
   2991 
   2992 	/* Anyone waiting for us to stop?  If so, inform them... */
   2993 	if (raidPtr->waitShutdown) {
   2994 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2995 	}
   2996 
   2997 	/* That's all... */
   2998 	kthread_exit(0);	/* does not return */
   2999 }
   3000 
   3001 
   3002 void
   3003 rf_CopybackThread(RF_Raid_t *raidPtr)
   3004 {
   3005 	int s;
   3006 
   3007 	raidPtr->copyback_in_progress = 1;
   3008 	s = splbio();
   3009 	rf_CopybackReconstructedData(raidPtr);
   3010 	splx(s);
   3011 	raidPtr->copyback_in_progress = 0;
   3012 
   3013 	/* That's all... */
   3014 	kthread_exit(0);	/* does not return */
   3015 }
   3016 
   3017 
   3018 void
   3019 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
   3020 {
   3021 	int s;
   3022 	RF_Raid_t *raidPtr;
   3023 
   3024 	s = splbio();
   3025 	raidPtr = req->raidPtr;
   3026 	raidPtr->recon_in_progress = 1;
   3027 	rf_ReconstructInPlace(raidPtr, req->col);
   3028 	RF_Free(req, sizeof(*req));
   3029 	raidPtr->recon_in_progress = 0;
   3030 	splx(s);
   3031 
   3032 	/* That's all... */
   3033 	kthread_exit(0);	/* does not return */
   3034 }
   3035 
/*
 * Probe one candidate device: read its component label and, if the
 * label looks sane and fits within the partition, prepend a new
 * RF_AutoConfig_t for it to ac_list.  If the component is rejected,
 * its vnode is closed and released here.  On out-of-memory the whole
 * ac_list is torn down and NULL is returned.
 *
 * NOTE(review): on the oomem path the vnodes held by the discarded
 * ac_list entries (and vp itself) are not closed — verify whether
 * that leak is acceptable on this error path.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    /* Free every entry gathered so far; give up entirely. */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: rejected — drop the label and the vnode. */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
   3093 
/*
 * Scan every disk-class device in the system for RAIDframe components
 * and return a list of RF_AutoConfig_t entries for those found.
 * Three places are probed per disk: a wedge (dk) of type RAIDframe,
 * any disklabel partition of type FS_RAID, and — if neither yielded a
 * component — the raw partition itself.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		/* Wedges (dk) are addressed whole; others by RAW_PART. */
		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* A wedge is a component candidate only if its
			   partition type says RAIDframe. */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component takes ownership of vp. */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			/* Note: next statement is not part of this call,
			   despite the indentation. */
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}
   3275 
   3276 
   3277 int
   3278 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
   3279 {
   3280 
   3281 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   3282 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   3283 	    ((clabel->clean == RF_RAID_CLEAN) ||
   3284 	     (clabel->clean == RF_RAID_DIRTY)) &&
   3285 	    clabel->row >=0 &&
   3286 	    clabel->column >= 0 &&
   3287 	    clabel->num_rows > 0 &&
   3288 	    clabel->num_columns > 0 &&
   3289 	    clabel->row < clabel->num_rows &&
   3290 	    clabel->column < clabel->num_columns &&
   3291 	    clabel->blockSize > 0 &&
   3292 	    /*
   3293 	     * numBlocksHi may contain garbage, but it is ok since
   3294 	     * the type is unsigned.  If it is really garbage,
   3295 	     * rf_fix_old_label_size() will fix it.
   3296 	     */
   3297 	    rf_component_label_numblocks(clabel) > 0) {
   3298 		/*
   3299 		 * label looks reasonable enough...
   3300 		 * let's make sure it has no old garbage.
   3301 		 */
   3302 		if (numsecs)
   3303 			rf_fix_old_label_size(clabel, numsecs);
   3304 		return(1);
   3305 	}
   3306 	return(0);
   3307 }
   3308 
   3309 
   3310 /*
   3311  * For reasons yet unknown, some old component labels have garbage in
   3312  * the newer numBlocksHi region, and this causes lossage.  Since those
   3313  * disks will also have numsecs set to less than 32 bits of sectors,
   3314  * we can determine when this corruption has occurred, and fix it.
   3315  *
   3316  * The exact same problem, with the same unknown reason, happens to
   3317  * the partitionSizeHi member as well.
   3318  */
   3319 static void
   3320 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
   3321 {
   3322 
   3323 	if (numsecs < ((uint64_t)1 << 32)) {
   3324 		if (clabel->numBlocksHi) {
   3325 			printf("WARNING: total sectors < 32 bits, yet "
   3326 			       "numBlocksHi set\n"
   3327 			       "WARNING: resetting numBlocksHi to zero.\n");
   3328 			clabel->numBlocksHi = 0;
   3329 		}
   3330 
   3331 		if (clabel->partitionSizeHi) {
   3332 			printf("WARNING: total sectors < 32 bits, yet "
   3333 			       "partitionSizeHi set\n"
   3334 			       "WARNING: resetting partitionSizeHi to zero.\n");
   3335 			clabel->partitionSizeHi = 0;
   3336 		}
   3337 	}
   3338 }
   3339 
   3340 
#ifdef DEBUG
/* Dump a component label to the console in human-readable form
   (debug builds only). */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* Index by root_partition & 3; last slot catches bad values. */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
   3374 
   3375 RF_ConfigSet_t *
   3376 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   3377 {
   3378 	RF_AutoConfig_t *ac;
   3379 	RF_ConfigSet_t *config_sets;
   3380 	RF_ConfigSet_t *cset;
   3381 	RF_AutoConfig_t *ac_next;
   3382 
   3383 
   3384 	config_sets = NULL;
   3385 
   3386 	/* Go through the AutoConfig list, and figure out which components
   3387 	   belong to what sets.  */
   3388 	ac = ac_list;
   3389 	while(ac!=NULL) {
   3390 		/* we're going to putz with ac->next, so save it here
   3391 		   for use at the end of the loop */
   3392 		ac_next = ac->next;
   3393 
   3394 		if (config_sets == NULL) {
   3395 			/* will need at least this one... */
   3396 			config_sets = (RF_ConfigSet_t *)
   3397 				malloc(sizeof(RF_ConfigSet_t),
   3398 				       M_RAIDFRAME, M_NOWAIT);
   3399 			if (config_sets == NULL) {
   3400 				panic("rf_create_auto_sets: No memory!");
   3401 			}
   3402 			/* this one is easy :) */
   3403 			config_sets->ac = ac;
   3404 			config_sets->next = NULL;
   3405 			config_sets->rootable = 0;
   3406 			ac->next = NULL;
   3407 		} else {
   3408 			/* which set does this component fit into? */
   3409 			cset = config_sets;
   3410 			while(cset!=NULL) {
   3411 				if (rf_does_it_fit(cset, ac)) {
   3412 					/* looks like it matches... */
   3413 					ac->next = cset->ac;
   3414 					cset->ac = ac;
   3415 					break;
   3416 				}
   3417 				cset = cset->next;
   3418 			}
   3419 			if (cset==NULL) {
   3420 				/* didn't find a match above... new set..*/
   3421 				cset = (RF_ConfigSet_t *)
   3422 					malloc(sizeof(RF_ConfigSet_t),
   3423 					       M_RAIDFRAME, M_NOWAIT);
   3424 				if (cset == NULL) {
   3425 					panic("rf_create_auto_sets: No memory!");
   3426 				}
   3427 				cset->ac = ac;
   3428 				ac->next = NULL;
   3429 				cset->next = config_sets;
   3430 				cset->rootable = 0;
   3431 				config_sets = cset;
   3432 			}
   3433 		}
   3434 		ac = ac_next;
   3435 	}
   3436 
   3437 
   3438 	return(config_sets);
   3439 }
   3440 
   3441 static int
   3442 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3443 {
   3444 	RF_ComponentLabel_t *clabel1, *clabel2;
   3445 
   3446 	/* If this one matches the *first* one in the set, that's good
   3447 	   enough, since the other members of the set would have been
   3448 	   through here too... */
   3449 	/* note that we are not checking partitionSize here..
   3450 
   3451 	   Note that we are also not checking the mod_counters here.
   3452 	   If everything else matches except the mod_counter, that's
   3453 	   good enough for this test.  We will deal with the mod_counters
   3454 	   a little later in the autoconfiguration process.
   3455 
   3456 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3457 
   3458 	   The reason we don't check for this is that failed disks
   3459 	   will have lower modification counts.  If those disks are
   3460 	   not added to the set they used to belong to, then they will
   3461 	   form their own set, which may result in 2 different sets,
   3462 	   for example, competing to be configured at raid0, and
   3463 	   perhaps competing to be the root filesystem set.  If the
   3464 	   wrong ones get configured, or both attempt to become /,
   3465 	   weird behaviour and or serious lossage will occur.  Thus we
   3466 	   need to bring them into the fold here, and kick them out at
   3467 	   a later point.
   3468 
   3469 	*/
   3470 
   3471 	clabel1 = cset->ac->clabel;
   3472 	clabel2 = ac->clabel;
   3473 	if ((clabel1->version == clabel2->version) &&
   3474 	    (clabel1->serial_number == clabel2->serial_number) &&
   3475 	    (clabel1->num_rows == clabel2->num_rows) &&
   3476 	    (clabel1->num_columns == clabel2->num_columns) &&
   3477 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3478 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3479 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3480 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3481 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3482 	    (clabel1->blockSize == clabel2->blockSize) &&
   3483 	    rf_component_label_numblocks(clabel1) ==
   3484 	    rf_component_label_numblocks(clabel2) &&
   3485 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3486 	    (clabel1->root_partition == clabel2->root_partition) &&
   3487 	    (clabel1->last_unit == clabel2->last_unit) &&
   3488 	    (clabel1->config_order == clabel2->config_order)) {
   3489 		/* if it get's here, it almost *has* to be a match */
   3490 	} else {
   3491 		/* it's not consistent with somebody in the set..
   3492 		   punt */
   3493 		return(0);
   3494 	}
   3495 	/* all was fine.. it must fit... */
   3496 	return(1);
   3497 }
   3498 
/*
 * Decide whether the configuration set has enough live, up-to-date
 * components to be configured.  A component counts only if its
 * mod_counter equals the highest mod_counter seen in the set.
 * RAID 1 is special-cased: a mirror pair (even column + following odd
 * column) may lose one member but not both.  Returns 1 if the set is
 * viable, 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* Look for a current (mod_counter matches) component
		   for column c. */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just finished the odd component of a
				   mirror pair without bailing.. reset
				   the even_pair_failed flag, and go on
				   to the next pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* Per-level failure tolerance: RAID 0 tolerates none,
	   RAID 4/5 tolerate one missing component. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
   3601 
/*
 * Build an RF_Config_t from the component labels of an autoconfig
 * set: copy geometry/layout fields from the first label and fill in
 * the device name of every member by its column.
 *
 * NOTE(review): this also writes clabel->num_rows = 1 as a side
 * effect of the chained assignment below.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* 2. Record each member's device name at its column slot. */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* 3. No debug variables for autoconfigured sets. */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
   3636 
   3637 int
   3638 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3639 {
   3640 	RF_ComponentLabel_t *clabel;
   3641 	int column;
   3642 	int sparecol;
   3643 
   3644 	raidPtr->autoconfigure = new_value;
   3645 
   3646 	for(column=0; column<raidPtr->numCol; column++) {
   3647 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3648 			clabel = raidget_component_label(raidPtr, column);
   3649 			clabel->autoconfigure = new_value;
   3650 			raidflush_component_label(raidPtr, column);
   3651 		}
   3652 	}
   3653 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3654 		sparecol = raidPtr->numCol + column;
   3655 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3656 			clabel = raidget_component_label(raidPtr, sparecol);
   3657 			clabel->autoconfigure = new_value;
   3658 			raidflush_component_label(raidPtr, sparecol);
   3659 		}
   3660 	}
   3661 	return(new_value);
   3662 }
   3663 
   3664 int
   3665 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3666 {
   3667 	RF_ComponentLabel_t *clabel;
   3668 	int column;
   3669 	int sparecol;
   3670 
   3671 	raidPtr->root_partition = new_value;
   3672 	for(column=0; column<raidPtr->numCol; column++) {
   3673 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3674 			clabel = raidget_component_label(raidPtr, column);
   3675 			clabel->root_partition = new_value;
   3676 			raidflush_component_label(raidPtr, column);
   3677 		}
   3678 	}
   3679 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3680 		sparecol = raidPtr->numCol + column;
   3681 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3682 			clabel = raidget_component_label(raidPtr, sparecol);
   3683 			clabel->root_partition = new_value;
   3684 			raidflush_component_label(raidPtr, sparecol);
   3685 		}
   3686 	}
   3687 	return(new_value);
   3688 }
   3689 
   3690 void
   3691 rf_release_all_vps(RF_ConfigSet_t *cset)
   3692 {
   3693 	RF_AutoConfig_t *ac;
   3694 
   3695 	ac = cset->ac;
   3696 	while(ac!=NULL) {
   3697 		/* Close the vp, and give it back */
   3698 		if (ac->vp) {
   3699 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3700 			VOP_CLOSE(ac->vp, FREAD, NOCRED);
   3701 			vput(ac->vp);
   3702 			ac->vp = NULL;
   3703 		}
   3704 		ac = ac->next;
   3705 	}
   3706 }
   3707 
   3708 
   3709 void
   3710 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3711 {
   3712 	RF_AutoConfig_t *ac;
   3713 	RF_AutoConfig_t *next_ac;
   3714 
   3715 	ac = cset->ac;
   3716 	while(ac!=NULL) {
   3717 		next_ac = ac->next;
   3718 		/* nuke the label */
   3719 		free(ac->clabel, M_RAIDFRAME);
   3720 		/* cleanup the config structure */
   3721 		free(ac, M_RAIDFRAME);
   3722 		/* "next.." */
   3723 		ac = next_ac;
   3724 	}
   3725 	/* and, finally, nuke the config set */
   3726 	free(cset, M_RAIDFRAME);
   3727 }
   3728 
   3729 
/*
 * Populate a component label from the array's current in-core state:
 * identity (serial/mod counters), geometry, layout parameters, and
 * configuration preferences.  The caller fills in per-component
 * fields (row/column/status) afterwards as needed.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	/* Record parity-map parameters in the label as well. */
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
   3762 
/*
 * Configure one auto-detected RAID set described by cset.  A unit
 * number is chosen starting at the component label's last_unit
 * (preferring the unit the set was configured at last time) and
 * walking forward until a free unit is found.  Returns the softc of
 * the configured set on success, or NULL if memory allocation or
 * rf_Configure() fails.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Walk forward from last_unit until an unused unit is found. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* No softc exists at this unit yet; create one. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
				/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed: release the softc again. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
   3846 
   3847 void
   3848 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
   3849 {
   3850 	struct buf *bp;
   3851 	struct raid_softc *rs;
   3852 
   3853 	bp = (struct buf *)desc->bp;
   3854 	rs = desc->raidPtr->softc;
   3855 	disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
   3856 	    (bp->b_flags & B_READ));
   3857 }
   3858 
/*
 * Convenience wrapper to set up one of RAIDframe's pools: initialize
 * it for objects of the given size at IPL_BIO, pre-allocate xmin
 * items, and set the low/high water marks to xmin/xmax.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);	/* high-water mark */
	pool_prime(p, xmin);	/* pre-allocate xmin items now */
	pool_setlowat(p, xmin);	/* low-water mark */
}
   3868 
   3869 /*
   3870  * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
   3871  * if there is IO pending and if that IO could possibly be done for a
   3872  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
   3873  * otherwise.
   3874  *
   3875  */
   3876 
   3877 int
   3878 rf_buf_queue_check(RF_Raid_t *raidPtr)
   3879 {
   3880 	struct raid_softc *rs = raidPtr->softc;
   3881 	if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
   3882 		/* there is work to do */
   3883 		return 0;
   3884 	}
   3885 	/* default is nothing to do */
   3886 	return 1;
   3887 }
   3888 
   3889 int
   3890 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
   3891 {
   3892 	uint64_t numsecs;
   3893 	unsigned secsize;
   3894 	int error;
   3895 
   3896 	error = getdisksize(vp, &numsecs, &secsize);
   3897 	if (error == 0) {
   3898 		diskPtr->blockSize = secsize;
   3899 		diskPtr->numBlocks = numsecs - rf_protectedSectors;
   3900 		diskPtr->partitionSize = numsecs;
   3901 		return 0;
   3902 	}
   3903 	return error;
   3904 }
   3905 
/*
 * Autoconf match function: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
   3911 
/*
 * Autoconf attach function: intentionally empty.  Real setup is done
 * when a set is configured (see rf_auto_config_set()/raidinit()).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
   3917 
   3918 
   3919 static int
   3920 raid_detach(device_t self, int flags)
   3921 {
   3922 	int error;
   3923 	struct raid_softc *rs = raidget(device_unit(self), false);
   3924 
   3925 	if (rs == NULL)
   3926 		return ENXIO;
   3927 
   3928 	if ((error = raidlock(rs)) != 0)
   3929 		return (error);
   3930 
   3931 	error = raid_detach_unlocked(rs);
   3932 
   3933 	return error;
   3934 }
   3935 
   3936 static void
   3937 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
   3938 {
   3939 	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;
   3940 
   3941 	memset(dg, 0, sizeof(*dg));
   3942 
   3943 	dg->dg_secperunit = raidPtr->totalSectors;
   3944 	dg->dg_secsize = raidPtr->bytesPerSector;
   3945 	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   3946 	dg->dg_ntracks = 4 * raidPtr->numCol;
   3947 
   3948 	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
   3949 }
   3950 
   3951 /*
   3952  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
   3953  * We end up returning whatever error was returned by the first cache flush
   3954  * that fails.
   3955  */
   3956 
   3957 int
   3958 rf_sync_component_caches(RF_Raid_t *raidPtr)
   3959 {
   3960 	int c, sparecol;
   3961 	int e,error;
   3962 	int force = 1;
   3963 
   3964 	error = 0;
   3965 	for (c = 0; c < raidPtr->numCol; c++) {
   3966 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   3967 			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
   3968 					  &force, FWRITE, NOCRED);
   3969 			if (e) {
   3970 				if (e != ENODEV)
   3971 					printf("raid%d: cache flush to component %s failed.\n",
   3972 					       raidPtr->raidid, raidPtr->Disks[c].devname);
   3973 				if (error == 0) {
   3974 					error = e;
   3975 				}
   3976 			}
   3977 		}
   3978 	}
   3979 
   3980 	for( c = 0; c < raidPtr->numSpare ; c++) {
   3981 		sparecol = raidPtr->numCol + c;
   3982 		/* Need to ensure that the reconstruct actually completed! */
   3983 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3984 			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
   3985 					  DIOCCACHESYNC, &force, FWRITE, NOCRED);
   3986 			if (e) {
   3987 				if (e != ENODEV)
   3988 					printf("raid%d: cache flush to component %s failed.\n",
   3989 					       raidPtr->raidid, raidPtr->Disks[sparecol].devname);
   3990 				if (error == 0) {
   3991 					error = e;
   3992 				}
   3993 			}
   3994 		}
   3995 	}
   3996 	return error;
   3997 }
   3998 
   3999 /*
   4000  * Module interface
   4001  */
   4002 
/* Module glue: the raid driver depends on the dk_subr module. */
MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr");

#ifdef _MODULE
/* When built as a loadable module, supply the cfdriver ourselves. */
CFDRIVER_DECL(raid, DV_DISK, NULL);
#endif

static int raid_modcmd(modcmd_t, void *);
static int raid_modcmd_init(void);
static int raid_modcmd_fini(void);
   4012 
   4013 static int
   4014 raid_modcmd(modcmd_t cmd, void *data)
   4015 {
   4016 	int error;
   4017 
   4018 	error = 0;
   4019 	switch (cmd) {
   4020 	case MODULE_CMD_INIT:
   4021 		error = raid_modcmd_init();
   4022 		break;
   4023 	case MODULE_CMD_FINI:
   4024 		error = raid_modcmd_fini();
   4025 		break;
   4026 	default:
   4027 		error = ENOTTY;
   4028 		break;
   4029 	}
   4030 	return error;
   4031 }
   4032 
/*
 * Module/built-in initialization: register the block and character
 * devsw entries and the autoconf driver/attach glue (unwinding in
 * reverse order on failure), boot the RAIDframe core, and arrange
 * for RAID sets to be auto-configured once all real hardware devices
 * have been found.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	/* raid_lock is held across the whole registration sequence. */
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 lets devsw_attach pick the major numbers. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	/*
	 * EEXIST is tolerated: the devsw may already be registered
	 * (presumably when the driver is also built into the kernel
	 * -- TODO confirm).
	 */
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Unwind the devsw registration from above. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Unwind everything registered so far, in reverse order. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is necessarily 0 here: all failures returned above. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Non-fatal: continue without auto-configuration. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
   4103 
/*
 * Module teardown: refuse while any raid unit still exists, then
 * unregister the autoconf glue and devsw in reverse order of
 * registration, shut down the RAIDframe core, and destroy the
 * module-global locks.  On a partial failure, pieces detached so far
 * are re-attached to leave the module in a usable state.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist.  */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		/* Roll back: restore the cfattach detached above. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		/* Roll back both earlier detaches. */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* Shut down the RAIDframe core. */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
   4150