Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.410.4.3
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.410.4.3 2023/10/18 11:44:22 martin Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Greg Oster; Jason R. Thorpe.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright (c) 1988 University of Utah.
     34  * Copyright (c) 1990, 1993
     35  *      The Regents of the University of California.  All rights reserved.
     36  *
     37  * This code is derived from software contributed to Berkeley by
     38  * the Systems Programming Group of the University of Utah Computer
     39  * Science Department.
     40  *
     41  * Redistribution and use in source and binary forms, with or without
     42  * modification, are permitted provided that the following conditions
     43  * are met:
     44  * 1. Redistributions of source code must retain the above copyright
     45  *    notice, this list of conditions and the following disclaimer.
     46  * 2. Redistributions in binary form must reproduce the above copyright
     47  *    notice, this list of conditions and the following disclaimer in the
     48  *    documentation and/or other materials provided with the distribution.
     49  * 3. Neither the name of the University nor the names of its contributors
     50  *    may be used to endorse or promote products derived from this software
     51  *    without specific prior written permission.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     63  * SUCH DAMAGE.
     64  *
     65  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     66  *
     67  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     68  */
     69 
     70 /*
     71  * Copyright (c) 1995 Carnegie-Mellon University.
     72  * All rights reserved.
     73  *
     74  * Authors: Mark Holland, Jim Zelenka
     75  *
     76  * Permission to use, copy, modify and distribute this software and
     77  * its documentation is hereby granted, provided that both the copyright
     78  * notice and this permission notice appear in all copies of the
     79  * software, derivative works or modified versions, and any portions
     80  * thereof, and that both notices appear in supporting documentation.
     81  *
     82  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     83  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     84  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     85  *
     86  * Carnegie Mellon requests users of this software to return to
     87  *
     88  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     89  *  School of Computer Science
     90  *  Carnegie Mellon University
     91  *  Pittsburgh PA 15213-3890
     92  *
     93  * any improvements or extensions that they make and grant Carnegie the
     94  * rights to redistribute these changes.
     95  */
     96 
     97 /***********************************************************
     98  *
     99  * rf_kintf.c -- the kernel interface routines for RAIDframe
    100  *
    101  ***********************************************************/
    102 
    103 #include <sys/cdefs.h>
    104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.410.4.3 2023/10/18 11:44:22 martin Exp $");
    105 
    106 #ifdef _KERNEL_OPT
    107 #include "opt_raid_autoconfig.h"
    108 #include "opt_compat_netbsd32.h"
    109 #endif
    110 
    111 #include <sys/param.h>
    112 #include <sys/errno.h>
    113 #include <sys/pool.h>
    114 #include <sys/proc.h>
    115 #include <sys/queue.h>
    116 #include <sys/disk.h>
    117 #include <sys/device.h>
    118 #include <sys/stat.h>
    119 #include <sys/ioctl.h>
    120 #include <sys/fcntl.h>
    121 #include <sys/systm.h>
    122 #include <sys/vnode.h>
    123 #include <sys/disklabel.h>
    124 #include <sys/conf.h>
    125 #include <sys/buf.h>
    126 #include <sys/bufq.h>
    127 #include <sys/reboot.h>
    128 #include <sys/kauth.h>
    129 #include <sys/module.h>
    130 #include <sys/compat_stub.h>
    131 
    132 #include <prop/proplib.h>
    133 
    134 #include <dev/raidframe/raidframevar.h>
    135 #include <dev/raidframe/raidframeio.h>
    136 #include <dev/raidframe/rf_paritymap.h>
    137 
    138 #include "rf_raid.h"
    139 #include "rf_copyback.h"
    140 #include "rf_dag.h"
    141 #include "rf_dagflags.h"
    142 #include "rf_desc.h"
    143 #include "rf_diskqueue.h"
    144 #include "rf_etimer.h"
    145 #include "rf_general.h"
    146 #include "rf_kintf.h"
    147 #include "rf_options.h"
    148 #include "rf_driver.h"
    149 #include "rf_parityscan.h"
    150 #include "rf_threadstuff.h"
    151 
    152 #include "ioconf.h"
    153 
    154 #ifdef DEBUG
    155 int     rf_kdebug_level = 0;
    156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    157 #else				/* DEBUG */
    158 #define db1_printf(a) { }
    159 #endif				/* DEBUG */
    160 
    161 #define DEVICE_XNAME(dev) dev ? device_xname(dev) : "null"
    162 
    163 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
    164 static rf_declare_mutex2(rf_sparet_wait_mutex);
    165 static rf_declare_cond2(rf_sparet_wait_cv);
    166 static rf_declare_cond2(rf_sparet_resp_cv);
    167 
    168 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    169 						 * spare table */
    170 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    171 						 * installation process */
    172 #endif
    173 
    174 const int rf_b_pass = (B_PHYS|B_RAW|B_MEDIA_FLAGS);
    175 
    176 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    177 
    178 /* prototypes */
    179 static void KernelWakeupFunc(struct buf *);
    180 static void InitBP(struct buf *, struct vnode *, unsigned,
    181     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    182     void *, int);
    183 static void raidinit(struct raid_softc *);
    184 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
    185 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
    186 
    187 static int raid_match(device_t, cfdata_t, void *);
    188 static void raid_attach(device_t, device_t, void *);
    189 static int raid_detach(device_t, int);
    190 
    191 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    192     daddr_t, daddr_t);
    193 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    194     daddr_t, daddr_t, int);
    195 
    196 static int raidwrite_component_label(unsigned,
    197     dev_t, struct vnode *, RF_ComponentLabel_t *);
    198 static int raidread_component_label(unsigned,
    199     dev_t, struct vnode *, RF_ComponentLabel_t *);
    200 
    201 static int raid_diskstart(device_t, struct buf *bp);
    202 static int raid_dumpblocks(device_t, void *, daddr_t, int);
    203 static int raid_lastclose(device_t);
    204 
    205 static dev_type_open(raidopen);
    206 static dev_type_close(raidclose);
    207 static dev_type_read(raidread);
    208 static dev_type_write(raidwrite);
    209 static dev_type_ioctl(raidioctl);
    210 static dev_type_strategy(raidstrategy);
    211 static dev_type_dump(raiddump);
    212 static dev_type_size(raidsize);
    213 
/* Block-device switch: entry points for the raid block device. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
    224 
/* Character-device switch: entry points for the raw raid device. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
    239 
/* dk(9) driver glue: hooks the generic disk framework into RAIDframe. */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};
    249 
    250 #define	raidunit(x)	DISKUNIT(x)
    251 #define	raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)
    252 
    253 extern struct cfdriver raid_cd;
    254 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    255     raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    256     DVF_DETACH_SHUTDOWN);
    257 
/*
 * Internal representation of a rf_recon_req: the kernel-side copy of a
 * reconstruction request, handed to the recon worker threads.
 */
struct rf_recon_req_internal {
	RF_RowCol_t col;		/* column to reconstruct */
	RF_ReconReqFlags_t flags;	/* request flags from userland */
	void   *raidPtr;		/* RF_Raid_t * of the affected set */
};
    264 
    265 /*
    266  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    267  * Be aware that large numbers can allow the driver to consume a lot of
    268  * kernel memory, especially on writes, and in degraded mode reads.
    269  *
    270  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    271  * a single 64K write will typically require 64K for the old data,
    272  * 64K for the old parity, and 64K for the new parity, for a total
    273  * of 192K (if the parity buffer is not re-used immediately).
    274  * Even it if is used immediately, that's still 128K, which when multiplied
    275  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    276  *
    277  * Now in degraded mode, for example, a 64K read on the above setup may
    278  * require data reconstruction, which will require *all* of the 4 remaining
    279  * disks to participate -- 4 * 32K/disk == 128K again.
    280  */
    281 
    282 #ifndef RAIDOUTSTANDING
    283 #define RAIDOUTSTANDING   6
    284 #endif
    285 
    286 #define RAIDLABELDEV(dev)	\
    287 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    288 
    289 /* declared here, and made public, for the benefit of KVM stuff.. */
    290 
    291 static int raidlock(struct raid_softc *);
    292 static void raidunlock(struct raid_softc *);
    293 
    294 static int raid_detach_unlocked(struct raid_softc *);
    295 
    296 static void rf_markalldirty(RF_Raid_t *);
    297 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
    298 
    299 static void rf_ReconThread(struct rf_recon_req_internal *);
    300 static void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    301 static void rf_CopybackThread(RF_Raid_t *raidPtr);
    302 static void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
    303 static int rf_autoconfig(device_t);
    304 static int rf_rescan(void);
    305 static void rf_buildroothack(RF_ConfigSet_t *);
    306 
    307 static RF_AutoConfig_t *rf_find_raid_components(void);
    308 static RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    309 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    310 static void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    311 static int rf_set_autoconfig(RF_Raid_t *, int);
    312 static int rf_set_rootpartition(RF_Raid_t *, int);
    313 static void rf_release_all_vps(RF_ConfigSet_t *);
    314 static void rf_cleanup_config_set(RF_ConfigSet_t *);
    315 static int rf_have_enough_components(RF_ConfigSet_t *);
    316 static struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
    317 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
    318 
    319 /*
    320  * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
    321  * Note that this is overridden by having RAID_AUTOCONFIG as an option
    322  * in the kernel config file.
    323  */
    324 #ifdef RAID_AUTOCONFIG
    325 int raidautoconfig = 1;
    326 #else
    327 int raidautoconfig = 0;
    328 #endif
    329 static bool raidautoconfigdone = false;
    330 
    331 struct pool rf_alloclist_pool;   /* AllocList */
    332 
    333 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
    334 static kmutex_t raid_lock;
    335 
    336 static struct raid_softc *
    337 raidcreate(int unit) {
    338 	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
    339 	sc->sc_unit = unit;
    340 	cv_init(&sc->sc_cv, "raidunit");
    341 	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
    342 	return sc;
    343 }
    344 
    345 static void
    346 raiddestroy(struct raid_softc *sc) {
    347 	cv_destroy(&sc->sc_cv);
    348 	mutex_destroy(&sc->sc_mutex);
    349 	kmem_free(sc, sizeof(*sc));
    350 }
    351 
    352 static struct raid_softc *
    353 raidget(int unit, bool create) {
    354 	struct raid_softc *sc;
    355 	if (unit < 0) {
    356 #ifdef DIAGNOSTIC
    357 		panic("%s: unit %d!", __func__, unit);
    358 #endif
    359 		return NULL;
    360 	}
    361 	mutex_enter(&raid_lock);
    362 	LIST_FOREACH(sc, &raids, sc_link) {
    363 		if (sc->sc_unit == unit) {
    364 			mutex_exit(&raid_lock);
    365 			return sc;
    366 		}
    367 	}
    368 	mutex_exit(&raid_lock);
    369 	if (!create)
    370 		return NULL;
    371 	sc = raidcreate(unit);
    372 	mutex_enter(&raid_lock);
    373 	LIST_INSERT_HEAD(&raids, sc, sc_link);
    374 	mutex_exit(&raid_lock);
    375 	return sc;
    376 }
    377 
    378 static void
    379 raidput(struct raid_softc *sc) {
    380 	mutex_enter(&raid_lock);
    381 	LIST_REMOVE(sc, sc_link);
    382 	mutex_exit(&raid_lock);
    383 	raiddestroy(sc);
    384 }
    385 
/*
 * Legacy pseudo-device attach hook.  Device attachment and the
 * associated initialization now occur as part of the module
 * initialization, so this is intentionally a no-op; `num' is ignored.
 */
void
raidattach(int num)
{
}
    395 
    396 static int
    397 rf_autoconfig(device_t self)
    398 {
    399 	RF_AutoConfig_t *ac_list;
    400 	RF_ConfigSet_t *config_sets;
    401 
    402 	if (!raidautoconfig || raidautoconfigdone == true)
    403 		return 0;
    404 
    405 	/* XXX This code can only be run once. */
    406 	raidautoconfigdone = true;
    407 
    408 #ifdef __HAVE_CPU_BOOTCONF
    409 	/*
    410 	 * 0. find the boot device if needed first so we can use it later
    411 	 * this needs to be done before we autoconfigure any raid sets,
    412 	 * because if we use wedges we are not going to be able to open
    413 	 * the boot device later
    414 	 */
    415 	if (booted_device == NULL)
    416 		cpu_bootconf();
    417 #endif
    418 	/* 1. locate all RAID components on the system */
    419 	aprint_debug("Searching for RAID components...\n");
    420 	ac_list = rf_find_raid_components();
    421 
    422 	/* 2. Sort them into their respective sets. */
    423 	config_sets = rf_create_auto_sets(ac_list);
    424 
    425 	/*
    426 	 * 3. Evaluate each set and configure the valid ones.
    427 	 * This gets done in rf_buildroothack().
    428 	 */
    429 	rf_buildroothack(config_sets);
    430 
    431 	return 1;
    432 }
    433 
    434 int
    435 rf_inited(const struct raid_softc *rs) {
    436 	return (rs->sc_flags & RAIDF_INITED) != 0;
    437 }
    438 
/* Accessor: return the RF_Raid_t embedded in the softc. */
RF_Raid_t *
rf_get_raid(struct raid_softc *rs) {
	return &rs->sc_r;
}
    443 
/* Accessor: return the unit number of this raid softc. */
int
rf_get_unit(const struct raid_softc *rs) {
	return rs->sc_unit;
}
    448 
    449 static int
    450 rf_containsboot(RF_Raid_t *r, device_t bdv) {
    451 	const char *bootname;
    452 	size_t len;
    453 
    454 	/* if bdv is NULL, the set can't contain it. exit early. */
    455 	if (bdv == NULL)
    456 		return 0;
    457 
    458 	bootname = device_xname(bdv);
    459 	len = strlen(bootname);
    460 
    461 	for (int col = 0; col < r->numCol; col++) {
    462 		const char *devname = r->Disks[col].devname;
    463 		devname += sizeof("/dev/") - 1;
    464 		if (strncmp(devname, "dk", 2) == 0) {
    465 			const char *parent =
    466 			    dkwedge_get_parent_name(r->Disks[col].dev);
    467 			if (parent != NULL)
    468 				devname = parent;
    469 		}
    470 		if (strncmp(devname, bootname, len) == 0) {
    471 			struct raid_softc *sc = r->softc;
    472 			aprint_debug("raid%d includes boot device %s\n",
    473 			    sc->sc_unit, devname);
    474 			return 1;
    475 		}
    476 	}
    477 	return 0;
    478 }
    479 
    480 static int
    481 rf_rescan(void)
    482 {
    483 	RF_AutoConfig_t *ac_list;
    484 	RF_ConfigSet_t *config_sets, *cset, *next_cset;
    485 	struct raid_softc *sc;
    486 	int raid_added;
    487 
    488 	ac_list = rf_find_raid_components();
    489 	config_sets = rf_create_auto_sets(ac_list);
    490 
    491 	raid_added = 1;
    492 	while (raid_added > 0) {
    493 		raid_added = 0;
    494 		cset = config_sets;
    495 		while (cset != NULL) {
    496 			next_cset = cset->next;
    497 			if (rf_have_enough_components(cset) &&
    498 			    cset->ac->clabel->autoconfigure == 1) {
    499 				sc = rf_auto_config_set(cset);
    500 				if (sc != NULL) {
    501 					aprint_debug("raid%d: configured ok, rootable %d\n",
    502 						     sc->sc_unit, cset->rootable);
    503 					/* We added one RAID set */
    504 					raid_added++;
    505 				} else {
    506 					/* The autoconfig didn't work :( */
    507 					aprint_debug("Autoconfig failed\n");
    508 					rf_release_all_vps(cset);
    509 				}
    510 			} else {
    511 				/* we're not autoconfiguring this set...
    512 				   release the associated resources */
    513 				rf_release_all_vps(cset);
    514 			}
    515 			/* cleanup */
    516 			rf_cleanup_config_set(cset);
    517 			cset = next_cset;
    518 		}
    519 		if (raid_added > 0) {
    520 			/* We added at least one RAID set, so re-scan for recursive RAID */
    521 			ac_list = rf_find_raid_components();
    522 			config_sets = rf_create_auto_sets(ac_list);
    523 		}
    524 	}
    525 
    526 	return 0;
    527 }
    528 
    529 /*
    530  * Example setup:
    531  * dk1 at wd0: "raid@wd0", 171965 blocks at 32802, type: raidframe
     532  * dk3 at wd1: "raid@wd1", 171965 blocks at 32802, type: raidframe
    533  * raid1: Components: /dev/dk1 /dev/dk3
    534  * dk4 at raid1: "empty@raid1", 8192 blocks at 34, type: msdos
    535  * dk5 at raid1: "root@raid1", 163517 blocks at 8226, type: ffs
    536  *
    537  * If booted from wd0, booted_device will be
    538  * disk wd0, startblk = 41092, nblks = 163517
    539  *
    540  * That is, dk5 with startblk computed from the beginning of wd0
    541  * instead of beginning of raid1:
    542  * 32802 + 64 (RF_PROTECTED_SECTORS) + 8226 = 41092
    543  *
    544  * In order to find the boot wedge, we must iterate on each component,
     545  * find its offset from disk beginning, and look for the boot wedge with
     546  * startblk adjusted.
    547  */
    548 static device_t
    549 rf_find_bootwedge(struct raid_softc *rsc)
    550 {
    551 	RF_Raid_t *r = &rsc->sc_r;
    552 	const char *bootname;
    553 	size_t len;
    554 	device_t rdev = NULL;
    555 
    556 	if (booted_device == NULL)
    557 		goto out;
    558 
    559 	bootname = device_xname(booted_device);
    560 	len = strlen(bootname);
    561 
    562 	aprint_debug("%s: booted_device %s, startblk = %"PRId64", "
    563 		     "nblks = %"PRId64"\n", __func__,
    564 		     bootname, booted_startblk, booted_nblks);
    565 
    566 	for (int col = 0; col < r->numCol; col++) {
    567 		const char *devname = r->Disks[col].devname;
    568 		const char *parent;
    569 		struct disk *dk;
    570 		u_int nwedges;
    571 		struct dkwedge_info *dkwi;
    572 		struct dkwedge_list dkwl;
    573 		size_t dkwi_len;
    574 		int i;
    575 
    576 		devname += sizeof("/dev/") - 1;
    577 		if (strncmp(devname, "dk", 2) != 0)
    578 			continue;
    579 
    580 		parent = dkwedge_get_parent_name(r->Disks[col].dev);
    581 		if (parent == NULL) {
    582 			aprint_debug("%s: cannot find parent for "
    583 				     "component /dev/%s", __func__, devname);
    584 			continue;
    585 		}
    586 
    587 		if (strncmp(parent, bootname, len) != 0)
    588 			continue;
    589 
    590 		aprint_debug("%s: looking up wedge %s in device %s\n",
    591 			     __func__, devname, parent);
    592 
    593 		dk = disk_find(parent);
    594 		nwedges = dk->dk_nwedges;
    595 		dkwi_len = sizeof(*dkwi) * nwedges;
    596 		dkwi = RF_Malloc(dkwi_len);
    597 
    598 		dkwl.dkwl_buf = dkwi;
    599 		dkwl.dkwl_bufsize = dkwi_len;
    600 		dkwl.dkwl_nwedges = 0;
    601 		dkwl.dkwl_ncopied = 0;
    602 
    603 		if (dkwedge_list(dk, &dkwl, curlwp) == 0) {
    604 			daddr_t startblk;
    605 
    606 			for (i = 0; i < dkwl.dkwl_ncopied; i++) {
    607 				if (strcmp(dkwi[i].dkw_devname, devname) == 0)
    608 					break;
    609 			}
    610 
    611 			KASSERT(i < dkwl.dkwl_ncopied);
    612 
    613 			aprint_debug("%s: wedge %s, "
    614 				     "startblk = %"PRId64", "
    615 				     "nblks = %"PRId64"\n",
    616 				     __func__,
    617 				     dkwi[i].dkw_devname,
    618 				     dkwi[i].dkw_offset,
    619 				     dkwi[i].dkw_size);
    620 
    621 			startblk = booted_startblk
    622 				 - dkwi[i].dkw_offset
    623 				 - RF_PROTECTED_SECTORS;
    624 
    625 			aprint_debug("%s: looking for wedge in %s, "
    626 				     "startblk = %"PRId64", "
    627 				     "nblks = %"PRId64"\n",
    628 				     __func__,
    629 				     DEVICE_XNAME(rsc->sc_dksc.sc_dev),
    630 				     startblk, booted_nblks);
    631 
    632 			rdev = dkwedge_find_partition(rsc->sc_dksc.sc_dev,
    633 						      startblk,
    634 						      booted_nblks);
    635 			if (rdev) {
    636 				aprint_debug("%s: root candidate wedge %s "
    637 					     "shifted from %s\n", __func__,
    638 					     device_xname(rdev),
    639 					     dkwi[i].dkw_devname);
    640 				goto done;
    641 			} else {
    642 				aprint_debug("%s: not found\n", __func__);
    643 			}
    644 		}
    645 
    646 		aprint_debug("%s: nothing found for col %d\n", __func__, col);
    647 done:
    648 		RF_Free(dkwi, dkwi_len);
    649 	}
    650 
    651 out:
    652 	if (!rdev)
    653 		aprint_debug("%s: nothing found\n", __func__);
    654 
    655 	return rdev;
    656 }
    657 
/*
 * Autoconfigure all complete RAID sets and, unless the user hardwired
 * a root device (rootspec), try to redirect booted_device to the RAID
 * set (or a wedge on it) that contains the device we booted from.
 *
 * The configuration loop repeats as long as a pass configures at least
 * one new set, so RAID sets stacked on other RAID sets are also found.
 */
static void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;		/* number of rootable sets configured */
	int raid_added;		/* sets configured in the current pass */
	struct raid_softc *sc, *rsc;
	struct dk_softc *dksc = NULL;	/* XXX gcc -Os: may be used uninit. */

	sc = rsc = NULL;
	num_root = 0;

	raid_added = 1;
	while (raid_added > 0) {
		raid_added = 0;
		cset = config_sets;
		while (cset != NULL) {
			next_cset = cset->next;
			if (rf_have_enough_components(cset) &&
			    cset->ac->clabel->autoconfigure == 1) {
				sc = rf_auto_config_set(cset);
				if (sc != NULL) {
					aprint_debug("raid%d: configured ok, rootable %d\n",
						     sc->sc_unit, cset->rootable);
					/* We added one RAID set */
					raid_added++;
					if (cset->rootable) {
						/* remember the last rootable set */
						rsc = sc;
						num_root++;
					}
				} else {
					/* The autoconfig didn't work :( */
					aprint_debug("Autoconfig failed\n");
					rf_release_all_vps(cset);
				}
			} else {
				/* we're not autoconfiguring this set...
				   release the associated resources */
				rf_release_all_vps(cset);
			}
			/* cleanup */
			rf_cleanup_config_set(cset);
			cset = next_cset;
		}
		if (raid_added > 0) {
			/* We added at least one RAID set, so re-scan for recursive RAID */
			ac_list = rf_find_raid_components();
			config_sets = rf_create_auto_sets(ac_list);
		}
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL) {
		aprint_debug("%s: rootspec %s\n", __func__, rootspec);
		return;
	}

	/* we found something bootable... */
	if (num_root == 1) {
		device_t candidate_root = NULL;
		dksc = &rsc->sc_dksc;

		if (dksc->sc_dkdev.dk_nwedges != 0) {

			/* Find the wedge we booted from */
			candidate_root = rf_find_bootwedge(rsc);

			/* Try first partition */
			if (candidate_root == NULL) {
				size_t i = 0;
				candidate_root = dkwedge_find_by_parent(
				    device_xname(dksc->sc_dev), &i);
			}
			aprint_debug("%s: candidate wedge root %s\n",
			    __func__, DEVICE_XNAME(candidate_root));
		} else {
			/* No wedges: the raid device itself is the candidate. */
			candidate_root = dksc->sc_dev;
		}

		aprint_debug("%s: candidate root = %s, booted_device = %s, "
			     "root_partition = %d, contains_boot=%d\n",
		    __func__, DEVICE_XNAME(candidate_root),
		    DEVICE_XNAME(booted_device), rsc->sc_r.root_partition,
		    rf_containsboot(&rsc->sc_r, booted_device));

		/* XXX the check for booted_device == NULL can probably be
		 * dropped, now that rf_containsboot handles that case.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_method = "raidframe/single";
			booted_partition = 0;	/* XXX assume 'a' */
			aprint_debug("%s: set booted_device = %s\n", __func__,
			    DEVICE_XNAME(booted_device));
		}
	} else if (num_root > 1) {
		aprint_debug("%s: many roots=%d, %s\n", __func__, num_root,
		    DEVICE_XNAME(booted_device));

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* Several rootable sets: count only those that actually
		 * contain the boot device; use the set if it is unique. */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
				dksc = &rsc->sc_dksc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = dksc->sc_dev;
			booted_method = "raidframe/multi";
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
    800 
    801 static int
    802 raidsize(dev_t dev)
    803 {
    804 	struct raid_softc *rs;
    805 	struct dk_softc *dksc;
    806 	unsigned int unit;
    807 
    808 	unit = raidunit(dev);
    809 	if ((rs = raidget(unit, false)) == NULL)
    810 		return -1;
    811 	dksc = &rs->sc_dksc;
    812 
    813 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    814 		return -1;
    815 
    816 	return dk_size(dksc, dev);
    817 }
    818 
    819 static int
    820 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
    821 {
    822 	unsigned int unit;
    823 	struct raid_softc *rs;
    824 	struct dk_softc *dksc;
    825 
    826 	unit = raidunit(dev);
    827 	if ((rs = raidget(unit, false)) == NULL)
    828 		return ENXIO;
    829 	dksc = &rs->sc_dksc;
    830 
    831 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    832 		return ENODEV;
    833 
    834         /*
    835            Note that blkno is relative to this particular partition.
    836            By adding adding RF_PROTECTED_SECTORS, we get a value that
    837 	   is relative to the partition used for the underlying component.
    838         */
    839 	blkno += RF_PROTECTED_SECTORS;
    840 
    841 	return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
    842 }
    843 
/*
 * Write nblk blocks of crash-dump data at 'va' starting at 'blkno',
 * dumping straight through the block device of a single component.
 * Only RAID 1 sets (1 data + 1 parity column) are supported; the dump
 * goes to the first live component, or failing that to a used spare.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int     c, sparecol, j, scol, dumpto;
	int     error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the first component
	   2) a used_spare of the first component
	   3) the second component
	   4) a used_spare of the second component
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live component.
	   If we didn't find a live component, we now check to see
	   if there is a relevant spared component.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one?  Find which column this
			   spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared first
				   component!  We'll take that over
				   anything else found so far.  (We
				   couldn't have found a real first
				   component before, since this is a
				   used spare, and it's saying that
				   it's replacing the first
				   component.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the first
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared second component.
				   We'll dump to that if we haven't found
				   anything else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	/* Dump through the component's own block device, bypassing
	   the RAIDframe engine (which may not be usable at dump time). */
	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
    949 
    950 /* ARGSUSED */
    951 static int
    952 raidopen(dev_t dev, int flags, int fmt,
    953     struct lwp *l)
    954 {
    955 	int     unit = raidunit(dev);
    956 	struct raid_softc *rs;
    957 	struct dk_softc *dksc;
    958 	int     error = 0;
    959 	int     part, pmask;
    960 
    961 	if ((rs = raidget(unit, true)) == NULL)
    962 		return ENXIO;
    963 	if ((error = raidlock(rs)) != 0)
    964 		return error;
    965 
    966 	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
    967 		error = EBUSY;
    968 		goto bad;
    969 	}
    970 
    971 	dksc = &rs->sc_dksc;
    972 
    973 	part = DISKPART(dev);
    974 	pmask = (1 << part);
    975 
    976 	if (!DK_BUSY(dksc, pmask) &&
    977 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    978 		/* First one... mark things as dirty... Note that we *MUST*
    979 		 have done a configure before this.  I DO NOT WANT TO BE
    980 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    981 		 THAT THEY BELONG TOGETHER!!!!! */
    982 		/* XXX should check to see if we're only open for reading
    983 		   here... If so, we needn't do this, but then need some
    984 		   other way of keeping track of what's happened.. */
    985 
    986 		rf_markalldirty(&rs->sc_r);
    987 	}
    988 
    989 	if ((rs->sc_flags & RAIDF_INITED) != 0)
    990 		error = dk_open(dksc, dev, flags, fmt, l);
    991 
    992 bad:
    993 	raidunlock(rs);
    994 
    995 	return error;
    996 
    997 
    998 }
    999 
   1000 static int
   1001 raid_lastclose(device_t self)
   1002 {
   1003 	struct raid_softc *rs = raidsoftc(self);
   1004 
   1005 	/* Last one... device is not unconfigured yet.
   1006 	   Device shutdown has taken care of setting the
   1007 	   clean bits if RAIDF_INITED is not set
   1008 	   mark things as clean... */
   1009 
   1010 	rf_update_component_labels(&rs->sc_r,
   1011 	    RF_FINAL_COMPONENT_UPDATE);
   1012 
   1013 	/* pass to unlocked code */
   1014 	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
   1015 		rs->sc_flags |= RAIDF_DETACH;
   1016 
   1017 	return 0;
   1018 }
   1019 
   1020 /* ARGSUSED */
   1021 static int
   1022 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
   1023 {
   1024 	int     unit = raidunit(dev);
   1025 	struct raid_softc *rs;
   1026 	struct dk_softc *dksc;
   1027 	cfdata_t cf;
   1028 	int     error = 0, do_detach = 0, do_put = 0;
   1029 
   1030 	if ((rs = raidget(unit, false)) == NULL)
   1031 		return ENXIO;
   1032 	dksc = &rs->sc_dksc;
   1033 
   1034 	if ((error = raidlock(rs)) != 0)
   1035 		return error;
   1036 
   1037 	if ((rs->sc_flags & RAIDF_INITED) != 0) {
   1038 		error = dk_close(dksc, dev, flags, fmt, l);
   1039 		if ((rs->sc_flags & RAIDF_DETACH) != 0)
   1040 			do_detach = 1;
   1041 	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
   1042 		do_put = 1;
   1043 
   1044 	raidunlock(rs);
   1045 
   1046 	if (do_detach) {
   1047 		/* free the pseudo device attach bits */
   1048 		cf = device_cfdata(dksc->sc_dev);
   1049 		error = config_detach(dksc->sc_dev, 0);
   1050 		if (error == 0)
   1051 			free(cf, M_RAIDFRAME);
   1052 	} else if (do_put) {
   1053 		raidput(rs);
   1054 	}
   1055 
   1056 	return error;
   1057 
   1058 }
   1059 
/*
 * Signal iodone_cv under iodone_lock to wake whatever is sleeping on
 * it (presumably the per-set I/O daemon thread — confirm against the
 * thread setup code) so that queued work gets picked up.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
   1067 
   1068 static void
   1069 raidstrategy(struct buf *bp)
   1070 {
   1071 	unsigned int unit;
   1072 	struct raid_softc *rs;
   1073 	struct dk_softc *dksc;
   1074 	RF_Raid_t *raidPtr;
   1075 
   1076 	unit = raidunit(bp->b_dev);
   1077 	if ((rs = raidget(unit, false)) == NULL) {
   1078 		bp->b_error = ENXIO;
   1079 		goto fail;
   1080 	}
   1081 	if ((rs->sc_flags & RAIDF_INITED) == 0) {
   1082 		bp->b_error = ENXIO;
   1083 		goto fail;
   1084 	}
   1085 	dksc = &rs->sc_dksc;
   1086 	raidPtr = &rs->sc_r;
   1087 
   1088 	/* Queue IO only */
   1089 	if (dk_strategy_defer(dksc, bp))
   1090 		goto done;
   1091 
   1092 	/* schedule the IO to happen at the next convenient time */
   1093 	raid_wakeup(raidPtr);
   1094 
   1095 done:
   1096 	return;
   1097 
   1098 fail:
   1099 	bp->b_resid = bp->b_bcount;
   1100 	biodone(bp);
   1101 }
   1102 
   1103 static int
   1104 raid_diskstart(device_t dev, struct buf *bp)
   1105 {
   1106 	struct raid_softc *rs = raidsoftc(dev);
   1107 	RF_Raid_t *raidPtr;
   1108 
   1109 	raidPtr = &rs->sc_r;
   1110 	if (!raidPtr->valid) {
   1111 		db1_printf(("raid is not valid..\n"));
   1112 		return ENODEV;
   1113 	}
   1114 
   1115 	/* XXX */
   1116 	bp->b_resid = 0;
   1117 
   1118 	return raiddoaccess(raidPtr, bp);
   1119 }
   1120 
   1121 void
   1122 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
   1123 {
   1124 	struct raid_softc *rs;
   1125 	struct dk_softc *dksc;
   1126 
   1127 	rs = raidPtr->softc;
   1128 	dksc = &rs->sc_dksc;
   1129 
   1130 	dk_done(dksc, bp);
   1131 
   1132 	rf_lock_mutex2(raidPtr->mutex);
   1133 	raidPtr->openings++;
   1134 	rf_unlock_mutex2(raidPtr->mutex);
   1135 
   1136 	/* schedule more IO */
   1137 	raid_wakeup(raidPtr);
   1138 }
   1139 
   1140 /* ARGSUSED */
   1141 static int
   1142 raidread(dev_t dev, struct uio *uio, int flags)
   1143 {
   1144 	int     unit = raidunit(dev);
   1145 	struct raid_softc *rs;
   1146 
   1147 	if ((rs = raidget(unit, false)) == NULL)
   1148 		return ENXIO;
   1149 
   1150 	if ((rs->sc_flags & RAIDF_INITED) == 0)
   1151 		return ENXIO;
   1152 
   1153 	return physio(raidstrategy, NULL, dev, B_READ, minphys, uio);
   1154 
   1155 }
   1156 
   1157 /* ARGSUSED */
   1158 static int
   1159 raidwrite(dev_t dev, struct uio *uio, int flags)
   1160 {
   1161 	int     unit = raidunit(dev);
   1162 	struct raid_softc *rs;
   1163 
   1164 	if ((rs = raidget(unit, false)) == NULL)
   1165 		return ENXIO;
   1166 
   1167 	if ((rs->sc_flags & RAIDF_INITED) == 0)
   1168 		return ENXIO;
   1169 
   1170 	return physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio);
   1171 
   1172 }
   1173 
/*
 * Tear down a configured RAID set: shut down the RAIDframe engine,
 * drain and free queued buffers, and detach the disk.  Refuses with
 * EBUSY while the unit is open or a recon/parity-rewrite/copyback is
 * running.  NOTE(review): the "_unlocked" suffix suggests the caller
 * already holds the raid lock — confirm at the call sites.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	/* Nothing to tear down if the set was never configured. */
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
   1211 
/*
 * Administratively fail component rr->col and start a reconstruction
 * thread (rf_ReconThread) for it.  Returns EINVAL if the set has no
 * redundancy (RAID 0), the column is out of range, or failing the
 * disk now would be unsafe; ENOMEM if the request copy cannot be
 * allocated.
 */
int
rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
{
	struct rf_recon_req_internal *rrint;

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (rr->col < 0 || rr->col >= raidPtr->numCol) {
		/* bad column */
		return EINVAL;
	}

	/* The safety checks below read raid state, so take the mutex. */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->status == rf_rs_reconstructing) {
		/* you can't fail a disk while we're reconstructing! */
		/* XXX wrong for RAID6 */
		goto out;
	}
	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* some other component has failed.  Let's not make
		   things worse. XXX wrong for RAID6 */
		goto out;
	}
	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
		/* Can't fail a spared disk! */
		goto out;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* make a copy of the recon request so that we don't rely on
	 * the user's buffer */
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return(ENOMEM);
	rrint->col = rr->col;
	rrint->flags = rr->flags;
	rrint->raidPtr = raidPtr;

	/* rrint is consumed (and freed) by the recon thread. */
	return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
	    rrint, "raid_recon");
out:
	rf_unlock_mutex2(raidPtr->mutex);
	return EINVAL;
}
   1260 
   1261 static int
   1262 rf_copyinspecificbuf(RF_Config_t *k_cfg)
   1263 {
   1264 	/* allocate a buffer for the layout-specific data, and copy it in */
   1265 	if (k_cfg->layoutSpecificSize == 0)
   1266 		return 0;
   1267 
   1268 	if (k_cfg->layoutSpecificSize > 10000) {
   1269 	    /* sanity check */
   1270 	    return EINVAL;
   1271 	}
   1272 
   1273 	u_char *specific_buf;
   1274 	specific_buf =  RF_Malloc(k_cfg->layoutSpecificSize);
   1275 	if (specific_buf == NULL)
   1276 		return ENOMEM;
   1277 
   1278 	int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1279 	    k_cfg->layoutSpecificSize);
   1280 	if (retcode) {
   1281 		RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1282 		db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
   1283 		return retcode;
   1284 	}
   1285 
   1286 	k_cfg->layoutSpecific = specific_buf;
   1287 	return 0;
   1288 }
   1289 
   1290 static int
   1291 rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
   1292 {
   1293 	RF_Config_t *u_cfg = *((RF_Config_t **) data);
   1294 
   1295 	if (rs->sc_r.valid) {
   1296 		/* There is a valid RAID set running on this unit! */
   1297 		printf("raid%d: Device already configured!\n", rs->sc_unit);
   1298 		return EINVAL;
   1299 	}
   1300 
   1301 	/* copy-in the configuration information */
   1302 	/* data points to a pointer to the configuration structure */
   1303 	*k_cfg = RF_Malloc(sizeof(**k_cfg));
   1304 	if (*k_cfg == NULL) {
   1305 		return ENOMEM;
   1306 	}
   1307 	int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
   1308 	if (retcode == 0)
   1309 		return 0;
   1310 	RF_Free(*k_cfg, sizeof(RF_Config_t));
   1311 	db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
   1312 	rs->sc_flags |= RAIDF_SHUTDOWN;
   1313 	return retcode;
   1314 }
   1315 
/*
 * Configure the RAID set described by k_cfg.  Consumes k_cfg: both
 * the structure and its layout-specific buffer are freed before
 * return.  On any failure RAIDF_SHUTDOWN is set so the unit will be
 * detached when it is closed.
 */
int
rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
{
	int retcode, i;
	RF_Raid_t *raidPtr = &rs->sc_r;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	/* Replace the userland layoutSpecific pointer with a kernel copy. */
	if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
		goto out;

	/* should do some kind of sanity check on the configuration.
	 * Store the sum of all the bytes in the last byte? */

	/* Force nul-termination on all strings. */
#define ZERO_FINAL(s)	do { s[sizeof(s) - 1] = '\0'; } while (0)
	/* NOTE(review): only row 0 of devnames is terminated here;
	   presumably multi-row sets are no longer supported — confirm. */
	for (i = 0; i < RF_MAXCOL; i++) {
		ZERO_FINAL(k_cfg->devnames[0][i]);
	}
	for (i = 0; i < RF_MAXSPARE; i++) {
		ZERO_FINAL(k_cfg->spare_names[i]);
	}
	for (i = 0; i < RF_MAXDBGV; i++) {
		ZERO_FINAL(k_cfg->debugVars[i]);
	}
#undef ZERO_FINAL

	/* Check some basic limits. */
	if (k_cfg->numCol >= RF_MAXCOL || k_cfg->numCol < 0) {
		retcode = EINVAL;
		goto out;
	}
	if (k_cfg->numSpare >= RF_MAXSPARE || k_cfg->numSpare < 0) {
		retcode = EINVAL;
		goto out;
	}

	/* configure the system */

	/*
	 * Clear the entire RAID descriptor, just to make sure
	 *  there is no stale data left in the case of a
	 *  reconfiguration
	 */
	memset(raidPtr, 0, sizeof(*raidPtr));
	raidPtr->softc = rs;
	raidPtr->raidid = rs->sc_unit;

	retcode = rf_Configure(raidPtr, k_cfg, NULL);

	if (retcode == 0) {
		/* allow this many simultaneous IO's to
		   this RAID device */
		raidPtr->openings = RAIDOUTSTANDING;

		raidinit(rs);
		raid_wakeup(raidPtr);
		rf_markalldirty(raidPtr);
	}

	/* free the buffers.  No return code here. */
	if (k_cfg->layoutSpecificSize) {
		RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
	}
out:
	RF_Free(k_cfg, sizeof(RF_Config_t));
	if (retcode) {
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		rs->sc_flags |= RAIDF_SHUTDOWN;
	}
	return retcode;
}
   1391 
#if RF_DISABLED
/*
 * Write a user-supplied component label to a column.  Currently
 * compiled out (RF_DISABLED): the label is copied and flushed without
 * validation beyond a column range check — see the XXX notes below.
 */
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

	/* XXX check the label for valid stuff... */
	/* Note that some things *should not* get modified --
	   the user should be re-initing the labels instead of
	   trying to patch things.
	   */
#ifdef DEBUG
	int raidid = raidPtr->raidid;
	printf("raid%d: Got component label:\n", raidid);
	printf("raid%d: Version: %d\n", raidid, clabel->version);
	printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
	printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
	printf("raid%d: Column: %d\n", raidid, clabel->column);
	printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
	printf("raid%d: Clean: %d\n", raidid, clabel->clean);
	printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif	/* DEBUG */
	clabel->row = 0;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return(EINVAL);
	}

	/* XXX this isn't allowed to do anything for now :-) */

	/* XXX and before it is, we need to fill in the rest
	   of the fields!?!?!?! */
	memcpy(raidget_component_label(raidPtr, column),
	    clabel, sizeof(*clabel));
	raidflush_component_label(raidPtr, column);
	return 0;
}
#endif
   1430 
   1431 static int
   1432 rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
   1433 {
   1434 	/*
   1435 	   we only want the serial number from
   1436 	   the above.  We get all the rest of the information
   1437 	   from the config that was used to create this RAID
   1438 	   set.
   1439 	   */
   1440 
   1441 	raidPtr->serial_number = clabel->serial_number;
   1442 
   1443 	for (int column = 0; column < raidPtr->numCol; column++) {
   1444 		RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
   1445 		if (RF_DEAD_DISK(diskPtr->status))
   1446 			continue;
   1447 		RF_ComponentLabel_t *ci_label = raidget_component_label(
   1448 		    raidPtr, column);
   1449 		/* Zeroing this is important. */
   1450 		memset(ci_label, 0, sizeof(*ci_label));
   1451 		raid_init_component_label(raidPtr, ci_label);
   1452 		ci_label->serial_number = raidPtr->serial_number;
   1453 		ci_label->row = 0; /* we dont' pretend to support more */
   1454 		rf_component_label_set_partitionsize(ci_label,
   1455 		    diskPtr->partitionSize);
   1456 		ci_label->column = column;
   1457 		raidflush_component_label(raidPtr, column);
   1458 		/* XXXjld what about the spares? */
   1459 	}
   1460 
   1461 	return 0;
   1462 }
   1463 
/*
 * Rebuild the given component in place (onto the same disk), by
 * starting an rf_ReconstructInPlaceThread.  Returns EINVAL if the set
 * has no redundancy, a rebuild is already running, the column is out
 * of range, or the component's current state forbids it; ENOMEM if
 * the request structure cannot be allocated.
 */
static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (raidPtr->recon_in_progress == 1) {
		/* a reconstruct is already in progress! */
		return EINVAL;
	}

	/* Work on a local copy so we never touch the caller's buffer. */
	RF_SingleComponent_t component;
	memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
	component.row = 0; /* we don't support any more */
	int column = component.column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	/* Status checks below read shared raid state: take the mutex. */
	rf_lock_mutex2(raidPtr->mutex);
	if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* XXX 0 above shouldn't be constant!!! */
		/* some component other than this has failed.
		   Let's not make things worse than they already
		   are... */
		printf("raid%d: Unable to reconstruct to disk at:\n",
		       raidPtr->raidid);
		printf("raid%d:     Col: %d   Too many failures.\n",
		       raidPtr->raidid, column);
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
		printf("raid%d: Unable to reconstruct to disk at:\n",
		       raidPtr->raidid);
		printf("raid%d:    Col: %d   "
		    "Reconstruction already occurring!\n",
		    raidPtr->raidid, column);

		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_spared) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	rf_unlock_mutex2(raidPtr->mutex);

	/* rrint is consumed (and freed) by the reconstruction thread. */
	struct rf_recon_req_internal *rrint;
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return ENOMEM;

	rrint->col = column;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread,
	    rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}
   1531 
   1532 static int
   1533 rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
   1534 {
   1535 	/*
   1536 	 * This makes no sense on a RAID 0, or if we are not reconstructing
   1537 	 * so tell the user it's done.
   1538 	 */
   1539 	if (raidPtr->Layout.map->faultsTolerated == 0 ||
   1540 	    raidPtr->status != rf_rs_reconstructing) {
   1541 		*data = 100;
   1542 		return 0;
   1543 	}
   1544 	if (raidPtr->reconControl->numRUsTotal == 0) {
   1545 		*data = 0;
   1546 		return 0;
   1547 	}
   1548 	*data = (raidPtr->reconControl->numRUsComplete * 100
   1549 	    / raidPtr->reconControl->numRUsTotal);
   1550 	return 0;
   1551 }
   1552 
   1553 /*
   1554  * Copy a RF_SingleComponent_t from 'data', ensuring nul-termination
   1555  * on the component_name[] array.
   1556  */
   1557 static void
   1558 rf_copy_single_component(RF_SingleComponent_t *component, void *data)
   1559 {
   1560 
   1561 	memcpy(component, data, sizeof *component);
   1562 	component->component_name[sizeof(component->component_name) - 1] = '\0';
   1563 }
   1564 
   1565 static int
   1566 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
   1567 {
   1568 	int     unit = raidunit(dev);
   1569 	int     part, pmask;
   1570 	struct raid_softc *rs;
   1571 	struct dk_softc *dksc;
   1572 	RF_Config_t *k_cfg;
   1573 	RF_Raid_t *raidPtr;
   1574 	RF_AccTotals_t *totals;
   1575 	RF_SingleComponent_t component;
   1576 	RF_DeviceConfig_t *d_cfg, *ucfgp;
   1577 	int retcode = 0;
   1578 	int column;
   1579 	RF_ComponentLabel_t *clabel;
   1580 	int d;
   1581 
   1582 	if ((rs = raidget(unit, false)) == NULL)
   1583 		return ENXIO;
   1584 
   1585 	dksc = &rs->sc_dksc;
   1586 	raidPtr = &rs->sc_r;
   1587 
   1588 	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
   1589 	    (int) DISKPART(dev), (int) unit, cmd));
   1590 
   1591 	/* Only CONFIGURE and RESCAN can be done without the RAID being initialized. */
   1592 	switch (cmd) {
   1593 	case RAIDFRAME_CONFIGURE:
   1594 	case RAIDFRAME_RESCAN:
   1595 		break;
   1596 	default:
   1597 		if (!rf_inited(rs))
   1598 			return ENXIO;
   1599 	}
   1600 
   1601 	switch (cmd) {
   1602 		/* configure the system */
   1603 	case RAIDFRAME_CONFIGURE:
   1604 		if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
   1605 			return retcode;
   1606 		return rf_construct(rs, k_cfg);
   1607 
   1608 		/* shutdown the system */
   1609 	case RAIDFRAME_SHUTDOWN:
   1610 
   1611 		part = DISKPART(dev);
   1612 		pmask = (1 << part);
   1613 
   1614 		if ((retcode = raidlock(rs)) != 0)
   1615 			return retcode;
   1616 
   1617 		if (DK_BUSY(dksc, pmask) ||
   1618 		    raidPtr->recon_in_progress != 0 ||
   1619 		    raidPtr->parity_rewrite_in_progress != 0 ||
   1620 		    raidPtr->copyback_in_progress != 0)
   1621 			retcode = EBUSY;
   1622 		else {
   1623 			/* detach and free on close */
   1624 			rs->sc_flags |= RAIDF_SHUTDOWN;
   1625 			retcode = 0;
   1626 		}
   1627 
   1628 		raidunlock(rs);
   1629 
   1630 		return retcode;
   1631 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1632 		return rf_get_component_label(raidPtr, data);
   1633 
   1634 #if RF_DISABLED
   1635 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1636 		return rf_set_component_label(raidPtr, data);
   1637 #endif
   1638 
   1639 	case RAIDFRAME_INIT_LABELS:
   1640 		return rf_init_component_label(raidPtr, data);
   1641 
   1642 	case RAIDFRAME_SET_AUTOCONFIG:
   1643 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1644 		printf("raid%d: New autoconfig value is: %d\n",
   1645 		       raidPtr->raidid, d);
   1646 		*(int *) data = d;
   1647 		return retcode;
   1648 
   1649 	case RAIDFRAME_SET_ROOT:
   1650 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1651 		printf("raid%d: New rootpartition value is: %d\n",
   1652 		       raidPtr->raidid, d);
   1653 		*(int *) data = d;
   1654 		return retcode;
   1655 
   1656 		/* initialize all parity */
   1657 	case RAIDFRAME_REWRITEPARITY:
   1658 
   1659 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1660 			/* Parity for RAID 0 is trivially correct */
   1661 			raidPtr->parity_good = RF_RAID_CLEAN;
   1662 			return 0;
   1663 		}
   1664 
   1665 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1666 			/* Re-write is already in progress! */
   1667 			return EINVAL;
   1668 		}
   1669 
   1670 		return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1671 		    rf_RewriteParityThread, raidPtr,"raid_parity");
   1672 
   1673 	case RAIDFRAME_ADD_HOT_SPARE:
   1674 		rf_copy_single_component(&component, data);
   1675 		return rf_add_hot_spare(raidPtr, &component);
   1676 
   1677 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1678 		return retcode;
   1679 
   1680 	case RAIDFRAME_DELETE_COMPONENT:
   1681 		rf_copy_single_component(&component, data);
   1682 		return rf_delete_component(raidPtr, &component);
   1683 
   1684 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1685 		rf_copy_single_component(&component, data);
   1686 		return rf_incorporate_hot_spare(raidPtr, &component);
   1687 
   1688 	case RAIDFRAME_REBUILD_IN_PLACE:
   1689 		return rf_rebuild_in_place(raidPtr, data);
   1690 
   1691 	case RAIDFRAME_GET_INFO:
   1692 		ucfgp = *(RF_DeviceConfig_t **)data;
   1693 		d_cfg = RF_Malloc(sizeof(*d_cfg));
   1694 		if (d_cfg == NULL)
   1695 			return ENOMEM;
   1696 		retcode = rf_get_info(raidPtr, d_cfg);
   1697 		if (retcode == 0) {
   1698 			retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
   1699 		}
   1700 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1701 		return retcode;
   1702 
   1703 	case RAIDFRAME_CHECK_PARITY:
   1704 		*(int *) data = raidPtr->parity_good;
   1705 		return 0;
   1706 
   1707 	case RAIDFRAME_PARITYMAP_STATUS:
   1708 		if (rf_paritymap_ineligible(raidPtr))
   1709 			return EINVAL;
   1710 		rf_paritymap_status(raidPtr->parity_map, data);
   1711 		return 0;
   1712 
   1713 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
   1714 		if (rf_paritymap_ineligible(raidPtr))
   1715 			return EINVAL;
   1716 		if (raidPtr->parity_map == NULL)
   1717 			return ENOENT; /* ??? */
   1718 		if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
   1719 			return EINVAL;
   1720 		return 0;
   1721 
   1722 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
   1723 		if (rf_paritymap_ineligible(raidPtr))
   1724 			return EINVAL;
   1725 		*(int *) data = rf_paritymap_get_disable(raidPtr);
   1726 		return 0;
   1727 
   1728 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
   1729 		if (rf_paritymap_ineligible(raidPtr))
   1730 			return EINVAL;
   1731 		rf_paritymap_set_disable(raidPtr, *(int *)data);
   1732 		/* XXX should errors be passed up? */
   1733 		return 0;
   1734 
   1735 	case RAIDFRAME_RESCAN:
   1736 		return rf_rescan();
   1737 
   1738 	case RAIDFRAME_RESET_ACCTOTALS:
   1739 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1740 		return 0;
   1741 
   1742 	case RAIDFRAME_GET_ACCTOTALS:
   1743 		totals = (RF_AccTotals_t *) data;
   1744 		*totals = raidPtr->acc_totals;
   1745 		return 0;
   1746 
   1747 	case RAIDFRAME_KEEP_ACCTOTALS:
   1748 		raidPtr->keep_acc_totals = *(int *)data;
   1749 		return 0;
   1750 
   1751 	case RAIDFRAME_GET_SIZE:
   1752 		*(int *) data = raidPtr->totalSectors;
   1753 		return 0;
   1754 
   1755 	case RAIDFRAME_FAIL_DISK:
   1756 		return rf_fail_disk(raidPtr, data);
   1757 
   1758 		/* invoke a copyback operation after recon on whatever disk
   1759 		 * needs it, if any */
   1760 	case RAIDFRAME_COPYBACK:
   1761 
   1762 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1763 			/* This makes no sense on a RAID 0!! */
   1764 			return EINVAL;
   1765 		}
   1766 
   1767 		if (raidPtr->copyback_in_progress == 1) {
   1768 			/* Copyback is already in progress! */
   1769 			return EINVAL;
   1770 		}
   1771 
   1772 		return RF_CREATE_THREAD(raidPtr->copyback_thread,
   1773 		    rf_CopybackThread, raidPtr, "raid_copyback");
   1774 
   1775 		/* return the percentage completion of reconstruction */
   1776 	case RAIDFRAME_CHECK_RECON_STATUS:
   1777 		return rf_check_recon_status(raidPtr, data);
   1778 
   1779 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1780 		rf_check_recon_status_ext(raidPtr, data);
   1781 		return 0;
   1782 
   1783 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1784 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1785 			/* This makes no sense on a RAID 0, so tell the
   1786 			   user it's done. */
   1787 			*(int *) data = 100;
   1788 			return 0;
   1789 		}
   1790 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1791 			*(int *) data = 100 *
   1792 				raidPtr->parity_rewrite_stripes_done /
   1793 				raidPtr->Layout.numStripe;
   1794 		} else {
   1795 			*(int *) data = 100;
   1796 		}
   1797 		return 0;
   1798 
   1799 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1800 		rf_check_parityrewrite_status_ext(raidPtr, data);
   1801 		return 0;
   1802 
   1803 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1804 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1805 			/* This makes no sense on a RAID 0 */
   1806 			*(int *) data = 100;
   1807 			return 0;
   1808 		}
   1809 		if (raidPtr->copyback_in_progress == 1) {
   1810 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1811 				raidPtr->Layout.numStripe;
   1812 		} else {
   1813 			*(int *) data = 100;
   1814 		}
   1815 		return 0;
   1816 
   1817 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1818 		rf_check_copyback_status_ext(raidPtr, data);
   1819 		return 0;
   1820 
   1821 	case RAIDFRAME_SET_LAST_UNIT:
   1822 		for (column = 0; column < raidPtr->numCol; column++)
   1823 			if (raidPtr->Disks[column].status != rf_ds_optimal)
   1824 				return EBUSY;
   1825 
   1826 		for (column = 0; column < raidPtr->numCol; column++) {
   1827 			clabel = raidget_component_label(raidPtr, column);
   1828 			clabel->last_unit = *(int *)data;
   1829 			raidflush_component_label(raidPtr, column);
   1830 		}
   1831 		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
   1832 		return 0;
   1833 
   1834 		/* the sparetable daemon calls this to wait for the kernel to
   1835 		 * need a spare table. this ioctl does not return until a
   1836 		 * spare table is needed. XXX -- calling mpsleep here in the
   1837 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1838 		 * -- I should either compute the spare table in the kernel,
   1839 		 * or have a different -- XXX XXX -- interface (a different
   1840 		 * character device) for delivering the table     -- XXX */
   1841 #if RF_DISABLED
   1842 	case RAIDFRAME_SPARET_WAIT:
   1843 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1844 		while (!rf_sparet_wait_queue)
   1845 			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
   1846 		RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
   1847 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1848 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1849 
   1850 		/* structure assignment */
   1851 		*((RF_SparetWait_t *) data) = *waitreq;
   1852 
   1853 		RF_Free(waitreq, sizeof(*waitreq));
   1854 		return 0;
   1855 
   1856 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1857 		 * code in it that will cause the dameon to exit */
   1858 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1859 		waitreq = RF_Malloc(sizeof(*waitreq));
   1860 		waitreq->fcol = -1;
   1861 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1862 		waitreq->next = rf_sparet_wait_queue;
   1863 		rf_sparet_wait_queue = waitreq;
   1864 		rf_broadcast_cond2(rf_sparet_wait_cv);
   1865 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1866 		return 0;
   1867 
   1868 		/* used by the spare table daemon to deliver a spare table
   1869 		 * into the kernel */
   1870 	case RAIDFRAME_SEND_SPARET:
   1871 
   1872 		/* install the spare table */
   1873 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1874 
   1875 		/* respond to the requestor.  the return status of the spare
   1876 		 * table installation is passed in the "fcol" field */
   1877 		waitred = RF_Malloc(sizeof(*waitreq));
   1878 		waitreq->fcol = retcode;
   1879 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1880 		waitreq->next = rf_sparet_resp_queue;
   1881 		rf_sparet_resp_queue = waitreq;
   1882 		rf_broadcast_cond2(rf_sparet_resp_cv);
   1883 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1884 
   1885 		return retcode;
   1886 #endif
   1887 	default:
   1888 		/*
   1889 		 * Don't bother trying to load compat modules
   1890 		 * if it is not our ioctl. This is more efficient
   1891 		 * and makes rump tests not depend on compat code
   1892 		 */
   1893 		if (IOCGROUP(cmd) != 'r')
   1894 			break;
   1895 #ifdef _LP64
   1896 		if ((l->l_proc->p_flag & PK_32) != 0) {
   1897 			module_autoload("compat_netbsd32_raid",
   1898 			    MODULE_CLASS_EXEC);
   1899 			MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
   1900 			    (rs, cmd, data), enosys(), retcode);
   1901 			if (retcode != EPASSTHROUGH)
   1902 				return retcode;
   1903 		}
   1904 #endif
   1905 		module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
   1906 		MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
   1907 		    (rs, cmd, data), enosys(), retcode);
   1908 		if (retcode != EPASSTHROUGH)
   1909 			return retcode;
   1910 
   1911 		module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
   1912 		MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
   1913 		    (rs, cmd, data), enosys(), retcode);
   1914 		if (retcode != EPASSTHROUGH)
   1915 			return retcode;
   1916 		break; /* fall through to the os-specific code below */
   1917 
   1918 	}
   1919 
   1920 	if (!raidPtr->valid)
   1921 		return EINVAL;
   1922 
   1923 	/*
   1924 	 * Add support for "regular" device ioctls here.
   1925 	 */
   1926 
   1927 	switch (cmd) {
   1928 	case DIOCGCACHE:
   1929 		retcode = rf_get_component_caches(raidPtr, (int *)data);
   1930 		break;
   1931 
   1932 	case DIOCCACHESYNC:
   1933 		retcode = rf_sync_component_caches(raidPtr, *(int *)data);
   1934 		break;
   1935 
   1936 	default:
   1937 		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
   1938 		break;
   1939 	}
   1940 
   1941 	return retcode;
   1942 
   1943 }
   1944 
   1945 
   1946 /* raidinit -- complete the rest of the initialization for the
   1947    RAIDframe device.  */
   1948 
   1949 
   1950 static void
   1951 raidinit(struct raid_softc *rs)
   1952 {
   1953 	cfdata_t cf;
   1954 	unsigned int unit;
   1955 	struct dk_softc *dksc = &rs->sc_dksc;
   1956 	RF_Raid_t *raidPtr = &rs->sc_r;
   1957 	device_t dev;
   1958 
   1959 	unit = raidPtr->raidid;
   1960 
   1961 	/* XXX doesn't check bounds. */
   1962 	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);
   1963 
   1964 	/* attach the pseudo device */
   1965 	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
   1966 	cf->cf_name = raid_cd.cd_name;
   1967 	cf->cf_atname = raid_cd.cd_name;
   1968 	cf->cf_unit = unit;
   1969 	cf->cf_fstate = FSTATE_STAR;
   1970 
   1971 	dev = config_attach_pseudo(cf);
   1972 	if (dev == NULL) {
   1973 		printf("raid%d: config_attach_pseudo failed\n",
   1974 		    raidPtr->raidid);
   1975 		free(cf, M_RAIDFRAME);
   1976 		return;
   1977 	}
   1978 
   1979 	/* provide a backpointer to the real softc */
   1980 	raidsoftc(dev) = rs;
   1981 
   1982 	/* disk_attach actually creates space for the CPU disklabel, among
   1983 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1984 	 * with disklabels. */
   1985 	dk_init(dksc, dev, DKTYPE_RAID);
   1986 	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);
   1987 
   1988 	/* XXX There may be a weird interaction here between this, and
   1989 	 * protectedSectors, as used in RAIDframe.  */
   1990 
   1991 	rs->sc_size = raidPtr->totalSectors;
   1992 
   1993 	/* Attach dk and disk subsystems */
   1994 	dk_attach(dksc);
   1995 	disk_attach(&dksc->sc_dkdev);
   1996 	rf_set_geometry(rs, raidPtr);
   1997 
   1998 	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);
   1999 
   2000 	/* mark unit as usuable */
   2001 	rs->sc_flags |= RAIDF_INITED;
   2002 
   2003 	dkwedge_discover(&dksc->sc_dkdev);
   2004 }
   2005 
   2006 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   2007 /* wake up the daemon & tell it to get us a spare table
   2008  * XXX
   2009  * the entries in the queues should be tagged with the raidPtr
   2010  * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
   2012  * XXX
   2013  *
   2014  * XXX This code is not currently used. GO
   2015  */
   2016 int
   2017 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
   2018 {
   2019 	int     retcode;
   2020 
   2021 	rf_lock_mutex2(rf_sparet_wait_mutex);
   2022 	req->next = rf_sparet_wait_queue;
   2023 	rf_sparet_wait_queue = req;
   2024 	rf_broadcast_cond2(rf_sparet_wait_cv);
   2025 
   2026 	/* mpsleep unlocks the mutex */
   2027 	while (!rf_sparet_resp_queue) {
   2028 		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
   2029 	}
   2030 	req = rf_sparet_resp_queue;
   2031 	rf_sparet_resp_queue = req->next;
   2032 	rf_unlock_mutex2(rf_sparet_wait_mutex);
   2033 
   2034 	retcode = req->fcol;
   2035 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   2036 					 * alloc'd */
   2037 	return retcode;
   2038 }
   2039 #endif
   2040 
   2041 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   2042  * bp & passes it down.
   2043  * any calls originating in the kernel must use non-blocking I/O
   2044  * do some extra sanity checking to return "appropriate" error values for
   2045  * certain conditions (to make some standard utilities work)
   2046  *
   2047  * Formerly known as: rf_DoAccessKernel
   2048  */
   2049 void
   2050 raidstart(RF_Raid_t *raidPtr)
   2051 {
   2052 	struct raid_softc *rs;
   2053 	struct dk_softc *dksc;
   2054 
   2055 	rs = raidPtr->softc;
   2056 	dksc = &rs->sc_dksc;
   2057 	/* quick check to see if anything has died recently */
   2058 	rf_lock_mutex2(raidPtr->mutex);
   2059 	if (raidPtr->numNewFailures > 0) {
   2060 		rf_unlock_mutex2(raidPtr->mutex);
   2061 		rf_update_component_labels(raidPtr,
   2062 					   RF_NORMAL_COMPONENT_UPDATE);
   2063 		rf_lock_mutex2(raidPtr->mutex);
   2064 		raidPtr->numNewFailures--;
   2065 	}
   2066 	rf_unlock_mutex2(raidPtr->mutex);
   2067 
   2068 	if ((rs->sc_flags & RAIDF_INITED) == 0) {
   2069 		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
   2070 		return;
   2071 	}
   2072 
   2073 	dk_start(dksc, NULL);
   2074 }
   2075 
   2076 static int
   2077 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
   2078 {
   2079 	RF_SectorCount_t num_blocks, pb, sum;
   2080 	RF_RaidAddr_t raid_addr;
   2081 	daddr_t blocknum;
   2082 	int rc;
   2083 
   2084 	rf_lock_mutex2(raidPtr->mutex);
   2085 	if (raidPtr->openings == 0) {
   2086 		rf_unlock_mutex2(raidPtr->mutex);
   2087 		return EAGAIN;
   2088 	}
   2089 	rf_unlock_mutex2(raidPtr->mutex);
   2090 
   2091 	blocknum = bp->b_rawblkno;
   2092 
   2093 	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   2094 		    (int) blocknum));
   2095 
   2096 	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   2097 	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   2098 
   2099 	/* *THIS* is where we adjust what block we're going to...
   2100 	 * but DO NOT TOUCH bp->b_blkno!!! */
   2101 	raid_addr = blocknum;
   2102 
   2103 	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   2104 	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   2105 	sum = raid_addr + num_blocks + pb;
   2106 	if (1 || rf_debugKernelAccess) {
   2107 		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   2108 			    (int) raid_addr, (int) sum, (int) num_blocks,
   2109 			    (int) pb, (int) bp->b_resid));
   2110 	}
   2111 	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   2112 	    || (sum < num_blocks) || (sum < pb)) {
   2113 		rc = ENOSPC;
   2114 		goto done;
   2115 	}
   2116 	/*
   2117 	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   2118 	 */
   2119 
   2120 	if (bp->b_bcount & raidPtr->sectorMask) {
   2121 		rc = ENOSPC;
   2122 		goto done;
   2123 	}
   2124 	db1_printf(("Calling DoAccess..\n"));
   2125 
   2126 
   2127 	rf_lock_mutex2(raidPtr->mutex);
   2128 	raidPtr->openings--;
   2129 	rf_unlock_mutex2(raidPtr->mutex);
   2130 
   2131 	/* don't ever condition on bp->b_flags & B_WRITE.
   2132 	 * always condition on B_READ instead */
   2133 
   2134 	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   2135 			 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   2136 			 raid_addr, num_blocks,
   2137 			 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   2138 
   2139 done:
   2140 	return rc;
   2141 }
   2142 
   2143 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   2144 
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf("%s: WAKEUP CALLED\n", __func__);
		queue->numOutstanding++;

		/* A NOP carries no data: complete the buf immediately by
		 * calling the completion handler by hand. */
		bp->b_flags = 0;
		bp->b_private = req;

		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		/* Start timing the physical I/O for the access trace. */
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Point the buf at the component device; completion is
		 * reported asynchronously via KernelWakeupFunc(). */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	/* Always returns 0; errors are delivered via the callback. */
	return 0;
}
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The originating request was stashed in b_private by InitBP()
	 * (or by the NOP path in rf_DispatchKernelIO()). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	/* Account the elapsed physical I/O time to the access trace. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures is noticed by raidstart(), which
			 * triggers a component-label update. */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
   2287 
   2288 
   2289 /*
   2290  * initialize a buf structure for doing an I/O in the kernel.
   2291  */
   2292 static void
   2293 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2294        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
   2295        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector)
   2296 {
   2297 	bp->b_flags = rw_flag | (bp->b_flags & rf_b_pass);
   2298 	bp->b_oflags = 0;
   2299 	bp->b_cflags = 0;
   2300 	bp->b_bcount = numSect << logBytesPerSector;
   2301 	bp->b_bufsize = bp->b_bcount;
   2302 	bp->b_error = 0;
   2303 	bp->b_dev = dev;
   2304 	bp->b_data = bf;
   2305 	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
   2306 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2307 	if (bp->b_bcount == 0) {
   2308 		panic("bp->b_bcount is zero in InitBP!!");
   2309 	}
   2310 	bp->b_iodone = cbFunc;
   2311 	bp->b_private = cbArg;
   2312 }
   2313 
   2314 /*
   2315  * Wait interruptibly for an exclusive lock.
   2316  *
   2317  * XXX
   2318  * Several drivers do this; it should be abstracted and made MP-safe.
   2319  * (Hmm... where have we seen this warning before :->  GO )
   2320  */
   2321 static int
   2322 raidlock(struct raid_softc *rs)
   2323 {
   2324 	int     error;
   2325 
   2326 	error = 0;
   2327 	mutex_enter(&rs->sc_mutex);
   2328 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2329 		rs->sc_flags |= RAIDF_WANTED;
   2330 		error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
   2331 		if (error != 0)
   2332 			goto done;
   2333 	}
   2334 	rs->sc_flags |= RAIDF_LOCKED;
   2335 done:
   2336 	mutex_exit(&rs->sc_mutex);
   2337 	return error;
   2338 }
   2339 /*
   2340  * Unlock and wake up any waiters.
   2341  */
   2342 static void
   2343 raidunlock(struct raid_softc *rs)
   2344 {
   2345 
   2346 	mutex_enter(&rs->sc_mutex);
   2347 	rs->sc_flags &= ~RAIDF_LOCKED;
   2348 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2349 		rs->sc_flags &= ~RAIDF_WANTED;
   2350 		cv_broadcast(&rs->sc_cv);
   2351 	}
   2352 	mutex_exit(&rs->sc_mutex);
   2353 }
   2354 
   2355 
   2356 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2357 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2358 #define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
   2359 
/* Byte offset of the component-label area on each component. */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
   2366 
   2367 static daddr_t
   2368 rf_component_info_size(unsigned secsize)
   2369 {
   2370 	daddr_t info_size;
   2371 
   2372 	KASSERT(secsize);
   2373 	if (secsize > RF_COMPONENT_INFO_SIZE)
   2374 		info_size = secsize;
   2375 	else
   2376 		info_size = RF_COMPONENT_INFO_SIZE;
   2377 
   2378 	return info_size;
   2379 }
   2380 
   2381 static daddr_t
   2382 rf_parity_map_offset(RF_Raid_t *raidPtr)
   2383 {
   2384 	daddr_t map_offset;
   2385 
   2386 	KASSERT(raidPtr->bytesPerSector);
   2387 	if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
   2388 		map_offset = raidPtr->bytesPerSector;
   2389 	else
   2390 		map_offset = RF_COMPONENT_INFO_SIZE;
   2391 	map_offset += rf_component_info_offset();
   2392 
   2393 	return map_offset;
   2394 }
   2395 
   2396 static daddr_t
   2397 rf_parity_map_size(RF_Raid_t *raidPtr)
   2398 {
   2399 	daddr_t map_size;
   2400 
   2401 	if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
   2402 		map_size = raidPtr->bytesPerSector;
   2403 	else
   2404 		map_size = RF_PARITY_MAP_SIZE;
   2405 
   2406 	return map_size;
   2407 }
   2408 
   2409 int
   2410 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2411 {
   2412 	RF_ComponentLabel_t *clabel;
   2413 
   2414 	clabel = raidget_component_label(raidPtr, col);
   2415 	clabel->clean = RF_RAID_CLEAN;
   2416 	raidflush_component_label(raidPtr, col);
   2417 	return(0);
   2418 }
   2419 
   2420 
   2421 int
   2422 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2423 {
   2424 	RF_ComponentLabel_t *clabel;
   2425 
   2426 	clabel = raidget_component_label(raidPtr, col);
   2427 	clabel->clean = RF_RAID_DIRTY;
   2428 	raidflush_component_label(raidPtr, col);
   2429 	return(0);
   2430 }
   2431 
/*
 * Read the on-disk component label for column `col' into the in-core
 * copy cached at raid_cinfo[col].ci_label.  Returns the error from the
 * underlying read (0 on success).
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* The sector size must be known to size the label read. */
	KASSERT(raidPtr->bytesPerSector);

	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
   2442 
/*
 * Return a pointer to the in-core (cached) component label for `col'.
 * Callers modify it in place and then persist it with
 * raidflush_component_label().
 */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
   2448 
   2449 int
   2450 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2451 {
   2452 	RF_ComponentLabel_t *label;
   2453 
   2454 	label = &raidPtr->raid_cinfo[col].ci_label;
   2455 	label->mod_counter = raidPtr->mod_counter;
   2456 #ifndef RF_NO_PARITY_MAP
   2457 	label->parity_map_modcount = label->mod_counter;
   2458 #endif
   2459 	return raidwrite_component_label(raidPtr->bytesPerSector,
   2460 	    raidPtr->Disks[col].dev,
   2461 	    raidPtr->raid_cinfo[col].ci_vp, label);
   2462 }
   2463 
   2464 /*
   2465  * Swap the label endianness.
   2466  *
   2467  * Everything in the component label is 4-byte-swapped except the version,
   2468  * which is kept in the byte-swapped version at all times, and indicates
   2469  * for the writer that a swap is necessary.
   2470  *
   2471  * For reads it is expected that out_label == clabel, but writes expect
   2472  * separate labels so only the re-swapped label is written out to disk,
   2473  * leaving the swapped-except-version internally.
   2474  *
   2475  * Only support swapping label version 2.
   2476  */
static void
rf_swap_label(RF_ComponentLabel_t *clabel, RF_ComponentLabel_t *out_label)
{
	int	*in, *out, *in_last;

	/* Only byte-swapped version-2 labels are expected here. */
	KASSERT(clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION));

	/* Don't swap the label, but do copy it. */
	out_label->version = clabel->version;

	/*
	 * Walk the label as an array of 32-bit words, from serial_number
	 * up to (but not including) future_use2[42], swapping each word.
	 * NOTE(review): this relies on the fields between those two
	 * members forming a dense run of 4-byte quantities — confirm
	 * against the RF_ComponentLabel_t definition before changing it.
	 */
	in = &clabel->serial_number;
	in_last = &clabel->future_use2[42];
	out = &out_label->serial_number;

	for (; in < in_last; in++, out++)
		*out = bswap32(*in);
}
   2494 
   2495 static int
   2496 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
   2497     RF_ComponentLabel_t *clabel)
   2498 {
   2499 	int error;
   2500 
   2501 	error = raidread_component_area(dev, b_vp, clabel,
   2502 	    sizeof(RF_ComponentLabel_t),
   2503 	    rf_component_info_offset(),
   2504 	    rf_component_info_size(secsize));
   2505 
   2506 	if (error == 0 &&
   2507 	    clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
   2508 		rf_swap_label(clabel, clabel);
   2509 	}
   2510 
   2511 	return error;
   2512 }
   2513 
   2514 /* ARGSUSED */
   2515 static int
   2516 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
   2517     size_t msize, daddr_t offset, daddr_t dsize)
   2518 {
   2519 	struct buf *bp;
   2520 	int error;
   2521 
   2522 	/* XXX should probably ensure that we don't try to do this if
   2523 	   someone has changed rf_protected_sectors. */
   2524 
   2525 	if (b_vp == NULL) {
   2526 		/* For whatever reason, this component is not valid.
   2527 		   Don't try to read a component label from it. */
   2528 		return(EINVAL);
   2529 	}
   2530 
   2531 	/* get a block of the appropriate size... */
   2532 	bp = geteblk((int)dsize);
   2533 	bp->b_dev = dev;
   2534 
   2535 	/* get our ducks in a row for the read */
   2536 	bp->b_blkno = offset / DEV_BSIZE;
   2537 	bp->b_bcount = dsize;
   2538 	bp->b_flags |= B_READ;
   2539  	bp->b_resid = dsize;
   2540 
   2541 	bdev_strategy(bp);
   2542 	error = biowait(bp);
   2543 
   2544 	if (!error) {
   2545 		memcpy(data, bp->b_data, msize);
   2546 	}
   2547 
   2548 	brelse(bp, 0);
   2549 	return(error);
   2550 }
   2551 
   2552 static int
   2553 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
   2554     RF_ComponentLabel_t *clabel)
   2555 {
   2556 	RF_ComponentLabel_t *clabel_write = clabel;
   2557 	RF_ComponentLabel_t lclabel;
   2558 	int error;
   2559 
   2560 	if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
   2561 		clabel_write = &lclabel;
   2562 		rf_swap_label(clabel, clabel_write);
   2563 	}
   2564 	error = raidwrite_component_area(dev, b_vp, clabel_write,
   2565 	    sizeof(RF_ComponentLabel_t),
   2566 	    rf_component_info_offset(),
   2567 	    rf_component_info_size(secsize), 0);
   2568 
   2569 	return error;
   2570 }
   2571 
   2572 /* ARGSUSED */
/*
 * Write `msize' bytes from `data' into the `dsize'-byte area at byte
 * `offset' on a component, zero-padding the remainder.  With `asyncp'
 * set the write is fired asynchronously and 0 is returned without
 * collecting status; otherwise wait and return the I/O error.
 */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
 	bp->b_resid = dsize;

	/* Zero-fill so a short `msize' still pads out the full area. */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	/* NOTE(review): on the async path we return without brelse();
	 * presumably B_ASYNC completion releases the buffer — confirm. */
	bdev_strategy(bp);
	if (asyncp)
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
   2606 
   2607 void
   2608 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
   2609 {
   2610 	int c;
   2611 
   2612 	for (c = 0; c < raidPtr->numCol; c++) {
   2613 		/* Skip dead disks. */
   2614 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
   2615 			continue;
   2616 		/* XXXjld: what if an error occurs here? */
   2617 		raidwrite_component_area(raidPtr->Disks[c].dev,
   2618 		    raidPtr->raid_cinfo[c].ci_vp, map,
   2619 		    RF_PARITYMAP_NBYTE,
   2620 		    rf_parity_map_offset(raidPtr),
   2621 		    rf_parity_map_size(raidPtr), 0);
   2622 	}
   2623 }
   2624 
   2625 void
   2626 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
   2627 {
   2628 	struct rf_paritymap_ondisk tmp;
   2629 	int c,first;
   2630 
   2631 	first=1;
   2632 	for (c = 0; c < raidPtr->numCol; c++) {
   2633 		/* Skip dead disks. */
   2634 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
   2635 			continue;
   2636 		raidread_component_area(raidPtr->Disks[c].dev,
   2637 		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
   2638 		    RF_PARITYMAP_NBYTE,
   2639 		    rf_parity_map_offset(raidPtr),
   2640 		    rf_parity_map_size(raidPtr));
   2641 		if (first) {
   2642 			memcpy(map, &tmp, sizeof(*map));
   2643 			first = 0;
   2644 		} else {
   2645 			rf_paritymap_merge(map, &tmp);
   2646 		}
   2647 	}
   2648 }
   2649 
/*
 * Mark every live component (and every in-use spare) dirty, after
 * bumping the label modification counter so the labels written in this
 * pass agree.  Used so an unclean shutdown can be detected later.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	/* Spare slots live past numCol in the Disks/raid_cinfo arrays. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare stands in for.
			   NOTE(review): scol keeps its previous value
			   (initially -1) if no match is found. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
   2709 
   2710 
/*
 * Push fresh component labels to every optimal component and every
 * in-use spare, bumping the modification counter first so all labels
 * written in this pass agree.  When `final' is RF_FINAL_COMPONENT_UPDATE
 * and parity is known good, the clean bit is set as well.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	/* Spare slots live past numCol in the Disks/raid_cinfo arrays. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare stands in for.
			   NOTE(review): scol keeps its previous value
			   (initially -1) if no match is found. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
   2788 
   2789 void
   2790 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2791 {
   2792 
   2793 	if (vp != NULL) {
   2794 		if (auto_configured == 1) {
   2795 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2796 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2797 			vput(vp);
   2798 
   2799 		} else {
   2800 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
   2801 		}
   2802 	}
   2803 }
   2804 
   2805 
   2806 void
   2807 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2808 {
   2809 	int r,c;
   2810 	struct vnode *vp;
   2811 	int acd;
   2812 
   2813 
   2814 	/* We take this opportunity to close the vnodes like we should.. */
   2815 
   2816 	for (c = 0; c < raidPtr->numCol; c++) {
   2817 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2818 		acd = raidPtr->Disks[c].auto_configured;
   2819 		rf_close_component(raidPtr, vp, acd);
   2820 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2821 		raidPtr->Disks[c].auto_configured = 0;
   2822 	}
   2823 
   2824 	for (r = 0; r < raidPtr->numSpare; r++) {
   2825 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2826 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2827 		rf_close_component(raidPtr, vp, acd);
   2828 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2829 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2830 	}
   2831 }
   2832 
   2833 
/*
 * Kernel thread body: fail the component named in the request and,
 * if RF_FDFLAGS_RECON is set, reconstruct its contents onto a spare.
 * Owns and frees the request structure, then exits the thread.
 */
static void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int     s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	/* raise the force-reconstruction flag for the duration of the
	   rf_FailDisk() call, if the caller asked for it */
	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 1;
	}

	/* second argument: initiate reconstruction iff RF_FDFLAGS_RECON */
	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 0;
	}

	/* done with the request; this thread owns and releases it */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
   2863 
/*
 * Kernel thread body: rewrite all parity for the set.  On success the
 * in-core parity status is marked clean; on failure an error is logged.
 * Wakes any thread blocked in shutdown waiting for the rewrite to stop.
 */
static void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		rf_lock_mutex2(raidPtr->rad_lock);
		cv_broadcast(&raidPtr->parity_rewrite_cv);
		rf_unlock_mutex2(raidPtr->rad_lock);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
   2896 
   2897 
/*
 * Kernel thread body: copy reconstructed data from a spare back onto a
 * replaced component, tracking progress via copyback_in_progress.
 */
static void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
   2912 
   2913 
/*
 * Kernel thread body: reconstruct a component "in place" (onto the same
 * column, e.g. after a disk has been replaced).  Owns and frees the
 * request structure, then exits the thread.
 */
static void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;

	/* raise the force-reconstruction flag only around the
	   reconstruction call itself */
	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 1;
	}

	rf_ReconstructInPlace(raidPtr, req->col);

	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 0;
	}

	/* done with the request; this thread owns and releases it */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
   2941 
/*
 * Read the component label from an open vnode and, if it looks sane,
 * prepend a new RF_AutoConfig_t for it to ac_list.  On success the
 * vnode and label are owned by the new list entry (vp is expected
 * unlocked here).  On failure the label is freed and the vnode is
 * closed and released.  Returns the (possibly new) list head.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_WAITOK);

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_WAITOK);
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: drop the label and our reference to the vnode */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
   2983 
/*
 * Scan every disk-class device in the system for RAIDframe components
 * and return a list of RF_AutoConfig_t entries describing them.
 * Wedges are scanned first, then everything else, so that a wedge
 * covering a disk is preferred over that disk's raw partition.
 */
static RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		     dv = deviter_next(&di)) {

			/* we are only interested in disks */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CDs. */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md. */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* we don't care about spiflash */
			if (device_is_a(dv, "spiflash")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* wedges: accept only RAIDframe-typed wedges,
				   then hand the open vnode to rf_get_component */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component takes ownership of vp
				   (expects it unlocked) */
				VOP_UNLOCK(vp);
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists.  Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Not quite a 'whatever'.  In
					 * this situation we know
					 * there is a FS_RAID
					 * partition, but we can't
					 * open it.  The most likely
					 * reason is that the
					 * partition is already in
					 * use by another RAID set.
					 * So note that we've already
					 * found a partition on this
					 * disk so we don't attempt
					 * to use the raw disk later. */
					rf_part_found = 1;
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
   3207 
   3208 int
   3209 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
   3210 {
   3211 
   3212 	if ((clabel->version==RF_COMPONENT_LABEL_VERSION_1 ||
   3213 	     clabel->version==RF_COMPONENT_LABEL_VERSION ||
   3214 	     clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) &&
   3215 	    (clabel->clean == RF_RAID_CLEAN ||
   3216 	     clabel->clean == RF_RAID_DIRTY) &&
   3217 	    clabel->row >=0 &&
   3218 	    clabel->column >= 0 &&
   3219 	    clabel->num_rows > 0 &&
   3220 	    clabel->num_columns > 0 &&
   3221 	    clabel->row < clabel->num_rows &&
   3222 	    clabel->column < clabel->num_columns &&
   3223 	    clabel->blockSize > 0 &&
   3224 	    /*
   3225 	     * numBlocksHi may contain garbage, but it is ok since
   3226 	     * the type is unsigned.  If it is really garbage,
   3227 	     * rf_fix_old_label_size() will fix it.
   3228 	     */
   3229 	    rf_component_label_numblocks(clabel) > 0) {
   3230 		/*
   3231 		 * label looks reasonable enough...
   3232 		 * let's make sure it has no old garbage.
   3233 		 */
   3234 		if (numsecs)
   3235 			rf_fix_old_label_size(clabel, numsecs);
   3236 		return(1);
   3237 	}
   3238 	return(0);
   3239 }
   3240 
   3241 
   3242 /*
   3243  * For reasons yet unknown, some old component labels have garbage in
   3244  * the newer numBlocksHi region, and this causes lossage.  Since those
   3245  * disks will also have numsecs set to less than 32 bits of sectors,
   3246  * we can determine when this corruption has occurred, and fix it.
   3247  *
   3248  * The exact same problem, with the same unknown reason, happens to
   3249  * the partitionSizeHi member as well.
   3250  */
   3251 static void
   3252 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
   3253 {
   3254 
   3255 	if (numsecs < ((uint64_t)1 << 32)) {
   3256 		if (clabel->numBlocksHi) {
   3257 			printf("WARNING: total sectors < 32 bits, yet "
   3258 			       "numBlocksHi set\n"
   3259 			       "WARNING: resetting numBlocksHi to zero.\n");
   3260 			clabel->numBlocksHi = 0;
   3261 		}
   3262 
   3263 		if (clabel->partitionSizeHi) {
   3264 			printf("WARNING: total sectors < 32 bits, yet "
   3265 			       "partitionSizeHi set\n"
   3266 			       "WARNING: resetting partitionSizeHi to zero.\n");
   3267 			clabel->partitionSizeHi = 0;
   3268 		}
   3269 	}
   3270 }
   3271 
   3272 
   3273 #ifdef DEBUG
/*
 * Debug helper: dump the interesting fields of a component label to
 * the console.  Only compiled in under DEBUG.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* indexed by root_partition & 3; "3" is not a defined value */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
   3305 #endif
   3306 
   3307 static RF_ConfigSet_t *
   3308 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   3309 {
   3310 	RF_AutoConfig_t *ac;
   3311 	RF_ConfigSet_t *config_sets;
   3312 	RF_ConfigSet_t *cset;
   3313 	RF_AutoConfig_t *ac_next;
   3314 
   3315 
   3316 	config_sets = NULL;
   3317 
   3318 	/* Go through the AutoConfig list, and figure out which components
   3319 	   belong to what sets.  */
   3320 	ac = ac_list;
   3321 	while(ac!=NULL) {
   3322 		/* we're going to putz with ac->next, so save it here
   3323 		   for use at the end of the loop */
   3324 		ac_next = ac->next;
   3325 
   3326 		if (config_sets == NULL) {
   3327 			/* will need at least this one... */
   3328 			config_sets = malloc(sizeof(RF_ConfigSet_t),
   3329 				       M_RAIDFRAME, M_WAITOK);
   3330 			/* this one is easy :) */
   3331 			config_sets->ac = ac;
   3332 			config_sets->next = NULL;
   3333 			config_sets->rootable = 0;
   3334 			ac->next = NULL;
   3335 		} else {
   3336 			/* which set does this component fit into? */
   3337 			cset = config_sets;
   3338 			while(cset!=NULL) {
   3339 				if (rf_does_it_fit(cset, ac)) {
   3340 					/* looks like it matches... */
   3341 					ac->next = cset->ac;
   3342 					cset->ac = ac;
   3343 					break;
   3344 				}
   3345 				cset = cset->next;
   3346 			}
   3347 			if (cset==NULL) {
   3348 				/* didn't find a match above... new set..*/
   3349 				cset = malloc(sizeof(RF_ConfigSet_t),
   3350 					       M_RAIDFRAME, M_WAITOK);
   3351 				cset->ac = ac;
   3352 				ac->next = NULL;
   3353 				cset->next = config_sets;
   3354 				cset->rootable = 0;
   3355 				config_sets = cset;
   3356 			}
   3357 		}
   3358 		ac = ac_next;
   3359 	}
   3360 
   3361 
   3362 	return(config_sets);
   3363 }
   3364 
   3365 static int
   3366 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3367 {
   3368 	RF_ComponentLabel_t *clabel1, *clabel2;
   3369 
   3370 	/* If this one matches the *first* one in the set, that's good
   3371 	   enough, since the other members of the set would have been
   3372 	   through here too... */
   3373 	/* note that we are not checking partitionSize here..
   3374 
   3375 	   Note that we are also not checking the mod_counters here.
   3376 	   If everything else matches except the mod_counter, that's
   3377 	   good enough for this test.  We will deal with the mod_counters
   3378 	   a little later in the autoconfiguration process.
   3379 
   3380 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3381 
   3382 	   The reason we don't check for this is that failed disks
   3383 	   will have lower modification counts.  If those disks are
   3384 	   not added to the set they used to belong to, then they will
   3385 	   form their own set, which may result in 2 different sets,
   3386 	   for example, competing to be configured at raid0, and
   3387 	   perhaps competing to be the root filesystem set.  If the
   3388 	   wrong ones get configured, or both attempt to become /,
   3389 	   weird behaviour and or serious lossage will occur.  Thus we
   3390 	   need to bring them into the fold here, and kick them out at
   3391 	   a later point.
   3392 
   3393 	*/
   3394 
   3395 	clabel1 = cset->ac->clabel;
   3396 	clabel2 = ac->clabel;
   3397 	if ((clabel1->version == clabel2->version) &&
   3398 	    (clabel1->serial_number == clabel2->serial_number) &&
   3399 	    (clabel1->num_rows == clabel2->num_rows) &&
   3400 	    (clabel1->num_columns == clabel2->num_columns) &&
   3401 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3402 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3403 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3404 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3405 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3406 	    (clabel1->blockSize == clabel2->blockSize) &&
   3407 	    rf_component_label_numblocks(clabel1) ==
   3408 	    rf_component_label_numblocks(clabel2) &&
   3409 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3410 	    (clabel1->root_partition == clabel2->root_partition) &&
   3411 	    (clabel1->last_unit == clabel2->last_unit) &&
   3412 	    (clabel1->config_order == clabel2->config_order)) {
   3413 		/* if it get's here, it almost *has* to be a match */
   3414 	} else {
   3415 		/* it's not consistent with somebody in the set..
   3416 		   punt */
   3417 		return(0);
   3418 	}
   3419 	/* all was fine.. it must fit... */
   3420 	return(1);
   3421 }
   3422 
/*
 * Decide whether a config set has enough live components (those whose
 * mod_counter matches the newest in the set) to be configured.
 * RAID 1 gets special treatment: a mirror pair may lose one member but
 * not both.  Returns 1 if the set is configurable, 0 otherwise.
 */
static int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* the expected mod_counter is the maximum over all members;
	   stale (failed) components carry lower values */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a current (mod_counter matches) component
		   for column c */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no failures; RAID 4/5 tolerate one.
	   (RAID 1 was fully handled above.) */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
   3525 
/*
 * Build an RF_Config_t for an auto-detected set from the component
 * labels: geometry from the first label, device names from every
 * member.  (raidPtr is currently unused here.)
 */
static void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* 2. Fill in the device name for each column */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* 3. No debug variables */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
   3559 
   3560 static int
   3561 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3562 {
   3563 	RF_ComponentLabel_t *clabel;
   3564 	int column;
   3565 	int sparecol;
   3566 
   3567 	raidPtr->autoconfigure = new_value;
   3568 
   3569 	for(column=0; column<raidPtr->numCol; column++) {
   3570 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3571 			clabel = raidget_component_label(raidPtr, column);
   3572 			clabel->autoconfigure = new_value;
   3573 			raidflush_component_label(raidPtr, column);
   3574 		}
   3575 	}
   3576 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3577 		sparecol = raidPtr->numCol + column;
   3578 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3579 			clabel = raidget_component_label(raidPtr, sparecol);
   3580 			clabel->autoconfigure = new_value;
   3581 			raidflush_component_label(raidPtr, sparecol);
   3582 		}
   3583 	}
   3584 	return(new_value);
   3585 }
   3586 
   3587 static int
   3588 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3589 {
   3590 	RF_ComponentLabel_t *clabel;
   3591 	int column;
   3592 	int sparecol;
   3593 
   3594 	raidPtr->root_partition = new_value;
   3595 	for(column=0; column<raidPtr->numCol; column++) {
   3596 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3597 			clabel = raidget_component_label(raidPtr, column);
   3598 			clabel->root_partition = new_value;
   3599 			raidflush_component_label(raidPtr, column);
   3600 		}
   3601 	}
   3602 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3603 		sparecol = raidPtr->numCol + column;
   3604 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3605 			clabel = raidget_component_label(raidPtr, sparecol);
   3606 			clabel->root_partition = new_value;
   3607 			raidflush_component_label(raidPtr, sparecol);
   3608 		}
   3609 	}
   3610 	return(new_value);
   3611 }
   3612 
   3613 static void
   3614 rf_release_all_vps(RF_ConfigSet_t *cset)
   3615 {
   3616 	RF_AutoConfig_t *ac;
   3617 
   3618 	ac = cset->ac;
   3619 	while(ac!=NULL) {
   3620 		/* Close the vp, and give it back */
   3621 		if (ac->vp) {
   3622 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3623 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
   3624 			vput(ac->vp);
   3625 			ac->vp = NULL;
   3626 		}
   3627 		ac = ac->next;
   3628 	}
   3629 }
   3630 
   3631 
   3632 static void
   3633 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3634 {
   3635 	RF_AutoConfig_t *ac;
   3636 	RF_AutoConfig_t *next_ac;
   3637 
   3638 	ac = cset->ac;
   3639 	while(ac!=NULL) {
   3640 		next_ac = ac->next;
   3641 		/* nuke the label */
   3642 		free(ac->clabel, M_RAIDFRAME);
   3643 		/* cleanup the config structure */
   3644 		free(ac, M_RAIDFRAME);
   3645 		/* "next.." */
   3646 		ac = next_ac;
   3647 	}
   3648 	/* and, finally, nuke the config set */
   3649 	free(cset, M_RAIDFRAME);
   3650 }
   3651 
   3652 
/*
 * Initialize a component label from the current in-core state of the
 * RAID set (geometry, serial/mod counters, autoconfig preferences).
 * Per-component fields (row/column) are NOT set here.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* avoid over-writing byteswapped version. */
	if (clabel->version != bswap32(RF_COMPONENT_LABEL_VERSION))
		clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
   3686 
/*
 * Configure one auto-detected config set: pick a raid unit (preferring
 * the unit it was last configured as, falling back to the next free
 * one), build an RF_Config_t, and run rf_Configure().  Returns the
 * softc on success, NULL on failure.
 */
static struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_WAITOK|M_ZERO);

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* walk upward from last_unit until we find an unused unit */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* no existing softc at that unit: create one */
	if (sc == NULL)
		sc = raidget(raidID, true);
	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed: drop the softc we grabbed */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
   3758 
/*
 * Initialize a per-set memory pool named "raid<id>_<pool_name>",
 * pre-allocating xmin items and capping the high-water mark at xmax.
 * w_chan receives the generated name (must hold RF_MAX_POOLNAMELEN).
 */
void
rf_pool_init(RF_Raid_t *raidPtr, char *w_chan, struct pool *p, size_t size, const char *pool_name,
	     size_t xmin, size_t xmax)
{

	/* Format: raid%d_foo */
	snprintf(w_chan, RF_MAX_POOLNAMELEN, "raid%d_%s", raidPtr->raidid, pool_name);

	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
}
   3771 
   3772 
/*
 * rf_buf_queue_check(RF_Raid_t *raidPtr) -- looks into the buffer queue
 * to see if there is IO pending and if that IO could possibly be done
 * for a given RAID set.  Returns 0 if IO is waiting and can be done, 1
 * otherwise.
 *
 */
   3780 int
   3781 rf_buf_queue_check(RF_Raid_t *raidPtr)
   3782 {
   3783 	struct raid_softc *rs;
   3784 	struct dk_softc *dksc;
   3785 
   3786 	rs = raidPtr->softc;
   3787 	dksc = &rs->sc_dksc;
   3788 
   3789 	if ((rs->sc_flags & RAIDF_INITED) == 0)
   3790 		return 1;
   3791 
   3792 	if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
   3793 		/* there is work to do */
   3794 		return 0;
   3795 	}
   3796 	/* default is nothing to do */
   3797 	return 1;
   3798 }
   3799 
   3800 int
   3801 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
   3802 {
   3803 	uint64_t numsecs;
   3804 	unsigned secsize;
   3805 	int error;
   3806 
   3807 	error = getdisksize(vp, &numsecs, &secsize);
   3808 	if (error == 0) {
   3809 		diskPtr->blockSize = secsize;
   3810 		diskPtr->numBlocks = numsecs - rf_protectedSectors;
   3811 		diskPtr->partitionSize = numsecs;
   3812 		return 0;
   3813 	}
   3814 	return error;
   3815 }
   3816 
/*
 * Autoconfiguration match function.  raid(4) pseudo-devices are
 * created on demand, so matching always succeeds.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
   3822 
/*
 * Autoconfiguration attach function.  All real setup happens later
 * (via ioctl or autoconfiguration paths), so nothing to do here.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
   3827 
   3828 
   3829 static int
   3830 raid_detach(device_t self, int flags)
   3831 {
   3832 	int error;
   3833 	struct raid_softc *rs = raidsoftc(self);
   3834 
   3835 	if (rs == NULL)
   3836 		return ENXIO;
   3837 
   3838 	if ((error = raidlock(rs)) != 0)
   3839 		return error;
   3840 
   3841 	error = raid_detach_unlocked(rs);
   3842 
   3843 	raidunlock(rs);
   3844 
   3845 	/* XXX raid can be referenced here */
   3846 
   3847 	if (error)
   3848 		return error;
   3849 
   3850 	/* Free the softc */
   3851 	raidput(rs);
   3852 
   3853 	return 0;
   3854 }
   3855 
   3856 static void
   3857 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
   3858 {
   3859 	struct dk_softc *dksc = &rs->sc_dksc;
   3860 	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
   3861 
   3862 	memset(dg, 0, sizeof(*dg));
   3863 
   3864 	dg->dg_secperunit = raidPtr->totalSectors;
   3865 	dg->dg_secsize = raidPtr->bytesPerSector;
   3866 	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   3867 	dg->dg_ntracks = 4 * raidPtr->numCol;
   3868 
   3869 	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
   3870 }
   3871 
   3872 /*
   3873  * Get cache info for all the components (including spares).
   3874  * Returns intersection of all the cache flags of all disks, or first
   3875  * error if any encountered.
   3876  * XXXfua feature flags can change as spares are added - lock down somehow
   3877  */
static int
rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
{
	int c;
	int error;
	int dkwhole = 0, dkpart;

	/* Walk every column plus every spare. */
	for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
		/*
		 * Check any non-dead disk, even when currently being
		 * reconstructed.
		 */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
			    DIOCGCACHE, &dkpart, FREAD, NOCRED);
			if (error) {
				/* ENODEV (no cache info) fails quietly. */
				if (error != ENODEV) {
					printf("raid%d: get cache for component %s failed\n",
					    raidPtr->raidid,
					    raidPtr->Disks[c].devname);
				}

				return error;
			}

			/*
			 * Column 0 seeds the result; later components are
			 * intersected into it via DKCACHE_COMBINE.
			 * NOTE(review): if column 0 is dead, the first live
			 * component is combined with the initial 0 instead of
			 * seeding dkwhole -- verify that is intended.
			 */
			if (c == 0)
				dkwhole = dkpart;
			else
				dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
		}
	}

	*data = dkwhole;

	return 0;
}
   3914 
   3915 /*
   3916  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
   3917  * We end up returning whatever error was returned by the first cache flush
   3918  * that fails.
   3919  */
   3920 
   3921 static int
   3922 rf_sync_component_cache(RF_Raid_t *raidPtr, int c, int force)
   3923 {
   3924 	int e = 0;
   3925 	for (int i = 0; i < 5; i++) {
   3926 		e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
   3927 		    &force, FWRITE, NOCRED);
   3928 		if (!e || e == ENODEV)
   3929 			return e;
   3930 		printf("raid%d: cache flush[%d] to component %s failed (%d)\n",
   3931 		    raidPtr->raidid, i, raidPtr->Disks[c].devname, e);
   3932 	}
   3933 	return e;
   3934 }
   3935 
   3936 int
   3937 rf_sync_component_caches(RF_Raid_t *raidPtr, int force)
   3938 {
   3939 	int c, error;
   3940 
   3941 	error = 0;
   3942 	for (c = 0; c < raidPtr->numCol; c++) {
   3943 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   3944 			int e = rf_sync_component_cache(raidPtr, c, force);
   3945 			if (e && !error)
   3946 				error = e;
   3947 		}
   3948 	}
   3949 
   3950 	for (c = 0; c < raidPtr->numSpare ; c++) {
   3951 		int sparecol = raidPtr->numCol + c;
   3952 		/* Need to ensure that the reconstruct actually completed! */
   3953 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3954 			int e = rf_sync_component_cache(raidPtr, sparecol,
   3955 			    force);
   3956 			if (e && !error)
   3957 				error = e;
   3958 		}
   3959 	}
   3960 	return error;
   3961 }
   3962 
   3963 /* Fill in info with the current status */
   3964 void
   3965 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
   3966 {
   3967 
   3968 	memset(info, 0, sizeof(*info));
   3969 
   3970 	if (raidPtr->status != rf_rs_reconstructing) {
   3971 		info->total = 100;
   3972 		info->completed = 100;
   3973 	} else {
   3974 		info->total = raidPtr->reconControl->numRUsTotal;
   3975 		info->completed = raidPtr->reconControl->numRUsComplete;
   3976 	}
   3977 	info->remaining = info->total - info->completed;
   3978 }
   3979 
   3980 /* Fill in info with the current status */
   3981 void
   3982 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
   3983 {
   3984 
   3985 	memset(info, 0, sizeof(*info));
   3986 
   3987 	if (raidPtr->parity_rewrite_in_progress == 1) {
   3988 		info->total = raidPtr->Layout.numStripe;
   3989 		info->completed = raidPtr->parity_rewrite_stripes_done;
   3990 	} else {
   3991 		info->completed = 100;
   3992 		info->total = 100;
   3993 	}
   3994 	info->remaining = info->total - info->completed;
   3995 }
   3996 
   3997 /* Fill in info with the current status */
   3998 void
   3999 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
   4000 {
   4001 
   4002 	memset(info, 0, sizeof(*info));
   4003 
   4004 	if (raidPtr->copyback_in_progress == 1) {
   4005 		info->total = raidPtr->Layout.numStripe;
   4006 		info->completed = raidPtr->copyback_stripes_done;
   4007 		info->remaining = info->total - info->completed;
   4008 	} else {
   4009 		info->remaining = 0;
   4010 		info->completed = 100;
   4011 		info->total = 100;
   4012 	}
   4013 }
   4014 
   4015 /* Fill in config with the current info */
   4016 int
   4017 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
   4018 {
   4019 	int	d, i, j;
   4020 
   4021 	if (!raidPtr->valid)
   4022 		return ENODEV;
   4023 	config->cols = raidPtr->numCol;
   4024 	config->ndevs = raidPtr->numCol;
   4025 	if (config->ndevs >= RF_MAX_DISKS)
   4026 		return ENOMEM;
   4027 	config->nspares = raidPtr->numSpare;
   4028 	if (config->nspares >= RF_MAX_DISKS)
   4029 		return ENOMEM;
   4030 	config->maxqdepth = raidPtr->maxQueueDepth;
   4031 	d = 0;
   4032 	for (j = 0; j < config->cols; j++) {
   4033 		config->devs[d] = raidPtr->Disks[j];
   4034 		d++;
   4035 	}
   4036 	for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
   4037 		config->spares[i] = raidPtr->Disks[j];
   4038 		if (config->spares[i].status == rf_ds_rebuilding_spare) {
   4039 			/* XXX: raidctl(8) expects to see this as a used spare */
   4040 			config->spares[i].status = rf_ds_used_spare;
   4041 		}
   4042 	}
   4043 	return 0;
   4044 }
   4045 
   4046 int
   4047 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
   4048 {
   4049 	RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
   4050 	RF_ComponentLabel_t *raid_clabel;
   4051 	int column = clabel->column;
   4052 
   4053 	if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
   4054 		return EINVAL;
   4055 	raid_clabel = raidget_component_label(raidPtr, column);
   4056 	memcpy(clabel, raid_clabel, sizeof *clabel);
   4057 	/* Fix-up for userland. */
   4058 	if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION))
   4059 		clabel->version = RF_COMPONENT_LABEL_VERSION;
   4060 
   4061 	return 0;
   4062 }
   4063 
   4064 /*
   4065  * Module interface
   4066  */
   4067 
/* Driver-class module, depending on the dk(9) helpers and the FCFS
 * buffer queue strategy. */
MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");

#ifdef _MODULE
/* When built as a loadable module, supply the cfdriver ourselves. */
CFDRIVER_DECL(raid, DV_DISK, NULL);
#endif

static int raid_modcmd(modcmd_t, void *);
static int raid_modcmd_init(void);
static int raid_modcmd_fini(void);
   4077 
   4078 static int
   4079 raid_modcmd(modcmd_t cmd, void *data)
   4080 {
   4081 	int error;
   4082 
   4083 	error = 0;
   4084 	switch (cmd) {
   4085 	case MODULE_CMD_INIT:
   4086 		error = raid_modcmd_init();
   4087 		break;
   4088 	case MODULE_CMD_FINI:
   4089 		error = raid_modcmd_fini();
   4090 		break;
   4091 	default:
   4092 		error = ENOTTY;
   4093 		break;
   4094 	}
   4095 	return error;
   4096 }
   4097 
/*
 * One-time module initialization: create the global raid_lock, attach
 * the block/character devsw and the autoconf driver/attach glue, boot
 * the RAIDframe core, and register a finalizer that auto-configures
 * RAID sets after device discovery.  On failure, everything attached
 * so far is rolled back and an errno is returned.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1: let the system pick the majors.  EEXIST means the devsw is
	 * already attached (e.g. compiled in), which is acceptable. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Roll back the devsw attachment done above. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Roll back cfdriver and devsw attachments. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* NOTE(review): error is always 0 here -- every failure path above
	 * returned early -- so this test is redundant but harmless. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		/* Non-fatal: the driver works without autoconfiguration. */
		error = 0;
	}

	return error;
}
   4168 
/*
 * Module teardown: refuse to unload while any raid unit exists, then
 * detach the autoconf glue and devsw in reverse order of attachment,
 * shut down the RAIDframe core, and destroy the global lock.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist.  */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Re-attach the cfattach so module state stays consistent. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	devsw_detach(&raid_bdevsw, &raid_cdevsw);
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	/* error is 0 here; all failure paths returned earlier. */
	return error;
}
   4209