Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.410.4.4
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.410.4.4 2024/04/28 12:09:08 martin Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Greg Oster; Jason R. Thorpe.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright (c) 1988 University of Utah.
     34  * Copyright (c) 1990, 1993
     35  *      The Regents of the University of California.  All rights reserved.
     36  *
     37  * This code is derived from software contributed to Berkeley by
     38  * the Systems Programming Group of the University of Utah Computer
     39  * Science Department.
     40  *
     41  * Redistribution and use in source and binary forms, with or without
     42  * modification, are permitted provided that the following conditions
     43  * are met:
     44  * 1. Redistributions of source code must retain the above copyright
     45  *    notice, this list of conditions and the following disclaimer.
     46  * 2. Redistributions in binary form must reproduce the above copyright
     47  *    notice, this list of conditions and the following disclaimer in the
     48  *    documentation and/or other materials provided with the distribution.
     49  * 3. Neither the name of the University nor the names of its contributors
     50  *    may be used to endorse or promote products derived from this software
     51  *    without specific prior written permission.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     63  * SUCH DAMAGE.
     64  *
     65  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     66  *
     67  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     68  */
     69 
     70 /*
     71  * Copyright (c) 1995 Carnegie-Mellon University.
     72  * All rights reserved.
     73  *
     74  * Authors: Mark Holland, Jim Zelenka
     75  *
     76  * Permission to use, copy, modify and distribute this software and
     77  * its documentation is hereby granted, provided that both the copyright
     78  * notice and this permission notice appear in all copies of the
     79  * software, derivative works or modified versions, and any portions
     80  * thereof, and that both notices appear in supporting documentation.
     81  *
     82  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     83  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     84  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     85  *
     86  * Carnegie Mellon requests users of this software to return to
     87  *
     88  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     89  *  School of Computer Science
     90  *  Carnegie Mellon University
     91  *  Pittsburgh PA 15213-3890
     92  *
     93  * any improvements or extensions that they make and grant Carnegie the
     94  * rights to redistribute these changes.
     95  */
     96 
     97 /***********************************************************
     98  *
     99  * rf_kintf.c -- the kernel interface routines for RAIDframe
    100  *
    101  ***********************************************************/
    102 
    103 #include <sys/cdefs.h>
    104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.410.4.4 2024/04/28 12:09:08 martin Exp $");
    105 
    106 #ifdef _KERNEL_OPT
    107 #include "opt_raid_autoconfig.h"
    108 #include "opt_compat_netbsd32.h"
    109 #endif
    110 
    111 #include <sys/param.h>
    112 #include <sys/errno.h>
    113 #include <sys/pool.h>
    114 #include <sys/proc.h>
    115 #include <sys/queue.h>
    116 #include <sys/disk.h>
    117 #include <sys/device.h>
    118 #include <sys/stat.h>
    119 #include <sys/ioctl.h>
    120 #include <sys/fcntl.h>
    121 #include <sys/systm.h>
    122 #include <sys/vnode.h>
    123 #include <sys/disklabel.h>
    124 #include <sys/conf.h>
    125 #include <sys/buf.h>
    126 #include <sys/bufq.h>
    127 #include <sys/reboot.h>
    128 #include <sys/kauth.h>
    129 #include <sys/module.h>
    130 #include <sys/compat_stub.h>
    131 
    132 #include <prop/proplib.h>
    133 
    134 #include <dev/raidframe/raidframevar.h>
    135 #include <dev/raidframe/raidframeio.h>
    136 #include <dev/raidframe/rf_paritymap.h>
    137 
    138 #include "rf_raid.h"
    139 #include "rf_copyback.h"
    140 #include "rf_dag.h"
    141 #include "rf_dagflags.h"
    142 #include "rf_desc.h"
    143 #include "rf_diskqueue.h"
    144 #include "rf_etimer.h"
    145 #include "rf_general.h"
    146 #include "rf_kintf.h"
    147 #include "rf_options.h"
    148 #include "rf_driver.h"
    149 #include "rf_parityscan.h"
    150 #include "rf_threadstuff.h"
    151 
    152 #include "ioconf.h"
    153 
    154 #ifdef DEBUG
    155 int     rf_kdebug_level = 0;
    156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    157 #else				/* DEBUG */
    158 #define db1_printf(a) { }
    159 #endif				/* DEBUG */
    160 
    161 #define DEVICE_XNAME(dev) dev ? device_xname(dev) : "null"
    162 
    163 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
    164 static rf_declare_mutex2(rf_sparet_wait_mutex);
    165 static rf_declare_cond2(rf_sparet_wait_cv);
    166 static rf_declare_cond2(rf_sparet_resp_cv);
    167 
    168 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    169 						 * spare table */
    170 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    171 						 * installation process */
    172 #endif
    173 
    174 const int rf_b_pass = (B_PHYS|B_RAW|B_MEDIA_FLAGS);
    175 
    176 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    177 
    178 /* prototypes */
    179 static void KernelWakeupFunc(struct buf *);
    180 static void InitBP(struct buf *, struct vnode *, unsigned,
    181     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    182     void *, int);
    183 static void raidinit(struct raid_softc *);
    184 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
    185 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
    186 
    187 static int raid_match(device_t, cfdata_t, void *);
    188 static void raid_attach(device_t, device_t, void *);
    189 static int raid_detach(device_t, int);
    190 
    191 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    192     daddr_t, daddr_t);
    193 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    194     daddr_t, daddr_t, int);
    195 
    196 static int raidwrite_component_label(unsigned,
    197     dev_t, struct vnode *, RF_ComponentLabel_t *);
    198 static int raidread_component_label(unsigned,
    199     dev_t, struct vnode *, RF_ComponentLabel_t *);
    200 
    201 static int raid_diskstart(device_t, struct buf *bp);
    202 static int raid_dumpblocks(device_t, void *, daddr_t, int);
    203 static int raid_lastclose(device_t);
    204 
    205 static dev_type_open(raidopen);
    206 static dev_type_close(raidclose);
    207 static dev_type_read(raidread);
    208 static dev_type_write(raidwrite);
    209 static dev_type_ioctl(raidioctl);
    210 static dev_type_strategy(raidstrategy);
    211 static dev_type_dump(raiddump);
    212 static dev_type_size(raidsize);
    213 
    214 const struct bdevsw raid_bdevsw = {
    215 	.d_open = raidopen,
    216 	.d_close = raidclose,
    217 	.d_strategy = raidstrategy,
    218 	.d_ioctl = raidioctl,
    219 	.d_dump = raiddump,
    220 	.d_psize = raidsize,
    221 	.d_discard = nodiscard,
    222 	.d_flag = D_DISK
    223 };
    224 
    225 const struct cdevsw raid_cdevsw = {
    226 	.d_open = raidopen,
    227 	.d_close = raidclose,
    228 	.d_read = raidread,
    229 	.d_write = raidwrite,
    230 	.d_ioctl = raidioctl,
    231 	.d_stop = nostop,
    232 	.d_tty = notty,
    233 	.d_poll = nopoll,
    234 	.d_mmap = nommap,
    235 	.d_kqfilter = nokqfilter,
    236 	.d_discard = nodiscard,
    237 	.d_flag = D_DISK
    238 };
    239 
    240 static struct dkdriver rf_dkdriver = {
    241 	.d_open = raidopen,
    242 	.d_close = raidclose,
    243 	.d_strategy = raidstrategy,
    244 	.d_diskstart = raid_diskstart,
    245 	.d_dumpblocks = raid_dumpblocks,
    246 	.d_lastclose = raid_lastclose,
    247 	.d_minphys = minphys
    248 };
    249 
    250 #define	raidunit(x)	DISKUNIT(x)
    251 #define	raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)
    252 
    253 extern struct cfdriver raid_cd;
    254 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    255     raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    256     DVF_DETACH_SHUTDOWN);
    257 
    258 /* Internal representation of a rf_recon_req */
    259 struct rf_recon_req_internal {
    260 	RF_RowCol_t col;
    261 	RF_ReconReqFlags_t flags;
    262 	void   *raidPtr;
    263 };
    264 
    265 /*
    266  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    267  * Be aware that large numbers can allow the driver to consume a lot of
    268  * kernel memory, especially on writes, and in degraded mode reads.
    269  *
    270  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    271  * a single 64K write will typically require 64K for the old data,
    272  * 64K for the old parity, and 64K for the new parity, for a total
    273  * of 192K (if the parity buffer is not re-used immediately).
    274  * Even it if is used immediately, that's still 128K, which when multiplied
    275  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    276  *
    277  * Now in degraded mode, for example, a 64K read on the above setup may
    278  * require data reconstruction, which will require *all* of the 4 remaining
    279  * disks to participate -- 4 * 32K/disk == 128K again.
    280  */
    281 
    282 #ifndef RAIDOUTSTANDING
    283 #define RAIDOUTSTANDING   6
    284 #endif
    285 
    286 #define RAIDLABELDEV(dev)	\
    287 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    288 
    289 /* declared here, and made public, for the benefit of KVM stuff.. */
    290 
    291 static int raidlock(struct raid_softc *);
    292 static void raidunlock(struct raid_softc *);
    293 
    294 static int raid_detach_unlocked(struct raid_softc *);
    295 
    296 static void rf_markalldirty(RF_Raid_t *);
    297 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
    298 
    299 static void rf_ReconThread(struct rf_recon_req_internal *);
    300 static void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    301 static void rf_CopybackThread(RF_Raid_t *raidPtr);
    302 static void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
    303 static int rf_autoconfig(device_t);
    304 static int rf_rescan(void);
    305 static void rf_buildroothack(RF_ConfigSet_t *);
    306 
    307 static RF_AutoConfig_t *rf_find_raid_components(void);
    308 static RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    309 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    310 static void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    311 static int rf_set_autoconfig(RF_Raid_t *, int);
    312 static int rf_set_rootpartition(RF_Raid_t *, int);
    313 static void rf_release_all_vps(RF_ConfigSet_t *);
    314 static void rf_cleanup_config_set(RF_ConfigSet_t *);
    315 static int rf_have_enough_components(RF_ConfigSet_t *);
    316 static struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
    317 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
    318 
    319 /*
    320  * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
    321  * Note that this is overridden by having RAID_AUTOCONFIG as an option
    322  * in the kernel config file.
    323  */
    324 #ifdef RAID_AUTOCONFIG
    325 int raidautoconfig = 1;
    326 #else
    327 int raidautoconfig = 0;
    328 #endif
    329 static bool raidautoconfigdone = false;
    330 
    331 struct pool rf_alloclist_pool;   /* AllocList */
    332 
    333 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
    334 static kmutex_t raid_lock;
    335 
    336 static struct raid_softc *
    337 raidcreate(int unit) {
    338 	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
    339 	sc->sc_unit = unit;
    340 	cv_init(&sc->sc_cv, "raidunit");
    341 	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
    342 	return sc;
    343 }
    344 
    345 static void
    346 raiddestroy(struct raid_softc *sc) {
    347 	cv_destroy(&sc->sc_cv);
    348 	mutex_destroy(&sc->sc_mutex);
    349 	kmem_free(sc, sizeof(*sc));
    350 }
    351 
    352 static struct raid_softc *
    353 raidget(int unit, bool create) {
    354 	struct raid_softc *sc;
    355 	if (unit < 0) {
    356 #ifdef DIAGNOSTIC
    357 		panic("%s: unit %d!", __func__, unit);
    358 #endif
    359 		return NULL;
    360 	}
    361 	mutex_enter(&raid_lock);
    362 	LIST_FOREACH(sc, &raids, sc_link) {
    363 		if (sc->sc_unit == unit) {
    364 			mutex_exit(&raid_lock);
    365 			return sc;
    366 		}
    367 	}
    368 	mutex_exit(&raid_lock);
    369 	if (!create)
    370 		return NULL;
    371 	sc = raidcreate(unit);
    372 	mutex_enter(&raid_lock);
    373 	LIST_INSERT_HEAD(&raids, sc, sc_link);
    374 	mutex_exit(&raid_lock);
    375 	return sc;
    376 }
    377 
/*
 * Remove a raid_softc from the global list and destroy it.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
    385 
/*
 * Historical pseudo-device attach entry point; retained only to
 * satisfy the config(1) glue.  All real work moved elsewhere.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
    395 
    396 static int
    397 rf_autoconfig(device_t self)
    398 {
    399 	RF_AutoConfig_t *ac_list;
    400 	RF_ConfigSet_t *config_sets;
    401 
    402 	if (!raidautoconfig || raidautoconfigdone == true)
    403 		return 0;
    404 
    405 	/* XXX This code can only be run once. */
    406 	raidautoconfigdone = true;
    407 
    408 #ifdef __HAVE_CPU_BOOTCONF
    409 	/*
    410 	 * 0. find the boot device if needed first so we can use it later
    411 	 * this needs to be done before we autoconfigure any raid sets,
    412 	 * because if we use wedges we are not going to be able to open
    413 	 * the boot device later
    414 	 */
    415 	if (booted_device == NULL)
    416 		cpu_bootconf();
    417 #endif
    418 	/* 1. locate all RAID components on the system */
    419 	aprint_debug("Searching for RAID components...\n");
    420 	ac_list = rf_find_raid_components();
    421 
    422 	/* 2. Sort them into their respective sets. */
    423 	config_sets = rf_create_auto_sets(ac_list);
    424 
    425 	/*
    426 	 * 3. Evaluate each set and configure the valid ones.
    427 	 * This gets done in rf_buildroothack().
    428 	 */
    429 	rf_buildroothack(config_sets);
    430 
    431 	return 1;
    432 }
    433 
    434 int
    435 rf_inited(const struct raid_softc *rs) {
    436 	return (rs->sc_flags & RAIDF_INITED) != 0;
    437 }
    438 
/*
 * Accessor: return the RF_Raid_t embedded in the given softc.
 */
RF_Raid_t *
rf_get_raid(struct raid_softc *rs) {
	return &rs->sc_r;
}
    443 
/*
 * Accessor: return the raid unit number of the given softc.
 */
int
rf_get_unit(const struct raid_softc *rs) {
	return rs->sc_unit;
}
    448 
    449 static int
    450 rf_containsboot(RF_Raid_t *r, device_t bdv) {
    451 	const char *bootname;
    452 	size_t len;
    453 
    454 	/* if bdv is NULL, the set can't contain it. exit early. */
    455 	if (bdv == NULL)
    456 		return 0;
    457 
    458 	bootname = device_xname(bdv);
    459 	len = strlen(bootname);
    460 
    461 	for (int col = 0; col < r->numCol; col++) {
    462 		const char *devname = r->Disks[col].devname;
    463 		devname += sizeof("/dev/") - 1;
    464 		if (strncmp(devname, "dk", 2) == 0) {
    465 			const char *parent =
    466 			    dkwedge_get_parent_name(r->Disks[col].dev);
    467 			if (parent != NULL)
    468 				devname = parent;
    469 		}
    470 		if (strncmp(devname, bootname, len) == 0) {
    471 			struct raid_softc *sc = r->softc;
    472 			aprint_debug("raid%d includes boot device %s\n",
    473 			    sc->sc_unit, devname);
    474 			return 1;
    475 		}
    476 	}
    477 	return 0;
    478 }
    479 
    480 static int
    481 rf_rescan(void)
    482 {
    483 	RF_AutoConfig_t *ac_list;
    484 	RF_ConfigSet_t *config_sets, *cset, *next_cset;
    485 	struct raid_softc *sc;
    486 	int raid_added;
    487 
    488 	ac_list = rf_find_raid_components();
    489 	config_sets = rf_create_auto_sets(ac_list);
    490 
    491 	raid_added = 1;
    492 	while (raid_added > 0) {
    493 		raid_added = 0;
    494 		cset = config_sets;
    495 		while (cset != NULL) {
    496 			next_cset = cset->next;
    497 			if (rf_have_enough_components(cset) &&
    498 			    cset->ac->clabel->autoconfigure == 1) {
    499 				sc = rf_auto_config_set(cset);
    500 				if (sc != NULL) {
    501 					aprint_debug("raid%d: configured ok, rootable %d\n",
    502 						     sc->sc_unit, cset->rootable);
    503 					/* We added one RAID set */
    504 					raid_added++;
    505 				} else {
    506 					/* The autoconfig didn't work :( */
    507 					aprint_debug("Autoconfig failed\n");
    508 					rf_release_all_vps(cset);
    509 				}
    510 			} else {
    511 				/* we're not autoconfiguring this set...
    512 				   release the associated resources */
    513 				rf_release_all_vps(cset);
    514 			}
    515 			/* cleanup */
    516 			rf_cleanup_config_set(cset);
    517 			cset = next_cset;
    518 		}
    519 		if (raid_added > 0) {
    520 			/* We added at least one RAID set, so re-scan for recursive RAID */
    521 			ac_list = rf_find_raid_components();
    522 			config_sets = rf_create_auto_sets(ac_list);
    523 		}
    524 	}
    525 
    526 	return 0;
    527 }
    528 
    529 /*
    530  * Example setup:
    531  * dk1 at wd0: "raid@wd0", 171965 blocks at 32802, type: raidframe
     532  * dk3 at wd1: "raid@wd1", 171965 blocks at 32802, type: raidframe
    533  * raid1: Components: /dev/dk1 /dev/dk3
    534  * dk4 at raid1: "empty@raid1", 8192 blocks at 34, type: msdos
    535  * dk5 at raid1: "root@raid1", 163517 blocks at 8226, type: ffs
    536  *
    537  * If booted from wd0, booted_device will be
    538  * disk wd0, startblk = 41092, nblks = 163517
    539  *
    540  * That is, dk5 with startblk computed from the beginning of wd0
    541  * instead of beginning of raid1:
    542  * 32802 + 64 (RF_PROTECTED_SECTORS) + 8226 = 41092
    543  *
     544  * In order to find the boot wedge, we must iterate on each component,
     545  * find its offset from disk beginning, and look for the boot wedge with
     546  * startblk adjusted.
    547  */
    548 static device_t
    549 rf_find_bootwedge(struct raid_softc *rsc)
    550 {
    551 	RF_Raid_t *r = &rsc->sc_r;
    552 	const char *bootname;
    553 	size_t len;
    554 	device_t rdev = NULL;
    555 
    556 	if (booted_device == NULL)
    557 		goto out;
    558 
    559 	bootname = device_xname(booted_device);
    560 	len = strlen(bootname);
    561 
    562 	aprint_debug("%s: booted_device %s, startblk = %"PRId64", "
    563 		     "nblks = %"PRId64"\n", __func__,
    564 		     bootname, booted_startblk, booted_nblks);
    565 
    566 	for (int col = 0; col < r->numCol; col++) {
    567 		const char *devname = r->Disks[col].devname;
    568 		const char *parent;
    569 		struct disk *dk;
    570 		u_int nwedges;
    571 		struct dkwedge_info *dkwi;
    572 		struct dkwedge_list dkwl;
    573 		size_t dkwi_len;
    574 		int i;
    575 
    576 		devname += sizeof("/dev/") - 1;
    577 		if (strncmp(devname, "dk", 2) != 0)
    578 			continue;
    579 
    580 		parent = dkwedge_get_parent_name(r->Disks[col].dev);
    581 		if (parent == NULL) {
    582 			aprint_debug("%s: cannot find parent for "
    583 				     "component /dev/%s", __func__, devname);
    584 			continue;
    585 		}
    586 
    587 		if (strncmp(parent, bootname, len) != 0)
    588 			continue;
    589 
    590 		aprint_debug("%s: looking up wedge %s in device %s\n",
    591 			     __func__, devname, parent);
    592 
    593 		dk = disk_find(parent);
    594 		nwedges = dk->dk_nwedges;
    595 		dkwi_len = sizeof(*dkwi) * nwedges;
    596 		dkwi = RF_Malloc(dkwi_len);
    597 
    598 		dkwl.dkwl_buf = dkwi;
    599 		dkwl.dkwl_bufsize = dkwi_len;
    600 		dkwl.dkwl_nwedges = 0;
    601 		dkwl.dkwl_ncopied = 0;
    602 
    603 		if (dkwedge_list(dk, &dkwl, curlwp) == 0) {
    604 			daddr_t startblk;
    605 
    606 			for (i = 0; i < dkwl.dkwl_ncopied; i++) {
    607 				if (strcmp(dkwi[i].dkw_devname, devname) == 0)
    608 					break;
    609 			}
    610 
    611 			KASSERT(i < dkwl.dkwl_ncopied);
    612 
    613 			aprint_debug("%s: wedge %s, "
    614 				     "startblk = %"PRId64", "
    615 				     "nblks = %"PRId64"\n",
    616 				     __func__,
    617 				     dkwi[i].dkw_devname,
    618 				     dkwi[i].dkw_offset,
    619 				     dkwi[i].dkw_size);
    620 
    621 			startblk = booted_startblk
    622 				 - dkwi[i].dkw_offset
    623 				 - RF_PROTECTED_SECTORS;
    624 
    625 			aprint_debug("%s: looking for wedge in %s, "
    626 				     "startblk = %"PRId64", "
    627 				     "nblks = %"PRId64"\n",
    628 				     __func__,
    629 				     DEVICE_XNAME(rsc->sc_dksc.sc_dev),
    630 				     startblk, booted_nblks);
    631 
    632 			rdev = dkwedge_find_partition(rsc->sc_dksc.sc_dev,
    633 						      startblk,
    634 						      booted_nblks);
    635 			if (rdev) {
    636 				aprint_debug("%s: root candidate wedge %s "
    637 					     "shifted from %s\n", __func__,
    638 					     device_xname(rdev),
    639 					     dkwi[i].dkw_devname);
    640 				goto done;
    641 			} else {
    642 				aprint_debug("%s: not found\n", __func__);
    643 			}
    644 		}
    645 
    646 		aprint_debug("%s: nothing found for col %d\n", __func__, col);
    647 done:
    648 		RF_Free(dkwi, dkwi_len);
    649 	}
    650 
    651 out:
    652 	if (!rdev)
    653 		aprint_debug("%s: nothing found\n", __func__);
    654 
    655 	return rdev;
    656 }
    657 
/*
 * Configure all autoconfigurable RAID sets and then try to decide
 * whether one of them should become the root device.  Configuration
 * passes repeat until a pass adds no new set, so RAID sets stacked on
 * other RAID sets are picked up too.  If the user hardwired a root
 * device (rootspec), booted_device and boothowto are left alone.
 */
static void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	int raid_added;
	struct raid_softc *sc, *rsc;
	struct dk_softc *dksc = NULL;	/* XXX gcc -Os: may be used uninit. */

	sc = rsc = NULL;
	num_root = 0;

	/* Force at least one configuration pass. */
	raid_added = 1;
	while (raid_added > 0) {
		raid_added = 0;
		cset = config_sets;
		while (cset != NULL) {
			next_cset = cset->next;
			if (rf_have_enough_components(cset) &&
			    cset->ac->clabel->autoconfigure == 1) {
				sc = rf_auto_config_set(cset);
				if (sc != NULL) {
					aprint_debug("raid%d: configured ok, rootable %d\n",
						     sc->sc_unit, cset->rootable);
					/* We added one RAID set */
					raid_added++;
					/* Remember the last rootable set. */
					if (cset->rootable) {
						rsc = sc;
						num_root++;
					}
				} else {
					/* The autoconfig didn't work :( */
					aprint_debug("Autoconfig failed\n");
					rf_release_all_vps(cset);
				}
			} else {
				/* we're not autoconfiguring this set...
				   release the associated resources */
				rf_release_all_vps(cset);
			}
			/* cleanup */
			rf_cleanup_config_set(cset);
			cset = next_cset;
		}
		if (raid_added > 0) {
			/* We added at least one RAID set, so re-scan for recursive RAID */
			ac_list = rf_find_raid_components();
			config_sets = rf_create_auto_sets(ac_list);
		}
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL) {
		aprint_debug("%s: rootspec %s\n", __func__, rootspec);
		return;
	}

	/* we found something bootable... */
	if (num_root == 1) {
		device_t candidate_root = NULL;
		dksc = &rsc->sc_dksc;

		if (dksc->sc_dkdev.dk_nwedges != 0) {

			/* Find the wedge we booted from */
			candidate_root = rf_find_bootwedge(rsc);

			/* Try first partition */
			if (candidate_root == NULL) {
				size_t i = 0;
				candidate_root = dkwedge_find_by_parent(
				    device_xname(dksc->sc_dev), &i);
			}
			aprint_debug("%s: candidate wedge root %s\n",
			    __func__, DEVICE_XNAME(candidate_root));
		} else {
			/* No wedges: the raid device itself is the candidate. */
			candidate_root = dksc->sc_dev;
		}

		aprint_debug("%s: candidate root = %s, booted_device = %s, "
			     "root_partition = %d, contains_boot=%d\n",
		    __func__, DEVICE_XNAME(candidate_root),
		    DEVICE_XNAME(booted_device), rsc->sc_r.root_partition,
		    rf_containsboot(&rsc->sc_r, booted_device));

		/* XXX the check for booted_device == NULL can probably be
		 * dropped, now that rf_containsboot handles that case.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_method = "raidframe/single";
			booted_partition = 0;	/* XXX assume 'a' */
			aprint_debug("%s: set booted_device = %s\n", __func__,
			    DEVICE_XNAME(booted_device));
		}
	} else if (num_root > 1) {
		aprint_debug("%s: many roots=%d, %s\n", __func__, num_root,
		    DEVICE_XNAME(booted_device));

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/*
		 * Several rootable sets: narrow the choice down to the
		 * one that actually contains the boot device.
		 */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
				dksc = &rsc->sc_dksc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = dksc->sc_dev;
			booted_method = "raidframe/multi";
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
    800 
    801 static int
    802 raidsize(dev_t dev)
    803 {
    804 	struct raid_softc *rs;
    805 	struct dk_softc *dksc;
    806 	unsigned int unit;
    807 
    808 	unit = raidunit(dev);
    809 	if ((rs = raidget(unit, false)) == NULL)
    810 		return -1;
    811 	dksc = &rs->sc_dksc;
    812 
    813 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    814 		return -1;
    815 
    816 	return dk_size(dksc, dev);
    817 }
    818 
    819 static int
    820 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
    821 {
    822 	unsigned int unit;
    823 	struct raid_softc *rs;
    824 	struct dk_softc *dksc;
    825 
    826 	unit = raidunit(dev);
    827 	if ((rs = raidget(unit, false)) == NULL)
    828 		return ENXIO;
    829 	dksc = &rs->sc_dksc;
    830 
    831 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    832 		return ENODEV;
    833 
    834         /*
    835            Note that blkno is relative to this particular partition.
    836            By adding adding RF_PROTECTED_SECTORS, we get a value that
    837 	   is relative to the partition used for the underlying component.
    838         */
    839 	blkno += RF_PROTECTED_SECTORS;
    840 
    841 	return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
    842 }
    843 
/*
 * dk(4) dump callback: write "nblk" sectors from "va" starting at
 * "blkno" directly to one live component of the set.  Only RAID 1
 * layouts (one data column, one parity column) are supported, since
 * only there does a single component carry a complete copy.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int     c, sparecol, j, scol, dumpto;
	int     error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the first component
	   2) a used_spare of the first component
	   3) the second component
	   4) a used_spare of the second component
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live component.
	   If we didn't find a live component, we now check to see
	   if there is a relevant spared component.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;

		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one?  Find which original column
			   this spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared first
				   component!  We'll take that over
				   anything else found so far.  (We
				   couldn't have found a real first
				   component before, since this is a
				   used spare, and it's saying that
				   it's replacing the first
				   component.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the first
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared second component.
				   We'll dump to that if we haven't found
				   anything else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	/* resolve the block device switch entry for the chosen component */
	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* hand the dump straight to the component's own dump routine */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
    950 
/* ARGSUSED */
/*
 * Open the raid device.  Creates the softc on first reference; on the
 * first open of a configured, previously idle set, marks all components
 * dirty before handing the open to dk_open().
 */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int     error = 0;
	int     part, pmask;

	/* "true": allocate the unit if it does not exist yet */
	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return error;

	/* refuse new opens once a shutdown has been requested */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return error;


}
   1000 
   1001 static int
   1002 raid_lastclose(device_t self)
   1003 {
   1004 	struct raid_softc *rs = raidsoftc(self);
   1005 
   1006 	/* Last one... device is not unconfigured yet.
   1007 	   Device shutdown has taken care of setting the
   1008 	   clean bits if RAIDF_INITED is not set
   1009 	   mark things as clean... */
   1010 
   1011 	rf_update_component_labels(&rs->sc_r,
   1012 	    RF_FINAL_COMPONENT_UPDATE);
   1013 
   1014 	/* pass to unlocked code */
   1015 	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
   1016 		rs->sc_flags |= RAIDF_DETACH;
   1017 
   1018 	return 0;
   1019 }
   1020 
/* ARGSUSED */
/*
 * Close the raid device.  Under the unit lock, the close is passed to
 * dk_close() and any pending detach (RAIDF_DETACH) or softc release
 * (RAIDF_SHUTDOWN on an unconfigured unit) is noted; both actions are
 * then carried out after the lock has been dropped.
 */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int     error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return error;

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		/* raid_lastclose() sets RAIDF_DETACH on the final close */
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return error;

}
   1060 
/*
 * Signal the iodone condition variable (under its lock) so that queued
 * RAID work is picked up and processed.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
   1068 
   1069 static void
   1070 raidstrategy(struct buf *bp)
   1071 {
   1072 	unsigned int unit;
   1073 	struct raid_softc *rs;
   1074 	struct dk_softc *dksc;
   1075 	RF_Raid_t *raidPtr;
   1076 
   1077 	unit = raidunit(bp->b_dev);
   1078 	if ((rs = raidget(unit, false)) == NULL) {
   1079 		bp->b_error = ENXIO;
   1080 		goto fail;
   1081 	}
   1082 	if ((rs->sc_flags & RAIDF_INITED) == 0) {
   1083 		bp->b_error = ENXIO;
   1084 		goto fail;
   1085 	}
   1086 	dksc = &rs->sc_dksc;
   1087 	raidPtr = &rs->sc_r;
   1088 
   1089 	/* Queue IO only */
   1090 	if (dk_strategy_defer(dksc, bp))
   1091 		goto done;
   1092 
   1093 	/* schedule the IO to happen at the next convenient time */
   1094 	raid_wakeup(raidPtr);
   1095 
   1096 done:
   1097 	return;
   1098 
   1099 fail:
   1100 	bp->b_resid = bp->b_bcount;
   1101 	biodone(bp);
   1102 }
   1103 
   1104 static int
   1105 raid_diskstart(device_t dev, struct buf *bp)
   1106 {
   1107 	struct raid_softc *rs = raidsoftc(dev);
   1108 	RF_Raid_t *raidPtr;
   1109 
   1110 	raidPtr = &rs->sc_r;
   1111 	if (!raidPtr->valid) {
   1112 		db1_printf(("raid is not valid..\n"));
   1113 		return ENODEV;
   1114 	}
   1115 
   1116 	/* XXX */
   1117 	bp->b_resid = 0;
   1118 
   1119 	return raiddoaccess(raidPtr, bp);
   1120 }
   1121 
/*
 * Completion callback from the RAIDframe engine for a buffer started
 * via raid_diskstart(): finish the buffer at the dk(4) layer, give
 * back the I/O slot it consumed, and schedule more work.
 */
void
raiddone(RF_Raid_t *raidPtr, struct buf *bp)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;

	dk_done(dksc, bp);

	/* return the opening taken when this access was launched */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings++;
	rf_unlock_mutex2(raidPtr->mutex);

	/* schedule more IO */
	raid_wakeup(raidPtr);
}
   1140 
   1141 /* ARGSUSED */
   1142 static int
   1143 raidread(dev_t dev, struct uio *uio, int flags)
   1144 {
   1145 	int     unit = raidunit(dev);
   1146 	struct raid_softc *rs;
   1147 
   1148 	if ((rs = raidget(unit, false)) == NULL)
   1149 		return ENXIO;
   1150 
   1151 	if ((rs->sc_flags & RAIDF_INITED) == 0)
   1152 		return ENXIO;
   1153 
   1154 	return physio(raidstrategy, NULL, dev, B_READ, minphys, uio);
   1155 
   1156 }
   1157 
   1158 /* ARGSUSED */
   1159 static int
   1160 raidwrite(dev_t dev, struct uio *uio, int flags)
   1161 {
   1162 	int     unit = raidunit(dev);
   1163 	struct raid_softc *rs;
   1164 
   1165 	if ((rs = raidget(unit, false)) == NULL)
   1166 		return ENXIO;
   1167 
   1168 	if ((rs->sc_flags & RAIDF_INITED) == 0)
   1169 		return ENXIO;
   1170 
   1171 	return physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio);
   1172 
   1173 }
   1174 
/*
 * Unconfigure a RAID set: refuse while the device is open or while a
 * reconstruction, parity rewrite, or copyback is running; otherwise
 * shut down the RAIDframe engine, drain queued buffers, and detach the
 * disk from dk(4).
 * NOTE(review): the name suggests callers provide their own locking
 * around this — confirm against the call sites.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* busy, or background operations in flight: can't detach now */
	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	/* nothing to tear down if the set was never configured */
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
   1212 
   1213 int
   1214 rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
   1215 {
   1216 	struct rf_recon_req_internal *rrint;
   1217 
   1218 	if (raidPtr->Layout.map->faultsTolerated == 0) {
   1219 		/* Can't do this on a RAID 0!! */
   1220 		return EINVAL;
   1221 	}
   1222 
   1223 	if (rr->col < 0 || rr->col >= raidPtr->numCol) {
   1224 		/* bad column */
   1225 		return EINVAL;
   1226 	}
   1227 
   1228 	rf_lock_mutex2(raidPtr->mutex);
   1229 	if (raidPtr->status == rf_rs_reconstructing) {
   1230 		raidPtr->abortRecon[rr->col] = 1;
   1231 	}
   1232 	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
   1233 	    (raidPtr->numFailures > 0)) {
   1234 		/* some other component has failed.  Let's not make
   1235 		   things worse. XXX wrong for RAID6 */
   1236 		goto out;
   1237 	}
   1238 	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1239 		int spareCol = raidPtr->Disks[rr->col].spareCol;
   1240 
   1241 		if (spareCol < raidPtr->numCol ||
   1242 		    spareCol >= raidPtr->numCol + raidPtr->numSpare)
   1243 			goto out;
   1244 
   1245 		/*
   1246 		 * Fail the spare disk so that we can
   1247 		 * reconstruct on another one.
   1248 		 */
   1249 		raidPtr->Disks[spareCol].status = rf_ds_failed;
   1250 
   1251 	}
   1252 	rf_unlock_mutex2(raidPtr->mutex);
   1253 
   1254 	/* make a copy of the recon request so that we don't rely on
   1255 	 * the user's buffer */
   1256 	rrint = RF_Malloc(sizeof(*rrint));
   1257 	if (rrint == NULL)
   1258 		return(ENOMEM);
   1259 	rrint->col = rr->col;
   1260 	rrint->flags = rr->flags;
   1261 	rrint->raidPtr = raidPtr;
   1262 
   1263 	return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
   1264 	    rrint, "raid_recon");
   1265 out:
   1266 	rf_unlock_mutex2(raidPtr->mutex);
   1267 	return EINVAL;
   1268 }
   1269 
   1270 static int
   1271 rf_copyinspecificbuf(RF_Config_t *k_cfg)
   1272 {
   1273 	/* allocate a buffer for the layout-specific data, and copy it in */
   1274 	if (k_cfg->layoutSpecificSize == 0)
   1275 		return 0;
   1276 
   1277 	if (k_cfg->layoutSpecificSize > 10000) {
   1278 	    /* sanity check */
   1279 	    return EINVAL;
   1280 	}
   1281 
   1282 	u_char *specific_buf;
   1283 	specific_buf =  RF_Malloc(k_cfg->layoutSpecificSize);
   1284 	if (specific_buf == NULL)
   1285 		return ENOMEM;
   1286 
   1287 	int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1288 	    k_cfg->layoutSpecificSize);
   1289 	if (retcode) {
   1290 		RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1291 		db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
   1292 		return retcode;
   1293 	}
   1294 
   1295 	k_cfg->layoutSpecific = specific_buf;
   1296 	return 0;
   1297 }
   1298 
   1299 static int
   1300 rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
   1301 {
   1302 	RF_Config_t *u_cfg = *((RF_Config_t **) data);
   1303 
   1304 	if (rs->sc_r.valid) {
   1305 		/* There is a valid RAID set running on this unit! */
   1306 		printf("raid%d: Device already configured!\n", rs->sc_unit);
   1307 		return EINVAL;
   1308 	}
   1309 
   1310 	/* copy-in the configuration information */
   1311 	/* data points to a pointer to the configuration structure */
   1312 	*k_cfg = RF_Malloc(sizeof(**k_cfg));
   1313 	if (*k_cfg == NULL) {
   1314 		return ENOMEM;
   1315 	}
   1316 	int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
   1317 	if (retcode == 0)
   1318 		return 0;
   1319 	RF_Free(*k_cfg, sizeof(RF_Config_t));
   1320 	db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
   1321 	rs->sc_flags |= RAIDF_SHUTDOWN;
   1322 	return retcode;
   1323 }
   1324 
/*
 * Configure a RAID set from the already copied-in configuration
 * "k_cfg".  Consumes k_cfg: it (and any layout-specific buffer) is
 * always freed before returning.  On failure RAIDF_SHUTDOWN is set so
 * the device gets detached when closed.  Returns 0 or an errno value.
 */
int
rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
{
	int retcode, i;
	RF_Raid_t *raidPtr = &rs->sc_r;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	/* bring the layout-specific data into the kernel */
	if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
		goto out;

	/* should do some kind of sanity check on the configuration.
	 * Store the sum of all the bytes in the last byte? */

	/* Force nul-termination on all strings. */
#define ZERO_FINAL(s)	do { s[sizeof(s) - 1] = '\0'; } while (0)
	for (i = 0; i < RF_MAXCOL; i++) {
		ZERO_FINAL(k_cfg->devnames[0][i]);
	}
	for (i = 0; i < RF_MAXSPARE; i++) {
		ZERO_FINAL(k_cfg->spare_names[i]);
	}
	for (i = 0; i < RF_MAXDBGV; i++) {
		ZERO_FINAL(k_cfg->debugVars[i]);
	}
#undef ZERO_FINAL

	/* Check some basic limits. */
	if (k_cfg->numCol >= RF_MAXCOL || k_cfg->numCol < 0) {
		retcode = EINVAL;
		goto out;
	}
	if (k_cfg->numSpare >= RF_MAXSPARE || k_cfg->numSpare < 0) {
		retcode = EINVAL;
		goto out;
	}

	/* configure the system */

	/*
	 * Clear the entire RAID descriptor, just to make sure
	 *  there is no stale data left in the case of a
	 *  reconfiguration
	 */
	memset(raidPtr, 0, sizeof(*raidPtr));
	raidPtr->softc = rs;
	raidPtr->raidid = rs->sc_unit;

	retcode = rf_Configure(raidPtr, k_cfg, NULL);

	if (retcode == 0) {
		/* allow this many simultaneous IO's to
		   this RAID device */
		raidPtr->openings = RAIDOUTSTANDING;

		raidinit(rs);
		raid_wakeup(raidPtr);
		rf_markalldirty(raidPtr);
	}

	/* free the buffers.  No return code here. */
	if (k_cfg->layoutSpecificSize) {
		RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
	}
out:
	RF_Free(k_cfg, sizeof(RF_Config_t));
	if (retcode) {
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		rs->sc_flags |= RAIDF_SHUTDOWN;
	}
	return retcode;
}
   1400 
#if RF_DISABLED
/*
 * Overwrite the in-core component label for clabel->column with a
 * user-supplied label and flush it to the component.  Currently
 * compiled out (RF_DISABLED): the label contents are intentionally not
 * validated or acted upon yet — see the XXX notes below.
 */
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

	/* XXX check the label for valid stuff... */
	/* Note that some things *should not* get modified --
	   the user should be re-initing the labels instead of
	   trying to patch things.
	   */
#ifdef DEBUG
	int raidid = raidPtr->raidid;
	printf("raid%d: Got component label:\n", raidid);
	printf("raid%d: Version: %d\n", raidid, clabel->version);
	printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
	printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
	printf("raid%d: Column: %d\n", raidid, clabel->column);
	printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
	printf("raid%d: Clean: %d\n", raidid, clabel->clean);
	printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif	/* DEBUG */
	clabel->row = 0;
	int column = clabel->column;

	/* reject out-of-range columns */
	if ((column < 0) || (column >= raidPtr->numCol)) {
		return(EINVAL);
	}

	/* XXX this isn't allowed to do anything for now :-) */

	/* XXX and before it is, we need to fill in the rest
	   of the fields!?!?!?! */
	memcpy(raidget_component_label(raidPtr, column),
	    clabel, sizeof(*clabel));
	raidflush_component_label(raidPtr, column);
	return 0;
}
#endif
   1439 
/*
 * (Re)initialize the component labels of every live component of the
 * set, taking only the serial number from the label supplied by the
 * caller; everything else comes from the running configuration.
 * Always returns 0.
 */
static int
rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/*
	   we only want the serial number from
	   the above.  We get all the rest of the information
	   from the config that was used to create this RAID
	   set.
	   */

	raidPtr->serial_number = clabel->serial_number;

	for (int column = 0; column < raidPtr->numCol; column++) {
		RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
		/* skip failed/absent components */
		if (RF_DEAD_DISK(diskPtr->status))
			continue;
		RF_ComponentLabel_t *ci_label = raidget_component_label(
		    raidPtr, column);
		/* Zeroing this is important. */
		memset(ci_label, 0, sizeof(*ci_label));
		raid_init_component_label(raidPtr, ci_label);
		ci_label->serial_number = raidPtr->serial_number;
		ci_label->row = 0; /* we don't pretend to support more */
		rf_component_label_set_partitionsize(ci_label,
		    diskPtr->partitionSize);
		ci_label->column = column;
		raidflush_component_label(raidPtr, column);
		/* XXXjld what about the spares? */
	}

	return 0;
}
   1472 
   1473 static int
   1474 rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
   1475 {
   1476 
   1477 	if (raidPtr->Layout.map->faultsTolerated == 0) {
   1478 		/* Can't do this on a RAID 0!! */
   1479 		return EINVAL;
   1480 	}
   1481 
   1482 	if (raidPtr->recon_in_progress == 1) {
   1483 		/* a reconstruct is already in progress! */
   1484 		return EINVAL;
   1485 	}
   1486 
   1487 	RF_SingleComponent_t component;
   1488 	memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
   1489 	component.row = 0; /* we don't support any more */
   1490 	int column = component.column;
   1491 
   1492 	if ((column < 0) || (column >= raidPtr->numCol)) {
   1493 		return EINVAL;
   1494 	}
   1495 
   1496 	rf_lock_mutex2(raidPtr->mutex);
   1497 	if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1498 	    (raidPtr->numFailures > 0)) {
   1499 		/* XXX 0 above shouldn't be constant!!! */
   1500 		/* some component other than this has failed.
   1501 		   Let's not make things worse than they already
   1502 		   are... */
   1503 		printf("raid%d: Unable to reconstruct to disk at:\n",
   1504 		       raidPtr->raidid);
   1505 		printf("raid%d:     Col: %d   Too many failures.\n",
   1506 		       raidPtr->raidid, column);
   1507 		rf_unlock_mutex2(raidPtr->mutex);
   1508 		return EINVAL;
   1509 	}
   1510 
   1511 	if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
   1512 		printf("raid%d: Unable to reconstruct to disk at:\n",
   1513 		       raidPtr->raidid);
   1514 		printf("raid%d:    Col: %d   "
   1515 		    "Reconstruction already occurring!\n",
   1516 		    raidPtr->raidid, column);
   1517 
   1518 		rf_unlock_mutex2(raidPtr->mutex);
   1519 		return EINVAL;
   1520 	}
   1521 
   1522 	if (raidPtr->Disks[column].status == rf_ds_spared) {
   1523 		rf_unlock_mutex2(raidPtr->mutex);
   1524 		return EINVAL;
   1525 	}
   1526 
   1527 	rf_unlock_mutex2(raidPtr->mutex);
   1528 
   1529 	struct rf_recon_req_internal *rrint;
   1530 	rrint = RF_Malloc(sizeof(*rrint));
   1531 	if (rrint == NULL)
   1532 		return ENOMEM;
   1533 
   1534 	rrint->col = column;
   1535 	rrint->raidPtr = raidPtr;
   1536 
   1537 	return RF_CREATE_THREAD(raidPtr->recon_thread,
   1538 	    rf_ReconstructInPlaceThread, rrint, "raid_reconip");
   1539 }
   1540 
   1541 static int
   1542 rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
   1543 {
   1544 	/*
   1545 	 * This makes no sense on a RAID 0, or if we are not reconstructing
   1546 	 * so tell the user it's done.
   1547 	 */
   1548 	if (raidPtr->Layout.map->faultsTolerated == 0 ||
   1549 	    raidPtr->status != rf_rs_reconstructing) {
   1550 		*data = 100;
   1551 		return 0;
   1552 	}
   1553 	if (raidPtr->reconControl->numRUsTotal == 0) {
   1554 		*data = 0;
   1555 		return 0;
   1556 	}
   1557 	*data = (raidPtr->reconControl->numRUsComplete * 100
   1558 	    / raidPtr->reconControl->numRUsTotal);
   1559 	return 0;
   1560 }
   1561 
   1562 /*
   1563  * Copy a RF_SingleComponent_t from 'data', ensuring nul-termination
   1564  * on the component_name[] array.
   1565  */
   1566 static void
   1567 rf_copy_single_component(RF_SingleComponent_t *component, void *data)
   1568 {
   1569 
   1570 	memcpy(component, data, sizeof *component);
   1571 	component->component_name[sizeof(component->component_name) - 1] = '\0';
   1572 }
   1573 
   1574 static int
   1575 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
   1576 {
   1577 	int     unit = raidunit(dev);
   1578 	int     part, pmask;
   1579 	struct raid_softc *rs;
   1580 	struct dk_softc *dksc;
   1581 	RF_Config_t *k_cfg;
   1582 	RF_Raid_t *raidPtr;
   1583 	RF_AccTotals_t *totals;
   1584 	RF_SingleComponent_t component;
   1585 	RF_DeviceConfig_t *d_cfg, *ucfgp;
   1586 	int retcode = 0;
   1587 	int column;
   1588 	RF_ComponentLabel_t *clabel;
   1589 	int d;
   1590 
   1591 	if ((rs = raidget(unit, false)) == NULL)
   1592 		return ENXIO;
   1593 
   1594 	dksc = &rs->sc_dksc;
   1595 	raidPtr = &rs->sc_r;
   1596 
   1597 	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
   1598 	    (int) DISKPART(dev), (int) unit, cmd));
   1599 
   1600 	/* Only CONFIGURE and RESCAN can be done without the RAID being initialized. */
   1601 	switch (cmd) {
   1602 	case RAIDFRAME_CONFIGURE:
   1603 	case RAIDFRAME_RESCAN:
   1604 		break;
   1605 	default:
   1606 		if (!rf_inited(rs))
   1607 			return ENXIO;
   1608 	}
   1609 
   1610 	switch (cmd) {
   1611 		/* configure the system */
   1612 	case RAIDFRAME_CONFIGURE:
   1613 		if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
   1614 			return retcode;
   1615 		return rf_construct(rs, k_cfg);
   1616 
   1617 		/* shutdown the system */
   1618 	case RAIDFRAME_SHUTDOWN:
   1619 
   1620 		part = DISKPART(dev);
   1621 		pmask = (1 << part);
   1622 
   1623 		if ((retcode = raidlock(rs)) != 0)
   1624 			return retcode;
   1625 
   1626 		if (DK_BUSY(dksc, pmask) ||
   1627 		    raidPtr->recon_in_progress != 0 ||
   1628 		    raidPtr->parity_rewrite_in_progress != 0 ||
   1629 		    raidPtr->copyback_in_progress != 0)
   1630 			retcode = EBUSY;
   1631 		else {
   1632 			/* detach and free on close */
   1633 			rs->sc_flags |= RAIDF_SHUTDOWN;
   1634 			retcode = 0;
   1635 		}
   1636 
   1637 		raidunlock(rs);
   1638 
   1639 		return retcode;
   1640 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1641 		return rf_get_component_label(raidPtr, data);
   1642 
   1643 #if RF_DISABLED
   1644 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1645 		return rf_set_component_label(raidPtr, data);
   1646 #endif
   1647 
   1648 	case RAIDFRAME_INIT_LABELS:
   1649 		return rf_init_component_label(raidPtr, data);
   1650 
   1651 	case RAIDFRAME_SET_AUTOCONFIG:
   1652 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1653 		printf("raid%d: New autoconfig value is: %d\n",
   1654 		       raidPtr->raidid, d);
   1655 		*(int *) data = d;
   1656 		return retcode;
   1657 
   1658 	case RAIDFRAME_SET_ROOT:
   1659 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1660 		printf("raid%d: New rootpartition value is: %d\n",
   1661 		       raidPtr->raidid, d);
   1662 		*(int *) data = d;
   1663 		return retcode;
   1664 
   1665 		/* initialize all parity */
   1666 	case RAIDFRAME_REWRITEPARITY:
   1667 
   1668 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1669 			/* Parity for RAID 0 is trivially correct */
   1670 			raidPtr->parity_good = RF_RAID_CLEAN;
   1671 			return 0;
   1672 		}
   1673 
   1674 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1675 			/* Re-write is already in progress! */
   1676 			return EINVAL;
   1677 		}
   1678 
   1679 		return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1680 		    rf_RewriteParityThread, raidPtr,"raid_parity");
   1681 
   1682 	case RAIDFRAME_ADD_HOT_SPARE:
   1683 		rf_copy_single_component(&component, data);
   1684 		return rf_add_hot_spare(raidPtr, &component);
   1685 
   1686 	/* Remove a non hot-spare component, never implemented in userland */
   1687 	case RAIDFRAME_DELETE_COMPONENT:
   1688 		rf_copy_single_component(&component, data);
   1689 		return rf_delete_component(raidPtr, &component);
   1690 
   1691 	case RAIDFRAME_REMOVE_COMPONENT:
   1692 		rf_copy_single_component(&component, data);
   1693 		return rf_remove_component(raidPtr, &component);
   1694 
   1695 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1696 		rf_copy_single_component(&component, data);
   1697 		return rf_incorporate_hot_spare(raidPtr, &component);
   1698 
   1699 	case RAIDFRAME_REBUILD_IN_PLACE:
   1700 		return rf_rebuild_in_place(raidPtr, data);
   1701 
   1702 	case RAIDFRAME_GET_INFO:
   1703 		ucfgp = *(RF_DeviceConfig_t **)data;
   1704 		d_cfg = RF_Malloc(sizeof(*d_cfg));
   1705 		if (d_cfg == NULL)
   1706 			return ENOMEM;
   1707 		retcode = rf_get_info(raidPtr, d_cfg);
   1708 		if (retcode == 0) {
   1709 			retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
   1710 		}
   1711 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1712 		return retcode;
   1713 
   1714 	case RAIDFRAME_CHECK_PARITY:
   1715 		*(int *) data = raidPtr->parity_good;
   1716 		return 0;
   1717 
   1718 	case RAIDFRAME_PARITYMAP_STATUS:
   1719 		if (rf_paritymap_ineligible(raidPtr))
   1720 			return EINVAL;
   1721 		rf_paritymap_status(raidPtr->parity_map, data);
   1722 		return 0;
   1723 
   1724 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
   1725 		if (rf_paritymap_ineligible(raidPtr))
   1726 			return EINVAL;
   1727 		if (raidPtr->parity_map == NULL)
   1728 			return ENOENT; /* ??? */
   1729 		if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
   1730 			return EINVAL;
   1731 		return 0;
   1732 
   1733 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
   1734 		if (rf_paritymap_ineligible(raidPtr))
   1735 			return EINVAL;
   1736 		*(int *) data = rf_paritymap_get_disable(raidPtr);
   1737 		return 0;
   1738 
   1739 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
   1740 		if (rf_paritymap_ineligible(raidPtr))
   1741 			return EINVAL;
   1742 		rf_paritymap_set_disable(raidPtr, *(int *)data);
   1743 		/* XXX should errors be passed up? */
   1744 		return 0;
   1745 
   1746 	case RAIDFRAME_RESCAN:
   1747 		return rf_rescan();
   1748 
   1749 	case RAIDFRAME_RESET_ACCTOTALS:
   1750 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1751 		return 0;
   1752 
   1753 	case RAIDFRAME_GET_ACCTOTALS:
   1754 		totals = (RF_AccTotals_t *) data;
   1755 		*totals = raidPtr->acc_totals;
   1756 		return 0;
   1757 
   1758 	case RAIDFRAME_KEEP_ACCTOTALS:
   1759 		raidPtr->keep_acc_totals = *(int *)data;
   1760 		return 0;
   1761 
   1762 	case RAIDFRAME_GET_SIZE:
   1763 		*(int *) data = raidPtr->totalSectors;
   1764 		return 0;
   1765 
   1766 	case RAIDFRAME_FAIL_DISK:
   1767 		return rf_fail_disk(raidPtr, data);
   1768 
   1769 		/* invoke a copyback operation after recon on whatever disk
   1770 		 * needs it, if any */
   1771 	case RAIDFRAME_COPYBACK:
   1772 
   1773 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1774 			/* This makes no sense on a RAID 0!! */
   1775 			return EINVAL;
   1776 		}
   1777 
   1778 		if (raidPtr->copyback_in_progress == 1) {
   1779 			/* Copyback is already in progress! */
   1780 			return EINVAL;
   1781 		}
   1782 
   1783 		return RF_CREATE_THREAD(raidPtr->copyback_thread,
   1784 		    rf_CopybackThread, raidPtr, "raid_copyback");
   1785 
   1786 		/* return the percentage completion of reconstruction */
   1787 	case RAIDFRAME_CHECK_RECON_STATUS:
   1788 		return rf_check_recon_status(raidPtr, data);
   1789 
   1790 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1791 		rf_check_recon_status_ext(raidPtr, data);
   1792 		return 0;
   1793 
   1794 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1795 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1796 			/* This makes no sense on a RAID 0, so tell the
   1797 			   user it's done. */
   1798 			*(int *) data = 100;
   1799 			return 0;
   1800 		}
   1801 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1802 			*(int *) data = 100 *
   1803 				raidPtr->parity_rewrite_stripes_done /
   1804 				raidPtr->Layout.numStripe;
   1805 		} else {
   1806 			*(int *) data = 100;
   1807 		}
   1808 		return 0;
   1809 
   1810 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1811 		rf_check_parityrewrite_status_ext(raidPtr, data);
   1812 		return 0;
   1813 
   1814 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1815 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1816 			/* This makes no sense on a RAID 0 */
   1817 			*(int *) data = 100;
   1818 			return 0;
   1819 		}
   1820 		if (raidPtr->copyback_in_progress == 1) {
   1821 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1822 				raidPtr->Layout.numStripe;
   1823 		} else {
   1824 			*(int *) data = 100;
   1825 		}
   1826 		return 0;
   1827 
   1828 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1829 		rf_check_copyback_status_ext(raidPtr, data);
   1830 		return 0;
   1831 
   1832 	case RAIDFRAME_SET_LAST_UNIT:
   1833 		for (column = 0; column < raidPtr->numCol; column++)
   1834 			if (raidPtr->Disks[column].status != rf_ds_optimal)
   1835 				return EBUSY;
   1836 
   1837 		for (column = 0; column < raidPtr->numCol; column++) {
   1838 			clabel = raidget_component_label(raidPtr, column);
   1839 			clabel->last_unit = *(int *)data;
   1840 			raidflush_component_label(raidPtr, column);
   1841 		}
   1842 		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
   1843 		return 0;
   1844 
   1845 		/* the sparetable daemon calls this to wait for the kernel to
   1846 		 * need a spare table. this ioctl does not return until a
   1847 		 * spare table is needed. XXX -- calling mpsleep here in the
   1848 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1849 		 * -- I should either compute the spare table in the kernel,
   1850 		 * or have a different -- XXX XXX -- interface (a different
   1851 		 * character device) for delivering the table     -- XXX */
   1852 #if RF_DISABLED
   1853 	case RAIDFRAME_SPARET_WAIT:
   1854 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1855 		while (!rf_sparet_wait_queue)
   1856 			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
   1857 		RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
   1858 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1859 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1860 
   1861 		/* structure assignment */
   1862 		*((RF_SparetWait_t *) data) = *waitreq;
   1863 
   1864 		RF_Free(waitreq, sizeof(*waitreq));
   1865 		return 0;
   1866 
   1867 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1868 		 * code in it that will cause the dameon to exit */
   1869 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1870 		waitreq = RF_Malloc(sizeof(*waitreq));
   1871 		waitreq->fcol = -1;
   1872 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1873 		waitreq->next = rf_sparet_wait_queue;
   1874 		rf_sparet_wait_queue = waitreq;
   1875 		rf_broadcast_cond2(rf_sparet_wait_cv);
   1876 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1877 		return 0;
   1878 
   1879 		/* used by the spare table daemon to deliver a spare table
   1880 		 * into the kernel */
   1881 	case RAIDFRAME_SEND_SPARET:
   1882 
   1883 		/* install the spare table */
   1884 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1885 
   1886 		/* respond to the requestor.  the return status of the spare
   1887 		 * table installation is passed in the "fcol" field */
   1888 		waitred = RF_Malloc(sizeof(*waitreq));
   1889 		waitreq->fcol = retcode;
   1890 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1891 		waitreq->next = rf_sparet_resp_queue;
   1892 		rf_sparet_resp_queue = waitreq;
   1893 		rf_broadcast_cond2(rf_sparet_resp_cv);
   1894 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1895 
   1896 		return retcode;
   1897 #endif
   1898 	default:
   1899 		/*
   1900 		 * Don't bother trying to load compat modules
   1901 		 * if it is not our ioctl. This is more efficient
   1902 		 * and makes rump tests not depend on compat code
   1903 		 */
   1904 		if (IOCGROUP(cmd) != 'r')
   1905 			break;
   1906 #ifdef _LP64
   1907 		if ((l->l_proc->p_flag & PK_32) != 0) {
   1908 			module_autoload("compat_netbsd32_raid",
   1909 			    MODULE_CLASS_EXEC);
   1910 			MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
   1911 			    (rs, cmd, data), enosys(), retcode);
   1912 			if (retcode != EPASSTHROUGH)
   1913 				return retcode;
   1914 		}
   1915 #endif
   1916 		module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
   1917 		MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
   1918 		    (rs, cmd, data), enosys(), retcode);
   1919 		if (retcode != EPASSTHROUGH)
   1920 			return retcode;
   1921 
   1922 		module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
   1923 		MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
   1924 		    (rs, cmd, data), enosys(), retcode);
   1925 		if (retcode != EPASSTHROUGH)
   1926 			return retcode;
   1927 		break; /* fall through to the os-specific code below */
   1928 
   1929 	}
   1930 
   1931 	if (!raidPtr->valid)
   1932 		return EINVAL;
   1933 
   1934 	/*
   1935 	 * Add support for "regular" device ioctls here.
   1936 	 */
   1937 
   1938 	switch (cmd) {
   1939 	case DIOCGCACHE:
   1940 		retcode = rf_get_component_caches(raidPtr, (int *)data);
   1941 		break;
   1942 
   1943 	case DIOCCACHESYNC:
   1944 		retcode = rf_sync_component_caches(raidPtr, *(int *)data);
   1945 		break;
   1946 
   1947 	default:
   1948 		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
   1949 		break;
   1950 	}
   1951 
   1952 	return retcode;
   1953 
   1954 }
   1955 
   1956 
   1957 /* raidinit -- complete the rest of the initialization for the
   1958    RAIDframe device.  */
   1959 
   1960 
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	/* The unit number doubles as the RAIDframe id. */
	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	/* NOTE(review): on success the cfdata is presumably owned by
	 * autoconf until detach; it is only freed here on failure. */
	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	/* Export the configured set's capacity (in sectors). */
	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	/* Probe the new device for wedges. */
	dkwedge_discover(&dksc->sc_dkdev);
}
   2016 
   2017 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   2018 /* wake up the daemon & tell it to get us a spare table
   2019  * XXX
   2020  * the entries in the queues should be tagged with the raidPtr
   2021  * so that in the extremely rare case that two recons happen at once,
   2022  * we know for which device were requesting a spare table
   2023  * XXX
   2024  *
   2025  * XXX This code is not currently used. GO
   2026  */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	/* Queue the request where the user-space daemon (blocked in
	 * RAIDFRAME_SPARET_WAIT) will find it, and wake the daemon. */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	/* Block until the daemon posts a response on the response queue
	 * (delivered via RAIDFRAME_SEND_SPARET). */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	/* The daemon passes its status back in the "fcol" field. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return retcode;
}
   2050 #endif
   2051 
   2052 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   2053  * bp & passes it down.
   2054  * any calls originating in the kernel must use non-blocking I/O
   2055  * do some extra sanity checking to return "appropriate" error values for
   2056  * certain conditions (to make some standard utilities work)
   2057  *
   2058  * Formerly known as: rf_DoAccessKernel
   2059  */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the mutex across the label update, which takes
		 * its own locks, then retake it for the counter. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* Refuse to issue I/O before raidinit() has completed. */
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	/* Kick the dk layer to start draining the buffer queue. */
	dk_start(dksc, NULL);
}
   2086 
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int rc;

	/* Back-pressure: refuse new work when no openings remain;
	 * caller retries later via EAGAIN. */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
		    (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* Whole sectors in the request; pb is 1 if there is a
	 * partial trailing sector. */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	/* NOTE(review): the "1 ||" forces this branch; db1_printf is
	 * presumably compiled out unless debugging -- confirm. */
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			    (int) raid_addr, (int) sum, (int) num_blocks,
			    (int) pb, (int) bp->b_resid));
	}
	/* Reject requests past the end of the set; the "sum <" tests
	 * catch wrap-around of the unsigned addition. */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* Reject transfers that are not a multiple of the sector size. */
	if (bp->b_bcount & raidPtr->sectorMask) {
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Consume an opening for the duration of this access. */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
			 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
			 raid_addr, num_blocks,
			 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}
   2153 
   2154 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   2155 
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf("%s: WAKEUP CALLED\n", __func__);
		queue->numOutstanding++;

		/* Fake an immediate completion through the normal
		 * iodone path. */
		bp->b_flags = 0;
		bp->b_private = req;

		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the buf for the component device; completion
		 * is routed back through KernelWakeupFunc. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return 0;
}
   2229 /* this is the callback function associated with a I/O invoked from
   2230    kernel code.
   2231  */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The originating request was stashed in b_private by InitBP
	 * (or the NOP path in rf_DispatchKernelIO). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures triggers a component-label
			 * update on the next raidstart(). */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
   2298 
   2299 
   2300 /*
   2301  * initialize a buf structure for doing an I/O in the kernel.
   2302  */
   2303 static void
   2304 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2305        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
   2306        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector)
   2307 {
   2308 	bp->b_flags = rw_flag | (bp->b_flags & rf_b_pass);
   2309 	bp->b_oflags = 0;
   2310 	bp->b_cflags = 0;
   2311 	bp->b_bcount = numSect << logBytesPerSector;
   2312 	bp->b_bufsize = bp->b_bcount;
   2313 	bp->b_error = 0;
   2314 	bp->b_dev = dev;
   2315 	bp->b_data = bf;
   2316 	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
   2317 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2318 	if (bp->b_bcount == 0) {
   2319 		panic("bp->b_bcount is zero in InitBP!!");
   2320 	}
   2321 	bp->b_iodone = cbFunc;
   2322 	bp->b_private = cbArg;
   2323 }
   2324 
   2325 /*
   2326  * Wait interruptibly for an exclusive lock.
   2327  *
   2328  * XXX
   2329  * Several drivers do this; it should be abstracted and made MP-safe.
   2330  * (Hmm... where have we seen this warning before :->  GO )
   2331  */
   2332 static int
   2333 raidlock(struct raid_softc *rs)
   2334 {
   2335 	int     error;
   2336 
   2337 	error = 0;
   2338 	mutex_enter(&rs->sc_mutex);
   2339 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2340 		rs->sc_flags |= RAIDF_WANTED;
   2341 		error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
   2342 		if (error != 0)
   2343 			goto done;
   2344 	}
   2345 	rs->sc_flags |= RAIDF_LOCKED;
   2346 done:
   2347 	mutex_exit(&rs->sc_mutex);
   2348 	return error;
   2349 }
   2350 /*
   2351  * Unlock and wake up any waiters.
   2352  */
   2353 static void
   2354 raidunlock(struct raid_softc *rs)
   2355 {
   2356 
   2357 	mutex_enter(&rs->sc_mutex);
   2358 	rs->sc_flags &= ~RAIDF_LOCKED;
   2359 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2360 		rs->sc_flags &= ~RAIDF_WANTED;
   2361 		cv_broadcast(&rs->sc_cv);
   2362 	}
   2363 	mutex_exit(&rs->sc_mutex);
   2364 }
   2365 
   2366 
   2367 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2368 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2369 #define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
   2370 
static daddr_t
rf_component_info_offset(void)
{

	/* Byte offset of the component label area on each component. */
	return RF_COMPONENT_INFO_OFFSET;
}
   2377 
   2378 static daddr_t
   2379 rf_component_info_size(unsigned secsize)
   2380 {
   2381 	daddr_t info_size;
   2382 
   2383 	KASSERT(secsize);
   2384 	if (secsize > RF_COMPONENT_INFO_SIZE)
   2385 		info_size = secsize;
   2386 	else
   2387 		info_size = RF_COMPONENT_INFO_SIZE;
   2388 
   2389 	return info_size;
   2390 }
   2391 
   2392 static daddr_t
   2393 rf_parity_map_offset(RF_Raid_t *raidPtr)
   2394 {
   2395 	daddr_t map_offset;
   2396 
   2397 	KASSERT(raidPtr->bytesPerSector);
   2398 	if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
   2399 		map_offset = raidPtr->bytesPerSector;
   2400 	else
   2401 		map_offset = RF_COMPONENT_INFO_SIZE;
   2402 	map_offset += rf_component_info_offset();
   2403 
   2404 	return map_offset;
   2405 }
   2406 
   2407 static daddr_t
   2408 rf_parity_map_size(RF_Raid_t *raidPtr)
   2409 {
   2410 	daddr_t map_size;
   2411 
   2412 	if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
   2413 		map_size = raidPtr->bytesPerSector;
   2414 	else
   2415 		map_size = RF_PARITY_MAP_SIZE;
   2416 
   2417 	return map_size;
   2418 }
   2419 
   2420 int
   2421 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2422 {
   2423 	RF_ComponentLabel_t *clabel;
   2424 
   2425 	clabel = raidget_component_label(raidPtr, col);
   2426 	clabel->clean = RF_RAID_CLEAN;
   2427 	raidflush_component_label(raidPtr, col);
   2428 	return(0);
   2429 }
   2430 
   2431 
   2432 int
   2433 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2434 {
   2435 	RF_ComponentLabel_t *clabel;
   2436 
   2437 	clabel = raidget_component_label(raidPtr, col);
   2438 	clabel->clean = RF_RAID_DIRTY;
   2439 	raidflush_component_label(raidPtr, col);
   2440 	return(0);
   2441 }
   2442 
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);

	/* Read the on-disk label for this component into the in-core
	 * copy held in raid_cinfo[col].ci_label. */
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
   2453 
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Return the in-core component label; does not touch the disk. */
	return &raidPtr->raid_cinfo[col].ci_label;
}
   2459 
   2460 int
   2461 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2462 {
   2463 	RF_ComponentLabel_t *label;
   2464 
   2465 	label = &raidPtr->raid_cinfo[col].ci_label;
   2466 	label->mod_counter = raidPtr->mod_counter;
   2467 #ifndef RF_NO_PARITY_MAP
   2468 	label->parity_map_modcount = label->mod_counter;
   2469 #endif
   2470 	return raidwrite_component_label(raidPtr->bytesPerSector,
   2471 	    raidPtr->Disks[col].dev,
   2472 	    raidPtr->raid_cinfo[col].ci_vp, label);
   2473 }
   2474 
   2475 /*
   2476  * Swap the label endianness.
   2477  *
   2478  * Everything in the component label is 4-byte-swapped except the version,
   2479  * which is kept in the byte-swapped version at all times, and indicates
   2480  * for the writer that a swap is necessary.
   2481  *
   2482  * For reads it is expected that out_label == clabel, but writes expect
   2483  * separate labels so only the re-swapped label is written out to disk,
   2484  * leaving the swapped-except-version internally.
   2485  *
   2486  * Only support swapping label version 2.
   2487  */
   2488 static void
   2489 rf_swap_label(RF_ComponentLabel_t *clabel, RF_ComponentLabel_t *out_label)
   2490 {
   2491 	int	*in, *out, *in_last;
   2492 
   2493 	KASSERT(clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION));
   2494 
   2495 	/* Don't swap the label, but do copy it. */
   2496 	out_label->version = clabel->version;
   2497 
   2498 	in = &clabel->serial_number;
   2499 	in_last = &clabel->future_use2[42];
   2500 	out = &out_label->serial_number;
   2501 
   2502 	for (; in < in_last; in++, out++)
   2503 		*out = bswap32(*in);
   2504 }
   2505 
   2506 static int
   2507 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
   2508     RF_ComponentLabel_t *clabel)
   2509 {
   2510 	int error;
   2511 
   2512 	error = raidread_component_area(dev, b_vp, clabel,
   2513 	    sizeof(RF_ComponentLabel_t),
   2514 	    rf_component_info_offset(),
   2515 	    rf_component_info_size(secsize));
   2516 
   2517 	if (error == 0 &&
   2518 	    clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
   2519 		rf_swap_label(clabel, clabel);
   2520 	}
   2521 
   2522 	return error;
   2523 }
   2524 
   2525 /* ARGSUSED */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
 	bp->b_resid = dsize;

	/* Issue the read synchronously and wait for completion. */
	bdev_strategy(bp);
	error = biowait(bp);

	/* Only the first msize bytes of the dsize-byte read are
	 * returned to the caller. */
	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}
   2562 
   2563 static int
   2564 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
   2565     RF_ComponentLabel_t *clabel)
   2566 {
   2567 	RF_ComponentLabel_t *clabel_write = clabel;
   2568 	RF_ComponentLabel_t lclabel;
   2569 	int error;
   2570 
   2571 	if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
   2572 		clabel_write = &lclabel;
   2573 		rf_swap_label(clabel, clabel_write);
   2574 	}
   2575 	error = raidwrite_component_area(dev, b_vp, clabel_write,
   2576 	    sizeof(RF_ComponentLabel_t),
   2577 	    rf_component_info_offset(),
   2578 	    rf_component_info_size(secsize), 0);
   2579 
   2580 	return error;
   2581 }
   2582 
   2583 /* ARGSUSED */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
 	bp->b_resid = dsize;

	/* Zero-pad the buffer past msize so the full dsize area on
	 * disk is written deterministically. */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	/* NOTE(review): in the async case we return before completion
	 * and never biowait/brelse here -- presumably B_ASYNC completion
	 * releases the buffer; confirm against the buffer cache code. */
	if (asyncp)
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
   2617 
   2618 void
   2619 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
   2620 {
   2621 	int c;
   2622 
   2623 	for (c = 0; c < raidPtr->numCol; c++) {
   2624 		/* Skip dead disks. */
   2625 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
   2626 			continue;
   2627 		/* XXXjld: what if an error occurs here? */
   2628 		raidwrite_component_area(raidPtr->Disks[c].dev,
   2629 		    raidPtr->raid_cinfo[c].ci_vp, map,
   2630 		    RF_PARITYMAP_NBYTE,
   2631 		    rf_parity_map_offset(raidPtr),
   2632 		    rf_parity_map_size(raidPtr), 0);
   2633 	}
   2634 }
   2635 
   2636 void
   2637 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
   2638 {
   2639 	struct rf_paritymap_ondisk tmp;
   2640 	int c,first;
   2641 
   2642 	first=1;
   2643 	for (c = 0; c < raidPtr->numCol; c++) {
   2644 		/* Skip dead disks. */
   2645 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
   2646 			continue;
   2647 		raidread_component_area(raidPtr->Disks[c].dev,
   2648 		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
   2649 		    RF_PARITYMAP_NBYTE,
   2650 		    rf_parity_map_offset(raidPtr),
   2651 		    rf_parity_map_size(raidPtr));
   2652 		if (first) {
   2653 			memcpy(map, &tmp, sizeof(*map));
   2654 			first = 0;
   2655 		} else {
   2656 			rf_paritymap_merge(map, &tmp);
   2657 		}
   2658 	}
   2659 }
   2660 
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	/* Bump the mod counter so these label writes supersede older
	 * copies on disk. */
	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	/* Spares live in Disks[] after the numCol regular components. */
	for (c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;

		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare is standing in for;
			 * scol keeps its previous value (-1 initially) if
			 * no match is found. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
   2721 
   2722 
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	/* Newer mod counter makes these labels authoritative on disk. */
	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			/* On final update (shutdown path), also set the
			 * clean bit if parity is known good. */
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	/* Spares live in Disks[] after the numCol regular components. */
	for (c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;

		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare replaced; scol
			 * stays -1 (or its prior value) if none match. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
   2801 
   2802 void
   2803 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2804 {
   2805 
   2806 	if (vp != NULL) {
   2807 		if (auto_configured == 1) {
   2808 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2809 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2810 			vput(vp);
   2811 
   2812 		} else {
   2813 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
   2814 		}
   2815 	}
   2816 }
   2817 
   2818 
   2819 void
   2820 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2821 {
   2822 	int r,c;
   2823 	struct vnode *vp;
   2824 	int acd;
   2825 
   2826 
   2827 	/* We take this opportunity to close the vnodes like we should.. */
   2828 
   2829 	for (c = 0; c < raidPtr->numCol; c++) {
   2830 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2831 		acd = raidPtr->Disks[c].auto_configured;
   2832 		rf_close_component(raidPtr, vp, acd);
   2833 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2834 		raidPtr->Disks[c].auto_configured = 0;
   2835 	}
   2836 
   2837 	for (r = 0; r < raidPtr->numSpare; r++) {
   2838 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2839 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2840 		rf_close_component(raidPtr, vp, acd);
   2841 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2842 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2843 	}
   2844 }
   2845 
   2846 
/*
 * Kernel-thread body: mark component req->col as failed and, if
 * RF_FDFLAGS_RECON is set, reconstruct its contents onto a spare.
 * Frees req and exits the thread when done.
 */
static void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int     s;
	RF_Raid_t *raidPtr;

	/* Block disk interrupts for the duration of the operation. */
	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	/* Caller may request that reconstruction proceed regardless of
	   conditions that would normally inhibit it. */
	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 1;
	}

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* Restore the flag so later operations are not forced. */
	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 0;
	}

	/* The request was allocated by whoever spawned us; we own it now. */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
   2876 
/*
 * Kernel-thread body: rewrite all parity for the set.  On success the
 * in-core state is marked clean; on failure the error is logged and the
 * set stays dirty.  Wakes any thread waiting in shutdown before exiting.
 */
static void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	/* Must clear in_progress before signalling, or a waiter could
	   re-check the flag and sleep again. */
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		rf_lock_mutex2(raidPtr->rad_lock);
		cv_broadcast(&raidPtr->parity_rewrite_cv);
		rf_unlock_mutex2(raidPtr->rad_lock);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
   2909 
   2910 
/*
 * Kernel-thread body: copy reconstructed data back from the spare onto
 * the (replaced) original component, then exit.
 */
static void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	/* Block disk interrupts while the copyback runs. */
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
   2925 
   2926 
/*
 * Kernel-thread body: reconstruct component req->col in place (onto the
 * same device, e.g. after the disk was replaced at the same location).
 * Frees req and exits the thread when done.
 */
static void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;

	/* Caller may request that reconstruction proceed regardless of
	   conditions that would normally inhibit it. */
	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 1;
	}

	rf_ReconstructInPlace(raidPtr, req->col);

	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 0;
	}

	/* The request was allocated by whoever spawned us; we own it now. */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
   2954 
   2955 static RF_AutoConfig_t *
   2956 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
   2957     const char *cname, RF_SectorCount_t size, uint64_t numsecs,
   2958     unsigned secsize)
   2959 {
   2960 	int good_one = 0;
   2961 	RF_ComponentLabel_t *clabel;
   2962 	RF_AutoConfig_t *ac;
   2963 
   2964 	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_WAITOK);
   2965 
   2966 	if (!raidread_component_label(secsize, dev, vp, clabel)) {
   2967 		/* Got the label.  Does it look reasonable? */
   2968 		if (rf_reasonable_label(clabel, numsecs) &&
   2969 		    (rf_component_label_partitionsize(clabel) <= size)) {
   2970 #ifdef DEBUG
   2971 			printf("Component on: %s: %llu\n",
   2972 				cname, (unsigned long long)size);
   2973 			rf_print_component_label(clabel);
   2974 #endif
   2975 			/* if it's reasonable, add it, else ignore it. */
   2976 			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
   2977 				M_WAITOK);
   2978 			strlcpy(ac->devname, cname, sizeof(ac->devname));
   2979 			ac->dev = dev;
   2980 			ac->vp = vp;
   2981 			ac->clabel = clabel;
   2982 			ac->next = ac_list;
   2983 			ac_list = ac;
   2984 			good_one = 1;
   2985 		}
   2986 	}
   2987 	if (!good_one) {
   2988 		/* cleanup */
   2989 		free(clabel, M_RAIDFRAME);
   2990 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2991 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2992 		vput(vp);
   2993 	}
   2994 	return ac_list;
   2995 }
   2996 
/*
 * Scan every disk-class device in the system looking for RAIDframe
 * components, and return them as a linked list of RF_AutoConfig_t
 * entries (each holding an open vnode and a copy of the on-disk
 * component label).  Returns NULL if nothing was found.
 */
static RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		     dv = deviter_next(&di)) {

			/* we are only interested in disks */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CDs. */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md. */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* we don't care about spiflash */
			if (device_is_a(dv, "spiflash")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			/* NOTE(review): wedges use the plain minor, other
			   disks the raw partition of their unit. */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				/* NOTE(review): opened FREAD but closed with
				   FREAD|FWRITE here and below — historical;
				   confirm before changing. */
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* Wedge pass: only wedges whose partition
				   type is raidframe are of interest. */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component() takes ownership of the
				   (unlocked) vnode from here on. */
				VOP_UNLOCK(vp);
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists.  Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Not quite a 'whatever'.  In
					 * this situation we know
					 * there is a FS_RAID
					 * partition, but we can't
					 * open it.  The most likely
					 * reason is that the
					 * partition is already in
					 * use by another RAID set.
					 * So note that we've already
					 * found a partition on this
					 * disk so we don't attempt
					 * to use the raw disk later. */
					rf_part_found = 1;
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
   3220 
   3221 int
   3222 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
   3223 {
   3224 
   3225 	if ((clabel->version==RF_COMPONENT_LABEL_VERSION_1 ||
   3226 	     clabel->version==RF_COMPONENT_LABEL_VERSION ||
   3227 	     clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) &&
   3228 	    (clabel->clean == RF_RAID_CLEAN ||
   3229 	     clabel->clean == RF_RAID_DIRTY) &&
   3230 	    clabel->row >=0 &&
   3231 	    clabel->column >= 0 &&
   3232 	    clabel->num_rows > 0 &&
   3233 	    clabel->num_columns > 0 &&
   3234 	    clabel->row < clabel->num_rows &&
   3235 	    clabel->column < clabel->num_columns &&
   3236 	    clabel->blockSize > 0 &&
   3237 	    /*
   3238 	     * numBlocksHi may contain garbage, but it is ok since
   3239 	     * the type is unsigned.  If it is really garbage,
   3240 	     * rf_fix_old_label_size() will fix it.
   3241 	     */
   3242 	    rf_component_label_numblocks(clabel) > 0) {
   3243 		/*
   3244 		 * label looks reasonable enough...
   3245 		 * let's make sure it has no old garbage.
   3246 		 */
   3247 		if (numsecs)
   3248 			rf_fix_old_label_size(clabel, numsecs);
   3249 		return(1);
   3250 	}
   3251 	return(0);
   3252 }
   3253 
   3254 
   3255 /*
   3256  * For reasons yet unknown, some old component labels have garbage in
   3257  * the newer numBlocksHi region, and this causes lossage.  Since those
   3258  * disks will also have numsecs set to less than 32 bits of sectors,
   3259  * we can determine when this corruption has occurred, and fix it.
   3260  *
   3261  * The exact same problem, with the same unknown reason, happens to
   3262  * the partitionSizeHi member as well.
   3263  */
   3264 static void
   3265 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
   3266 {
   3267 
   3268 	if (numsecs < ((uint64_t)1 << 32)) {
   3269 		if (clabel->numBlocksHi) {
   3270 			printf("WARNING: total sectors < 32 bits, yet "
   3271 			       "numBlocksHi set\n"
   3272 			       "WARNING: resetting numBlocksHi to zero.\n");
   3273 			clabel->numBlocksHi = 0;
   3274 		}
   3275 
   3276 		if (clabel->partitionSizeHi) {
   3277 			printf("WARNING: total sectors < 32 bits, yet "
   3278 			       "partitionSizeHi set\n"
   3279 			       "WARNING: resetting partitionSizeHi to zero.\n");
   3280 			clabel->partitionSizeHi = 0;
   3281 		}
   3282 	}
   3283 }
   3284 
   3285 
#ifdef DEBUG
/*
 * Debug helper: dump the interesting fields of a component label to
 * the console in human-readable form.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* Decoding of clabel->root_partition (2-bit field). */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
   3319 
   3320 static RF_ConfigSet_t *
   3321 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   3322 {
   3323 	RF_AutoConfig_t *ac;
   3324 	RF_ConfigSet_t *config_sets;
   3325 	RF_ConfigSet_t *cset;
   3326 	RF_AutoConfig_t *ac_next;
   3327 
   3328 
   3329 	config_sets = NULL;
   3330 
   3331 	/* Go through the AutoConfig list, and figure out which components
   3332 	   belong to what sets.  */
   3333 	ac = ac_list;
   3334 	while(ac!=NULL) {
   3335 		/* we're going to putz with ac->next, so save it here
   3336 		   for use at the end of the loop */
   3337 		ac_next = ac->next;
   3338 
   3339 		if (config_sets == NULL) {
   3340 			/* will need at least this one... */
   3341 			config_sets = malloc(sizeof(RF_ConfigSet_t),
   3342 				       M_RAIDFRAME, M_WAITOK);
   3343 			/* this one is easy :) */
   3344 			config_sets->ac = ac;
   3345 			config_sets->next = NULL;
   3346 			config_sets->rootable = 0;
   3347 			ac->next = NULL;
   3348 		} else {
   3349 			/* which set does this component fit into? */
   3350 			cset = config_sets;
   3351 			while(cset!=NULL) {
   3352 				if (rf_does_it_fit(cset, ac)) {
   3353 					/* looks like it matches... */
   3354 					ac->next = cset->ac;
   3355 					cset->ac = ac;
   3356 					break;
   3357 				}
   3358 				cset = cset->next;
   3359 			}
   3360 			if (cset==NULL) {
   3361 				/* didn't find a match above... new set..*/
   3362 				cset = malloc(sizeof(RF_ConfigSet_t),
   3363 					       M_RAIDFRAME, M_WAITOK);
   3364 				cset->ac = ac;
   3365 				ac->next = NULL;
   3366 				cset->next = config_sets;
   3367 				cset->rootable = 0;
   3368 				config_sets = cset;
   3369 			}
   3370 		}
   3371 		ac = ac_next;
   3372 	}
   3373 
   3374 
   3375 	return(config_sets);
   3376 }
   3377 
   3378 static int
   3379 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3380 {
   3381 	RF_ComponentLabel_t *clabel1, *clabel2;
   3382 
   3383 	/* If this one matches the *first* one in the set, that's good
   3384 	   enough, since the other members of the set would have been
   3385 	   through here too... */
   3386 	/* note that we are not checking partitionSize here..
   3387 
   3388 	   Note that we are also not checking the mod_counters here.
   3389 	   If everything else matches except the mod_counter, that's
   3390 	   good enough for this test.  We will deal with the mod_counters
   3391 	   a little later in the autoconfiguration process.
   3392 
   3393 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3394 
   3395 	   The reason we don't check for this is that failed disks
   3396 	   will have lower modification counts.  If those disks are
   3397 	   not added to the set they used to belong to, then they will
   3398 	   form their own set, which may result in 2 different sets,
   3399 	   for example, competing to be configured at raid0, and
   3400 	   perhaps competing to be the root filesystem set.  If the
   3401 	   wrong ones get configured, or both attempt to become /,
   3402 	   weird behaviour and or serious lossage will occur.  Thus we
   3403 	   need to bring them into the fold here, and kick them out at
   3404 	   a later point.
   3405 
   3406 	*/
   3407 
   3408 	clabel1 = cset->ac->clabel;
   3409 	clabel2 = ac->clabel;
   3410 	if ((clabel1->version == clabel2->version) &&
   3411 	    (clabel1->serial_number == clabel2->serial_number) &&
   3412 	    (clabel1->num_rows == clabel2->num_rows) &&
   3413 	    (clabel1->num_columns == clabel2->num_columns) &&
   3414 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3415 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3416 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3417 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3418 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3419 	    (clabel1->blockSize == clabel2->blockSize) &&
   3420 	    rf_component_label_numblocks(clabel1) ==
   3421 	    rf_component_label_numblocks(clabel2) &&
   3422 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3423 	    (clabel1->root_partition == clabel2->root_partition) &&
   3424 	    (clabel1->last_unit == clabel2->last_unit) &&
   3425 	    (clabel1->config_order == clabel2->config_order)) {
   3426 		/* if it get's here, it almost *has* to be a match */
   3427 	} else {
   3428 		/* it's not consistent with somebody in the set..
   3429 		   punt */
   3430 		return(0);
   3431 	}
   3432 	/* all was fine.. it must fit... */
   3433 	return(1);
   3434 }
   3435 
/*
 * Decide whether a config set has enough live components to be worth
 * configuring.  Components whose mod_counter is stale (lower than the
 * set's maximum) are treated as failed.  Returns 1 if the set is
 * viable, 0 otherwise.
 */
static int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */
	/* The set's mod_counter is the maximum over all members. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	/* For each column, look for a component that claims that column
	   and carries the current mod_counter. */
	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no failures; RAID 4/5 tolerate one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
   3538 
   3539 static void
   3540 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
   3541 			RF_Raid_t *raidPtr)
   3542 {
   3543 	RF_ComponentLabel_t *clabel;
   3544 	int i;
   3545 
   3546 	clabel = ac->clabel;
   3547 
   3548 	/* 1. Fill in the common stuff */
   3549 	config->numCol = clabel->num_columns;
   3550 	config->numSpare = 0; /* XXX should this be set here? */
   3551 	config->sectPerSU = clabel->sectPerSU;
   3552 	config->SUsPerPU = clabel->SUsPerPU;
   3553 	config->SUsPerRU = clabel->SUsPerRU;
   3554 	config->parityConfig = clabel->parityConfig;
   3555 	/* XXX... */
   3556 	strcpy(config->diskQueueType,"fifo");
   3557 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3558 	config->layoutSpecificSize = 0; /* XXX ?? */
   3559 
   3560 	while(ac!=NULL) {
   3561 		/* row/col values will be in range due to the checks
   3562 		   in reasonable_label() */
   3563 		strcpy(config->devnames[0][ac->clabel->column],
   3564 		       ac->devname);
   3565 		ac = ac->next;
   3566 	}
   3567 
   3568 	for(i=0;i<RF_MAXDBGV;i++) {
   3569 		config->debugVars[i][0] = 0;
   3570 	}
   3571 }
   3572 
   3573 static int
   3574 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3575 {
   3576 	RF_ComponentLabel_t *clabel;
   3577 	int column;
   3578 	int sparecol;
   3579 
   3580 	raidPtr->autoconfigure = new_value;
   3581 
   3582 	for(column=0; column<raidPtr->numCol; column++) {
   3583 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3584 			clabel = raidget_component_label(raidPtr, column);
   3585 			clabel->autoconfigure = new_value;
   3586 			raidflush_component_label(raidPtr, column);
   3587 		}
   3588 	}
   3589 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3590 		sparecol = raidPtr->numCol + column;
   3591 
   3592 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3593 			clabel = raidget_component_label(raidPtr, sparecol);
   3594 			clabel->autoconfigure = new_value;
   3595 			raidflush_component_label(raidPtr, sparecol);
   3596 		}
   3597 	}
   3598 	return(new_value);
   3599 }
   3600 
   3601 static int
   3602 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3603 {
   3604 	RF_ComponentLabel_t *clabel;
   3605 	int column;
   3606 	int sparecol;
   3607 
   3608 	raidPtr->root_partition = new_value;
   3609 	for(column=0; column<raidPtr->numCol; column++) {
   3610 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3611 			clabel = raidget_component_label(raidPtr, column);
   3612 			clabel->root_partition = new_value;
   3613 			raidflush_component_label(raidPtr, column);
   3614 		}
   3615 	}
   3616 	for (column = 0; column < raidPtr->numSpare ; column++) {
   3617 		sparecol = raidPtr->numCol + column;
   3618 
   3619 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3620 			clabel = raidget_component_label(raidPtr, sparecol);
   3621 			clabel->root_partition = new_value;
   3622 			raidflush_component_label(raidPtr, sparecol);
   3623 		}
   3624 	}
   3625 	return(new_value);
   3626 }
   3627 
   3628 static void
   3629 rf_release_all_vps(RF_ConfigSet_t *cset)
   3630 {
   3631 	RF_AutoConfig_t *ac;
   3632 
   3633 	ac = cset->ac;
   3634 	while(ac!=NULL) {
   3635 		/* Close the vp, and give it back */
   3636 		if (ac->vp) {
   3637 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3638 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
   3639 			vput(ac->vp);
   3640 			ac->vp = NULL;
   3641 		}
   3642 		ac = ac->next;
   3643 	}
   3644 }
   3645 
   3646 
   3647 static void
   3648 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3649 {
   3650 	RF_AutoConfig_t *ac;
   3651 	RF_AutoConfig_t *next_ac;
   3652 
   3653 	ac = cset->ac;
   3654 	while(ac!=NULL) {
   3655 		next_ac = ac->next;
   3656 		/* nuke the label */
   3657 		free(ac->clabel, M_RAIDFRAME);
   3658 		/* cleanup the config structure */
   3659 		free(ac, M_RAIDFRAME);
   3660 		/* "next.." */
   3661 		ac = next_ac;
   3662 	}
   3663 	/* and, finally, nuke the config set */
   3664 	free(cset, M_RAIDFRAME);
   3665 }
   3666 
   3667 
/*
 * Populate a component label from the array's current in-core state.
 * Per-component fields (row, column, partitionSize) are NOT set here;
 * callers fill those in afterwards.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* avoid over-writing byteswapped version. */
	if (clabel->version != bswap32(RF_COMPONENT_LABEL_VERSION))
		clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* Modern RAIDframe sets always have exactly one row. */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
   3701 
/*
 * Autoconfigure one config set: pick a unit number (preferring the one
 * recorded in the labels), build an RF_Config_t, and configure the
 * array.  Returns the attached softc on success, NULL on failure.
 */
static struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_WAITOK|M_ZERO);

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Walk upward from the labelled unit until we find a free one. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	if (sc == NULL)
		sc = raidget(raidID, true);
	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed: give the unit back. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
   3773 
/*
 * Initialize a pool for this array: name it "raid<unit>_<pool_name>"
 * (written into w_chan, which must hold RF_MAX_POOLNAMELEN bytes),
 * cap it at xmax items and pre-allocate xmin.
 */
void
rf_pool_init(RF_Raid_t *raidPtr, char *w_chan, struct pool *p, size_t size, const char *pool_name,
	     size_t xmin, size_t xmax)
{

	/* Format: raid%d_foo */
	snprintf(w_chan, RF_MAX_POOLNAMELEN, "raid%d_%s", raidPtr->raidid, pool_name);

	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
}
   3786 
   3787 
   3788 /*
   3789  * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
   3790  * to see if there is IO pending and if that IO could possibly be done
   3791  * for a given RAID set.  Returns 0 if IO is waiting and can be done, 1
   3792  * otherwise.
   3793  *
   3794  */
   3795 int
   3796 rf_buf_queue_check(RF_Raid_t *raidPtr)
   3797 {
   3798 	struct raid_softc *rs;
   3799 	struct dk_softc *dksc;
   3800 
   3801 	rs = raidPtr->softc;
   3802 	dksc = &rs->sc_dksc;
   3803 
   3804 	if ((rs->sc_flags & RAIDF_INITED) == 0)
   3805 		return 1;
   3806 
   3807 	if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
   3808 		/* there is work to do */
   3809 		return 0;
   3810 	}
   3811 	/* default is nothing to do */
   3812 	return 1;
   3813 }
   3814 
   3815 int
   3816 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
   3817 {
   3818 	uint64_t numsecs;
   3819 	unsigned secsize;
   3820 	int error;
   3821 
   3822 	error = getdisksize(vp, &numsecs, &secsize);
   3823 	if (error == 0) {
   3824 		diskPtr->blockSize = secsize;
   3825 		diskPtr->numBlocks = numsecs - rf_protectedSectors;
   3826 		diskPtr->partitionSize = numsecs;
   3827 		return 0;
   3828 	}
   3829 	return error;
   3830 }
   3831 
/*
 * Autoconfiguration match routine: raid pseudo-devices are created on
 * demand, so any attach request matches unconditionally.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
   3837 
/*
 * Autoconfiguration attach routine.  Intentionally empty: all real
 * initialization happens later, when the set is configured (raidinit()).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
   3842 
   3843 
   3844 static int
   3845 raid_detach(device_t self, int flags)
   3846 {
   3847 	int error;
   3848 	struct raid_softc *rs = raidsoftc(self);
   3849 
   3850 	if (rs == NULL)
   3851 		return ENXIO;
   3852 
   3853 	if ((error = raidlock(rs)) != 0)
   3854 		return error;
   3855 
   3856 	error = raid_detach_unlocked(rs);
   3857 
   3858 	raidunlock(rs);
   3859 
   3860 	/* XXX raid can be referenced here */
   3861 
   3862 	if (error)
   3863 		return error;
   3864 
   3865 	/* Free the softc */
   3866 	raidput(rs);
   3867 
   3868 	return 0;
   3869 }
   3870 
   3871 static void
   3872 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
   3873 {
   3874 	struct dk_softc *dksc = &rs->sc_dksc;
   3875 	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
   3876 
   3877 	memset(dg, 0, sizeof(*dg));
   3878 
   3879 	dg->dg_secperunit = raidPtr->totalSectors;
   3880 	dg->dg_secsize = raidPtr->bytesPerSector;
   3881 	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   3882 	dg->dg_ntracks = 4 * raidPtr->numCol;
   3883 
   3884 	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
   3885 }
   3886 
   3887 /*
   3888  * Get cache info for all the components (including spares).
   3889  * Returns intersection of all the cache flags of all disks, or first
   3890  * error if any encountered.
   3891  * XXXfua feature flags can change as spares are added - lock down somehow
   3892  */
   3893 static int
   3894 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
   3895 {
   3896 	int c;
   3897 	int error;
   3898 	int dkwhole = 0, dkpart;
   3899 
   3900 	for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
   3901 		/*
   3902 		 * Check any non-dead disk, even when currently being
   3903 		 * reconstructed.
   3904 		 */
   3905 		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
   3906 			error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
   3907 			    DIOCGCACHE, &dkpart, FREAD, NOCRED);
   3908 			if (error) {
   3909 				if (error != ENODEV) {
   3910 					printf("raid%d: get cache for component %s failed\n",
   3911 					    raidPtr->raidid,
   3912 					    raidPtr->Disks[c].devname);
   3913 				}
   3914 
   3915 				return error;
   3916 			}
   3917 
   3918 			if (c == 0)
   3919 				dkwhole = dkpart;
   3920 			else
   3921 				dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
   3922 		}
   3923 	}
   3924 
   3925 	*data = dkwhole;
   3926 
   3927 	return 0;
   3928 }
   3929 
   3930 /*
   3931  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
   3932  * We end up returning whatever error was returned by the first cache flush
   3933  * that fails.
   3934  */
   3935 
   3936 static int
   3937 rf_sync_component_cache(RF_Raid_t *raidPtr, int c, int force)
   3938 {
   3939 	int e = 0;
   3940 	for (int i = 0; i < 5; i++) {
   3941 		e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
   3942 		    &force, FWRITE, NOCRED);
   3943 		if (!e || e == ENODEV)
   3944 			return e;
   3945 		printf("raid%d: cache flush[%d] to component %s failed (%d)\n",
   3946 		    raidPtr->raidid, i, raidPtr->Disks[c].devname, e);
   3947 	}
   3948 	return e;
   3949 }
   3950 
   3951 int
   3952 rf_sync_component_caches(RF_Raid_t *raidPtr, int force)
   3953 {
   3954 	int c, error;
   3955 
   3956 	error = 0;
   3957 	for (c = 0; c < raidPtr->numCol; c++) {
   3958 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   3959 			int e = rf_sync_component_cache(raidPtr, c, force);
   3960 			if (e && !error)
   3961 				error = e;
   3962 		}
   3963 	}
   3964 
   3965 	for (c = 0; c < raidPtr->numSpare ; c++) {
   3966 		int sparecol = raidPtr->numCol + c;
   3967 
   3968 		/* Need to ensure that the reconstruct actually completed! */
   3969 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3970 			int e = rf_sync_component_cache(raidPtr, sparecol,
   3971 			    force);
   3972 			if (e && !error)
   3973 				error = e;
   3974 		}
   3975 	}
   3976 	return error;
   3977 }
   3978 
   3979 /* Fill in info with the current status */
   3980 void
   3981 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
   3982 {
   3983 
   3984 	memset(info, 0, sizeof(*info));
   3985 
   3986 	if (raidPtr->status != rf_rs_reconstructing) {
   3987 		info->total = 100;
   3988 		info->completed = 100;
   3989 	} else {
   3990 		info->total = raidPtr->reconControl->numRUsTotal;
   3991 		info->completed = raidPtr->reconControl->numRUsComplete;
   3992 	}
   3993 	info->remaining = info->total - info->completed;
   3994 }
   3995 
   3996 /* Fill in info with the current status */
   3997 void
   3998 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
   3999 {
   4000 
   4001 	memset(info, 0, sizeof(*info));
   4002 
   4003 	if (raidPtr->parity_rewrite_in_progress == 1) {
   4004 		info->total = raidPtr->Layout.numStripe;
   4005 		info->completed = raidPtr->parity_rewrite_stripes_done;
   4006 	} else {
   4007 		info->completed = 100;
   4008 		info->total = 100;
   4009 	}
   4010 	info->remaining = info->total - info->completed;
   4011 }
   4012 
   4013 /* Fill in info with the current status */
   4014 void
   4015 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
   4016 {
   4017 
   4018 	memset(info, 0, sizeof(*info));
   4019 
   4020 	if (raidPtr->copyback_in_progress == 1) {
   4021 		info->total = raidPtr->Layout.numStripe;
   4022 		info->completed = raidPtr->copyback_stripes_done;
   4023 		info->remaining = info->total - info->completed;
   4024 	} else {
   4025 		info->remaining = 0;
   4026 		info->completed = 100;
   4027 		info->total = 100;
   4028 	}
   4029 }
   4030 
   4031 /* Fill in config with the current info */
   4032 int
   4033 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
   4034 {
   4035 	int	d, i, j;
   4036 
   4037 	if (!raidPtr->valid)
   4038 		return ENODEV;
   4039 	config->cols = raidPtr->numCol;
   4040 	config->ndevs = raidPtr->numCol;
   4041 	if (config->ndevs >= RF_MAX_DISKS)
   4042 		return ENOMEM;
   4043 	config->nspares = raidPtr->numSpare;
   4044 	if (config->nspares >= RF_MAX_DISKS)
   4045 		return ENOMEM;
   4046 	config->maxqdepth = raidPtr->maxQueueDepth;
   4047 	d = 0;
   4048 	for (j = 0; j < config->cols; j++) {
   4049 		config->devs[d] = raidPtr->Disks[j];
   4050 		d++;
   4051 	}
   4052 	for (i = 0; i < config->nspares; i++) {
   4053 		config->spares[i] = raidPtr->Disks[raidPtr->numCol + i];
   4054                 if (config->spares[i].status == rf_ds_rebuilding_spare) {
   4055                         /* raidctl(8) expects to see this as a used spare */
   4056                         config->spares[i].status = rf_ds_used_spare;
   4057                 }
   4058 	}
   4059 	return 0;
   4060 }
   4061 
   4062 int
   4063 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
   4064 {
   4065 	RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
   4066 	RF_ComponentLabel_t *raid_clabel;
   4067 	int column = clabel->column;
   4068 
   4069 	if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
   4070 		return EINVAL;
   4071 	raid_clabel = raidget_component_label(raidPtr, column);
   4072 	memcpy(clabel, raid_clabel, sizeof *clabel);
   4073 	/* Fix-up for userland. */
   4074 	if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION))
   4075 		clabel->version = RF_COMPONENT_LABEL_VERSION;
   4076 
   4077 	return 0;
   4078 }
   4079 
   4080 /*
   4081  * Module interface
   4082  */
   4083 
   4084 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
   4085 
   4086 #ifdef _MODULE
   4087 CFDRIVER_DECL(raid, DV_DISK, NULL);
   4088 #endif
   4089 
   4090 static int raid_modcmd(modcmd_t, void *);
   4091 static int raid_modcmd_init(void);
   4092 static int raid_modcmd_fini(void);
   4093 
   4094 static int
   4095 raid_modcmd(modcmd_t cmd, void *data)
   4096 {
   4097 	int error;
   4098 
   4099 	error = 0;
   4100 	switch (cmd) {
   4101 	case MODULE_CMD_INIT:
   4102 		error = raid_modcmd_init();
   4103 		break;
   4104 	case MODULE_CMD_FINI:
   4105 		error = raid_modcmd_fini();
   4106 		break;
   4107 	default:
   4108 		error = ENOTTY;
   4109 		break;
   4110 	}
   4111 	return error;
   4112 }
   4113 
/*
 * Module initialization: set up global state, attach the device switch
 * and autoconf glue (rolling back in reverse order on failure), boot
 * the RAIDframe core, and register the autoconfig finalizer.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 asks devsw_attach to dynamically allocate the majors. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	/* EEXIST (devsw already present, e.g. builtin) is tolerated. */
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Roll back the devsw attach. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Roll back cfdriver and devsw attaches, in reverse order. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is 0 here: every failure above returned early. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Non-fatal: the module works, only autoconfig is lost. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
   4184 
/*
 * Module finalization: refuse to unload while any raid softc exists,
 * then detach the autoconf glue and device switch (undoing
 * raid_modcmd_init() in reverse order) and shut down the RAIDframe core.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist.  */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Roll back: re-attach cfattach so the module stays usable. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	devsw_detach(&raid_bdevsw, &raid_cdevsw);
	/* Shut down the RAIDframe engine itself. */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
   4225