Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.346
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.346 2016/09/19 23:32:30 jdolecek Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Greg Oster; Jason R. Thorpe.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright (c) 1988 University of Utah.
     34  * Copyright (c) 1990, 1993
     35  *      The Regents of the University of California.  All rights reserved.
     36  *
     37  * This code is derived from software contributed to Berkeley by
     38  * the Systems Programming Group of the University of Utah Computer
     39  * Science Department.
     40  *
     41  * Redistribution and use in source and binary forms, with or without
     42  * modification, are permitted provided that the following conditions
     43  * are met:
     44  * 1. Redistributions of source code must retain the above copyright
     45  *    notice, this list of conditions and the following disclaimer.
     46  * 2. Redistributions in binary form must reproduce the above copyright
     47  *    notice, this list of conditions and the following disclaimer in the
     48  *    documentation and/or other materials provided with the distribution.
     49  * 3. Neither the name of the University nor the names of its contributors
     50  *    may be used to endorse or promote products derived from this software
     51  *    without specific prior written permission.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     63  * SUCH DAMAGE.
     64  *
     65  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     66  *
     67  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     68  */
     69 
     70 /*
     71  * Copyright (c) 1995 Carnegie-Mellon University.
     72  * All rights reserved.
     73  *
     74  * Authors: Mark Holland, Jim Zelenka
     75  *
     76  * Permission to use, copy, modify and distribute this software and
     77  * its documentation is hereby granted, provided that both the copyright
     78  * notice and this permission notice appear in all copies of the
     79  * software, derivative works or modified versions, and any portions
     80  * thereof, and that both notices appear in supporting documentation.
     81  *
     82  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     83  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     84  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     85  *
     86  * Carnegie Mellon requests users of this software to return to
     87  *
     88  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     89  *  School of Computer Science
     90  *  Carnegie Mellon University
     91  *  Pittsburgh PA 15213-3890
     92  *
     93  * any improvements or extensions that they make and grant Carnegie the
     94  * rights to redistribute these changes.
     95  */
     96 
     97 /***********************************************************
     98  *
     99  * rf_kintf.c -- the kernel interface routines for RAIDframe
    100  *
    101  ***********************************************************/
    102 
    103 #include <sys/cdefs.h>
    104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.346 2016/09/19 23:32:30 jdolecek Exp $");
    105 
    106 #ifdef _KERNEL_OPT
    107 #include "opt_compat_netbsd.h"
    108 #include "opt_raid_autoconfig.h"
    109 #endif
    110 
    111 #include <sys/param.h>
    112 #include <sys/errno.h>
    113 #include <sys/pool.h>
    114 #include <sys/proc.h>
    115 #include <sys/queue.h>
    116 #include <sys/disk.h>
    117 #include <sys/device.h>
    118 #include <sys/stat.h>
    119 #include <sys/ioctl.h>
    120 #include <sys/fcntl.h>
    121 #include <sys/systm.h>
    122 #include <sys/vnode.h>
    123 #include <sys/disklabel.h>
    124 #include <sys/conf.h>
    125 #include <sys/buf.h>
    126 #include <sys/bufq.h>
    127 #include <sys/reboot.h>
    128 #include <sys/kauth.h>
    129 #include <sys/module.h>
    130 
    131 #include <prop/proplib.h>
    132 
    133 #include <dev/raidframe/raidframevar.h>
    134 #include <dev/raidframe/raidframeio.h>
    135 #include <dev/raidframe/rf_paritymap.h>
    136 
    137 #include "rf_raid.h"
    138 #include "rf_copyback.h"
    139 #include "rf_dag.h"
    140 #include "rf_dagflags.h"
    141 #include "rf_desc.h"
    142 #include "rf_diskqueue.h"
    143 #include "rf_etimer.h"
    144 #include "rf_general.h"
    145 #include "rf_kintf.h"
    146 #include "rf_options.h"
    147 #include "rf_driver.h"
    148 #include "rf_parityscan.h"
    149 #include "rf_threadstuff.h"
    150 
    151 #ifdef COMPAT_50
    152 #include "rf_compat50.h"
    153 #endif
    154 
    155 #include "ioconf.h"
    156 
    157 #ifdef DEBUG
    158 int     rf_kdebug_level = 0;
    159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    160 #else				/* DEBUG */
    161 #define db1_printf(a) { }
    162 #endif				/* DEBUG */
    163 
    164 #ifdef DEBUG_ROOT
    165 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
    166 #else
    167 #define DPRINTF(a, ...)
    168 #endif
    169 
    170 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
    171 static rf_declare_mutex2(rf_sparet_wait_mutex);
    172 static rf_declare_cond2(rf_sparet_wait_cv);
    173 static rf_declare_cond2(rf_sparet_resp_cv);
    174 
    175 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    176 						 * spare table */
    177 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    178 						 * installation process */
    179 #endif
    180 
    181 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    182 
    183 /* prototypes */
    184 static void KernelWakeupFunc(struct buf *);
    185 static void InitBP(struct buf *, struct vnode *, unsigned,
    186     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    187     void *, int, struct proc *);
    188 struct raid_softc;
    189 static void raidinit(struct raid_softc *);
    190 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
    191 
    192 static int raid_match(device_t, cfdata_t, void *);
    193 static void raid_attach(device_t, device_t, void *);
    194 static int raid_detach(device_t, int);
    195 
    196 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    197     daddr_t, daddr_t);
    198 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    199     daddr_t, daddr_t, int);
    200 
    201 static int raidwrite_component_label(unsigned,
    202     dev_t, struct vnode *, RF_ComponentLabel_t *);
    203 static int raidread_component_label(unsigned,
    204     dev_t, struct vnode *, RF_ComponentLabel_t *);
    205 
    206 static int raid_diskstart(device_t, struct buf *bp);
    207 static int raid_dumpblocks(device_t, void *, daddr_t, int);
    208 static int raid_lastclose(device_t);
    209 
    210 static dev_type_open(raidopen);
    211 static dev_type_close(raidclose);
    212 static dev_type_read(raidread);
    213 static dev_type_write(raidwrite);
    214 static dev_type_ioctl(raidioctl);
    215 static dev_type_strategy(raidstrategy);
    216 static dev_type_dump(raiddump);
    217 static dev_type_size(raidsize);
    218 
/* Block-device switch for /dev/raidN: entry points used by the buffer
 * cache and disklabel machinery.  D_DISK marks this as a disk device. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
    229 
/* Character-device (raw) switch for /dev/rraidN.  Operations that make
 * no sense for a disk (tty, poll, mmap, ...) use the standard no-op stubs. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
    244 
/* Hooks handed to the common dk(9) disk framework; dk calls back into
 * these for I/O start, dumping, and last-close processing. */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};
    254 
/* Per-unit driver state ("softc") for one RAIDframe device. */
struct raid_softc {
	struct dk_softc sc_dksc;	/* embedded dk(9) disk framework state */
	int	sc_unit;	/* raid unit number (raidN) */
	int     sc_flags;	/* flags (RAIDF_* below) */
	int     sc_cflags;	/* configuration flags */
	kmutex_t sc_mutex;	/* interlock mutex */
	kcondvar_t sc_cv;	/* and the condvar */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	RF_Raid_t sc_r;		/* the RAIDframe per-array state proper */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global `raids' list */
};
    267 /* sc_flags */
    268 #define RAIDF_INITED		0x01	/* unit has been initialized */
    269 #define RAIDF_SHUTDOWN		0x02	/* unit is being shutdown */
    270 #define RAIDF_DETACH  		0x04	/* detach after final close */
    271 #define RAIDF_WANTED		0x08	/* someone waiting to obtain a lock */
    272 #define RAIDF_LOCKED		0x10	/* unit is locked */
    273 #define RAIDF_UNIT_CHANGED	0x20	/* unit is being changed */
    274 
    275 #define	raidunit(x)	DISKUNIT(x)
    276 #define	raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)
    277 
    278 extern struct cfdriver raid_cd;
    279 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    280     raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    281     DVF_DETACH_SHUTDOWN);
    282 
    283 /*
    284  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    285  * Be aware that large numbers can allow the driver to consume a lot of
    286  * kernel memory, especially on writes, and in degraded mode reads.
    287  *
    288  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    289  * a single 64K write will typically require 64K for the old data,
    290  * 64K for the old parity, and 64K for the new parity, for a total
    291  * of 192K (if the parity buffer is not re-used immediately).
    292  * Even it if is used immediately, that's still 128K, which when multiplied
    293  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    294  *
    295  * Now in degraded mode, for example, a 64K read on the above setup may
    296  * require data reconstruction, which will require *all* of the 4 remaining
    297  * disks to participate -- 4 * 32K/disk == 128K again.
    298  */
    299 
    300 #ifndef RAIDOUTSTANDING
    301 #define RAIDOUTSTANDING   6
    302 #endif
    303 
    304 #define RAIDLABELDEV(dev)	\
    305 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    306 
    307 /* declared here, and made public, for the benefit of KVM stuff.. */
    308 
    309 static int raidlock(struct raid_softc *);
    310 static void raidunlock(struct raid_softc *);
    311 
    312 static int raid_detach_unlocked(struct raid_softc *);
    313 
    314 static void rf_markalldirty(RF_Raid_t *);
    315 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
    316 
    317 void rf_ReconThread(struct rf_recon_req *);
    318 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    319 void rf_CopybackThread(RF_Raid_t *raidPtr);
    320 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    321 int rf_autoconfig(device_t);
    322 void rf_buildroothack(RF_ConfigSet_t *);
    323 
    324 RF_AutoConfig_t *rf_find_raid_components(void);
    325 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    326 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    327 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
    328 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    329 int rf_set_autoconfig(RF_Raid_t *, int);
    330 int rf_set_rootpartition(RF_Raid_t *, int);
    331 void rf_release_all_vps(RF_ConfigSet_t *);
    332 void rf_cleanup_config_set(RF_ConfigSet_t *);
    333 int rf_have_enough_components(RF_ConfigSet_t *);
    334 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
    335 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
    336 
    337 /*
    338  * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
    339  * Note that this is overridden by having RAID_AUTOCONFIG as an option
    340  * in the kernel config file.
    341  */
    342 #ifdef RAID_AUTOCONFIG
    343 int raidautoconfig = 1;
    344 #else
    345 int raidautoconfig = 0;
    346 #endif
    347 static bool raidautoconfigdone = false;
    348 
    349 struct RF_Pools_s rf_pools;
    350 
    351 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
    352 static kmutex_t raid_lock;
    353 
    354 static struct raid_softc *
    355 raidcreate(int unit) {
    356 	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
    357 	if (sc == NULL) {
    358 #ifdef DIAGNOSTIC
    359 		printf("%s: out of memory\n", __func__);
    360 #endif
    361 		return NULL;
    362 	}
    363 	sc->sc_unit = unit;
    364 	cv_init(&sc->sc_cv, "raidunit");
    365 	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
    366 	return sc;
    367 }
    368 
/* Tear down and free a softc created by raidcreate(). */
static void
raiddestroy(struct raid_softc *sc) {
	cv_destroy(&sc->sc_cv);
	mutex_destroy(&sc->sc_mutex);
	kmem_free(sc, sizeof(*sc));
}
    375 
/*
 * Look up the softc for `unit', optionally creating it when not found.
 * Returns NULL for a negative unit, for a missing unit when `create'
 * is false, or when allocation fails.
 *
 * NOTE(review): raid_lock is dropped between the failed lookup and the
 * insertion below, so two concurrent callers could in principle both
 * create a softc for the same unit -- confirm callers serialize this.
 */
static struct raid_softc *
raidget(int unit, bool create) {
	struct raid_softc *sc;
	if (unit < 0) {
#ifdef DIAGNOSTIC
		panic("%s: unit %d!", __func__, unit);
#endif
		return NULL;
	}
	mutex_enter(&raid_lock);
	LIST_FOREACH(sc, &raids, sc_link) {
		if (sc->sc_unit == unit) {
			mutex_exit(&raid_lock);
			return sc;
		}
	}
	mutex_exit(&raid_lock);
	if (!create)
		return NULL;
	if ((sc = raidcreate(unit)) == NULL)
		return NULL;
	mutex_enter(&raid_lock);
	LIST_INSERT_HEAD(&raids, sc, sc_link);
	mutex_exit(&raid_lock);
	return sc;
}
    402 
/* Unlink a softc from the global list and destroy it. */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
    410 
/* Historical pseudo-device attach hook; intentionally empty. */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
    420 
    421 int
    422 rf_autoconfig(device_t self)
    423 {
    424 	RF_AutoConfig_t *ac_list;
    425 	RF_ConfigSet_t *config_sets;
    426 
    427 	if (!raidautoconfig || raidautoconfigdone == true)
    428 		return (0);
    429 
    430 	/* XXX This code can only be run once. */
    431 	raidautoconfigdone = true;
    432 
    433 #ifdef __HAVE_CPU_BOOTCONF
    434 	/*
    435 	 * 0. find the boot device if needed first so we can use it later
    436 	 * this needs to be done before we autoconfigure any raid sets,
    437 	 * because if we use wedges we are not going to be able to open
    438 	 * the boot device later
    439 	 */
    440 	if (booted_device == NULL)
    441 		cpu_bootconf();
    442 #endif
    443 	/* 1. locate all RAID components on the system */
    444 	aprint_debug("Searching for RAID components...\n");
    445 	ac_list = rf_find_raid_components();
    446 
    447 	/* 2. Sort them into their respective sets. */
    448 	config_sets = rf_create_auto_sets(ac_list);
    449 
    450 	/*
    451 	 * 3. Evaluate each set and configure the valid ones.
    452 	 * This gets done in rf_buildroothack().
    453 	 */
    454 	rf_buildroothack(config_sets);
    455 
    456 	return 1;
    457 }
    458 
    459 static int
    460 rf_containsboot(RF_Raid_t *r, device_t bdv) {
    461 	const char *bootname = device_xname(bdv);
    462 	size_t len = strlen(bootname);
    463 
    464 	for (int col = 0; col < r->numCol; col++) {
    465 		const char *devname = r->Disks[col].devname;
    466 		devname += sizeof("/dev/") - 1;
    467 		if (strncmp(devname, "dk", 2) == 0) {
    468 			const char *parent =
    469 			    dkwedge_get_parent_name(r->Disks[col].dev);
    470 			if (parent != NULL)
    471 				devname = parent;
    472 		}
    473 		if (strncmp(devname, bootname, len) == 0) {
    474 			struct raid_softc *sc = r->softc;
    475 			aprint_debug("raid%d includes boot device %s\n",
    476 			    sc->sc_unit, devname);
    477 			return 1;
    478 		}
    479 	}
    480 	return 0;
    481 }
    482 
    483 void
    484 rf_buildroothack(RF_ConfigSet_t *config_sets)
    485 {
    486 	RF_ConfigSet_t *cset;
    487 	RF_ConfigSet_t *next_cset;
    488 	int num_root;
    489 	struct raid_softc *sc, *rsc;
    490 	struct dk_softc *dksc;
    491 
    492 	sc = rsc = NULL;
    493 	num_root = 0;
    494 	cset = config_sets;
    495 	while (cset != NULL) {
    496 		next_cset = cset->next;
    497 		if (rf_have_enough_components(cset) &&
    498 		    cset->ac->clabel->autoconfigure == 1) {
    499 			sc = rf_auto_config_set(cset);
    500 			if (sc != NULL) {
    501 				aprint_debug("raid%d: configured ok\n",
    502 				    sc->sc_unit);
    503 				if (cset->rootable) {
    504 					rsc = sc;
    505 					num_root++;
    506 				}
    507 			} else {
    508 				/* The autoconfig didn't work :( */
    509 				aprint_debug("Autoconfig failed\n");
    510 				rf_release_all_vps(cset);
    511 			}
    512 		} else {
    513 			/* we're not autoconfiguring this set...
    514 			   release the associated resources */
    515 			rf_release_all_vps(cset);
    516 		}
    517 		/* cleanup */
    518 		rf_cleanup_config_set(cset);
    519 		cset = next_cset;
    520 	}
    521 	dksc = &rsc->sc_dksc;
    522 
    523 	/* if the user has specified what the root device should be
    524 	   then we don't touch booted_device or boothowto... */
    525 
    526 	if (rootspec != NULL)
    527 		return;
    528 
    529 	/* we found something bootable... */
    530 
    531 	/*
    532 	 * XXX: The following code assumes that the root raid
    533 	 * is the first ('a') partition. This is about the best
    534 	 * we can do with a BSD disklabel, but we might be able
    535 	 * to do better with a GPT label, by setting a specified
    536 	 * attribute to indicate the root partition. We can then
    537 	 * stash the partition number in the r->root_partition
    538 	 * high bits (the bottom 2 bits are already used). For
    539 	 * now we just set booted_partition to 0 when we override
    540 	 * root.
    541 	 */
    542 	if (num_root == 1) {
    543 		device_t candidate_root;
    544 		if (dksc->sc_dkdev.dk_nwedges != 0) {
    545 			char cname[sizeof(cset->ac->devname)];
    546 			/* XXX: assume partition 'a' first */
    547 			snprintf(cname, sizeof(cname), "%s%c",
    548 			    device_xname(dksc->sc_dev), 'a');
    549 			candidate_root = dkwedge_find_by_wname(cname);
    550 			DPRINTF("%s: candidate wedge root=%s\n", __func__,
    551 			    cname);
    552 			if (candidate_root == NULL) {
    553 				/*
    554 				 * If that is not found, because we don't use
    555 				 * disklabel, return the first dk child
    556 				 * XXX: we can skip the 'a' check above
    557 				 * and always do this...
    558 				 */
    559 				size_t i = 0;
    560 				candidate_root = dkwedge_find_by_parent(
    561 				    device_xname(dksc->sc_dev), &i);
    562 			}
    563 			DPRINTF("%s: candidate wedge root=%p\n", __func__,
    564 			    candidate_root);
    565 		} else
    566 			candidate_root = dksc->sc_dev;
    567 		DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
    568 		DPRINTF("%s: booted_device=%p root_partition=%d "
    569 		   "contains_boot=%d\n", __func__, booted_device,
    570 		   rsc->sc_r.root_partition,
    571 		   rf_containsboot(&rsc->sc_r, booted_device));
    572 		if (booted_device == NULL ||
    573 		    rsc->sc_r.root_partition == 1 ||
    574 		    rf_containsboot(&rsc->sc_r, booted_device)) {
    575 			booted_device = candidate_root;
    576 			booted_partition = 0;	/* XXX assume 'a' */
    577 		}
    578 	} else if (num_root > 1) {
    579 		DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
    580 		    booted_device);
    581 
    582 		/*
    583 		 * Maybe the MD code can help. If it cannot, then
    584 		 * setroot() will discover that we have no
    585 		 * booted_device and will ask the user if nothing was
    586 		 * hardwired in the kernel config file
    587 		 */
    588 		if (booted_device == NULL)
    589 			return;
    590 
    591 		num_root = 0;
    592 		mutex_enter(&raid_lock);
    593 		LIST_FOREACH(sc, &raids, sc_link) {
    594 			RF_Raid_t *r = &sc->sc_r;
    595 			if (r->valid == 0)
    596 				continue;
    597 
    598 			if (r->root_partition == 0)
    599 				continue;
    600 
    601 			if (rf_containsboot(r, booted_device)) {
    602 				num_root++;
    603 				rsc = sc;
    604 				dksc = &rsc->sc_dksc;
    605 			}
    606 		}
    607 		mutex_exit(&raid_lock);
    608 
    609 		if (num_root == 1) {
    610 			booted_device = dksc->sc_dev;
    611 			booted_partition = 0;	/* XXX assume 'a' */
    612 		} else {
    613 			/* we can't guess.. require the user to answer... */
    614 			boothowto |= RB_ASKNAME;
    615 		}
    616 	}
    617 }
    618 
    619 static int
    620 raidsize(dev_t dev)
    621 {
    622 	struct raid_softc *rs;
    623 	struct dk_softc *dksc;
    624 	unsigned int unit;
    625 
    626 	unit = raidunit(dev);
    627 	if ((rs = raidget(unit, false)) == NULL)
    628 		return -1;
    629 	dksc = &rs->sc_dksc;
    630 
    631 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    632 		return -1;
    633 
    634 	return dk_size(dksc, dev);
    635 }
    636 
/*
 * d_dump entry point: crash-dump `size' bytes from `va' starting at
 * partition-relative block `blkno'.  The actual component selection is
 * done by raid_dumpblocks() via the dk(9) framework.
 */
static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;

	unit = raidunit(dev);
	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENODEV;

        /*
           Note that blkno is relative to this particular partition.
           By adding RF_PROTECTED_SECTORS, we get a value that
	   is relative to the partition used for the underlying component.
        */
	blkno += RF_PROTECTED_SECTORS;

	return dk_dump(dksc, dev, blkno, va, size);
}
    661 
/*
 * dk(9) dump hook: write `nblk' blocks from `va' at `blkno' directly to
 * one live component of the array.  Only RAID 1 sets (one data column,
 * one parity column) are supported for dumping.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int     c, sparecol, j, scol, dumpto;
	int     error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	/* first pass: any optimal (live) component wins immediately */
	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			/* find which column this spare is standing in for */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* dump straight through the chosen component's block device */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
    767 
    768 /* ARGSUSED */
    769 static int
    770 raidopen(dev_t dev, int flags, int fmt,
    771     struct lwp *l)
    772 {
    773 	int     unit = raidunit(dev);
    774 	struct raid_softc *rs;
    775 	struct dk_softc *dksc;
    776 	int     error = 0;
    777 	int     part, pmask;
    778 
    779 	if ((rs = raidget(unit, true)) == NULL)
    780 		return ENXIO;
    781 	if ((error = raidlock(rs)) != 0)
    782 		return (error);
    783 
    784 	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
    785 		error = EBUSY;
    786 		goto bad;
    787 	}
    788 
    789 	dksc = &rs->sc_dksc;
    790 
    791 	part = DISKPART(dev);
    792 	pmask = (1 << part);
    793 
    794 	if (!DK_BUSY(dksc, pmask) &&
    795 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    796 		/* First one... mark things as dirty... Note that we *MUST*
    797 		 have done a configure before this.  I DO NOT WANT TO BE
    798 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    799 		 THAT THEY BELONG TOGETHER!!!!! */
    800 		/* XXX should check to see if we're only open for reading
    801 		   here... If so, we needn't do this, but then need some
    802 		   other way of keeping track of what's happened.. */
    803 
    804 		rf_markalldirty(&rs->sc_r);
    805 	}
    806 
    807 	if ((rs->sc_flags & RAIDF_INITED) != 0)
    808 		error = dk_open(dksc, dev, flags, fmt, l);
    809 
    810 bad:
    811 	raidunlock(rs);
    812 
    813 	return (error);
    814 
    815 
    816 }
    817 
/*
 * dk(9) last-close hook: called when the final partition of the unit
 * is closed.  Writes out final component labels and, if a shutdown was
 * requested, arranges for raidclose() to detach the device.
 */
static int
raid_lastclose(device_t self)
{
	struct raid_softc *rs = raidsoftc(self);

	/* Last one... device is not unconfigured yet.
	   Device shutdown has taken care of setting the
	   clean bits if RAIDF_INITED is not set
	   mark things as clean... */

	rf_update_component_labels(&rs->sc_r,
	    RF_FINAL_COMPONENT_UPDATE);

	/* pass to unlocked code */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		rs->sc_flags |= RAIDF_DETACH;

	return 0;
}
    837 
    838 /* ARGSUSED */
/*
 * d_close entry point.  Delegates to dk_close(); if raid_lastclose()
 * flagged the unit for detach, the pseudo-device is detached here
 * (outside the unit lock), and an uninitialized unit pending shutdown
 * has its softc released.
 */
/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int     error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		/* raid_lastclose() may have set RAIDF_DETACH during dk_close */
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	/* drop the unit lock before config_detach()/raidput() */
	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return (error);

}
    877 
/* Wake the RAID I/O thread waiting on iodone_cv so queued work runs. */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
    885 
    886 static void
    887 raidstrategy(struct buf *bp)
    888 {
    889 	unsigned int unit;
    890 	struct raid_softc *rs;
    891 	struct dk_softc *dksc;
    892 	RF_Raid_t *raidPtr;
    893 
    894 	unit = raidunit(bp->b_dev);
    895 	if ((rs = raidget(unit, false)) == NULL) {
    896 		bp->b_error = ENXIO;
    897 		goto fail;
    898 	}
    899 	if ((rs->sc_flags & RAIDF_INITED) == 0) {
    900 		bp->b_error = ENXIO;
    901 		goto fail;
    902 	}
    903 	dksc = &rs->sc_dksc;
    904 	raidPtr = &rs->sc_r;
    905 
    906 	/* Queue IO only */
    907 	if (dk_strategy_defer(dksc, bp))
    908 		goto done;
    909 
    910 	/* schedule the IO to happen at the next convenient time */
    911 	raid_wakeup(raidPtr);
    912 
    913 done:
    914 	return;
    915 
    916 fail:
    917 	bp->b_resid = bp->b_bcount;
    918 	biodone(bp);
    919 }
    920 
    921 static int
    922 raid_diskstart(device_t dev, struct buf *bp)
    923 {
    924 	struct raid_softc *rs = raidsoftc(dev);
    925 	RF_Raid_t *raidPtr;
    926 
    927 	raidPtr = &rs->sc_r;
    928 	if (!raidPtr->valid) {
    929 		db1_printf(("raid is not valid..\n"));
    930 		return ENODEV;
    931 	}
    932 
    933 	/* XXX */
    934 	bp->b_resid = 0;
    935 
    936 	return raiddoaccess(raidPtr, bp);
    937 }
    938 
    939 void
    940 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
    941 {
    942 	struct raid_softc *rs;
    943 	struct dk_softc *dksc;
    944 
    945 	rs = raidPtr->softc;
    946 	dksc = &rs->sc_dksc;
    947 
    948 	dk_done(dksc, bp);
    949 
    950 	rf_lock_mutex2(raidPtr->mutex);
    951 	raidPtr->openings++;
    952 	rf_unlock_mutex2(raidPtr->mutex);
    953 
    954 	/* schedule more IO */
    955 	raid_wakeup(raidPtr);
    956 }
    957 
    958 /* ARGSUSED */
    959 static int
    960 raidread(dev_t dev, struct uio *uio, int flags)
    961 {
    962 	int     unit = raidunit(dev);
    963 	struct raid_softc *rs;
    964 
    965 	if ((rs = raidget(unit, false)) == NULL)
    966 		return ENXIO;
    967 
    968 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    969 		return (ENXIO);
    970 
    971 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    972 
    973 }
    974 
    975 /* ARGSUSED */
    976 static int
    977 raidwrite(dev_t dev, struct uio *uio, int flags)
    978 {
    979 	int     unit = raidunit(dev);
    980 	struct raid_softc *rs;
    981 
    982 	if ((rs = raidget(unit, false)) == NULL)
    983 		return ENXIO;
    984 
    985 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    986 		return (ENXIO);
    987 
    988 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    989 
    990 }
    991 
/*
 * Shut down and detach a RAID unit: stop RAIDframe, drain queued
 * buffers, and detach the dk/disk layers.  Returns EBUSY while the
 * unit is open or a reconstruction, parity rewrite, or copyback is
 * in progress; 0 if the unit was never configured or on success.
 * NOTE(review): "unlocked" presumably refers to the raidlock() softc
 * lock being held (or not needed) by the caller — confirm at call
 * sites; they are not visible here.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* Refuse while open or while background operations are running. */
	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	/* Never configured: nothing to tear down. */
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	/* The deferred shutdown is being performed now; clear the flag. */
	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
   1029 
   1030 static int
   1031 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
   1032 {
   1033 	int     unit = raidunit(dev);
   1034 	int     error = 0;
   1035 	int     part, pmask;
   1036 	struct raid_softc *rs;
   1037 	struct dk_softc *dksc;
   1038 	RF_Config_t *k_cfg, *u_cfg;
   1039 	RF_Raid_t *raidPtr;
   1040 	RF_RaidDisk_t *diskPtr;
   1041 	RF_AccTotals_t *totals;
   1042 	RF_DeviceConfig_t *d_cfg, **ucfgp;
   1043 	u_char *specific_buf;
   1044 	int retcode = 0;
   1045 	int column;
   1046 /*	int raidid; */
   1047 	struct rf_recon_req *rrcopy, *rr;
   1048 	RF_ComponentLabel_t *clabel;
   1049 	RF_ComponentLabel_t *ci_label;
   1050 	RF_ComponentLabel_t **clabel_ptr;
   1051 	RF_SingleComponent_t *sparePtr,*componentPtr;
   1052 	RF_SingleComponent_t component;
   1053 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
   1054 	int i, j, d;
   1055 
   1056 	if ((rs = raidget(unit, false)) == NULL)
   1057 		return ENXIO;
   1058 	dksc = &rs->sc_dksc;
   1059 	raidPtr = &rs->sc_r;
   1060 
   1061 	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
   1062 		(int) DISKPART(dev), (int) unit, cmd));
   1063 
   1064 	/* Must be initialized for these... */
   1065 	switch (cmd) {
   1066 	case RAIDFRAME_REWRITEPARITY:
   1067 	case RAIDFRAME_GET_INFO:
   1068 	case RAIDFRAME_RESET_ACCTOTALS:
   1069 	case RAIDFRAME_GET_ACCTOTALS:
   1070 	case RAIDFRAME_KEEP_ACCTOTALS:
   1071 	case RAIDFRAME_GET_SIZE:
   1072 	case RAIDFRAME_FAIL_DISK:
   1073 	case RAIDFRAME_COPYBACK:
   1074 	case RAIDFRAME_CHECK_RECON_STATUS:
   1075 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1076 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1077 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1078 	case RAIDFRAME_ADD_HOT_SPARE:
   1079 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1080 	case RAIDFRAME_INIT_LABELS:
   1081 	case RAIDFRAME_REBUILD_IN_PLACE:
   1082 	case RAIDFRAME_CHECK_PARITY:
   1083 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1084 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1085 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1086 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1087 	case RAIDFRAME_SET_AUTOCONFIG:
   1088 	case RAIDFRAME_SET_ROOT:
   1089 	case RAIDFRAME_DELETE_COMPONENT:
   1090 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1091 	case RAIDFRAME_PARITYMAP_STATUS:
   1092 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
   1093 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
   1094 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
   1095 		if ((rs->sc_flags & RAIDF_INITED) == 0)
   1096 			return (ENXIO);
   1097 	}
   1098 
   1099 	switch (cmd) {
   1100 #ifdef COMPAT_50
   1101 	case RAIDFRAME_GET_INFO50:
   1102 		return rf_get_info50(raidPtr, data);
   1103 
   1104 	case RAIDFRAME_CONFIGURE50:
   1105 		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
   1106 			return retcode;
   1107 		goto config;
   1108 #endif
   1109 		/* configure the system */
   1110 	case RAIDFRAME_CONFIGURE:
   1111 
   1112 		if (raidPtr->valid) {
   1113 			/* There is a valid RAID set running on this unit! */
   1114 			printf("raid%d: Device already configured!\n",unit);
   1115 			return(EINVAL);
   1116 		}
   1117 
   1118 		/* copy-in the configuration information */
   1119 		/* data points to a pointer to the configuration structure */
   1120 
   1121 		u_cfg = *((RF_Config_t **) data);
   1122 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
   1123 		if (k_cfg == NULL) {
   1124 			return (ENOMEM);
   1125 		}
   1126 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
   1127 		if (retcode) {
   1128 			RF_Free(k_cfg, sizeof(RF_Config_t));
   1129 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
   1130 				retcode));
   1131 			goto no_config;
   1132 		}
   1133 		goto config;
   1134 	config:
   1135 		rs->sc_flags &= ~RAIDF_SHUTDOWN;
   1136 
   1137 		/* allocate a buffer for the layout-specific data, and copy it
   1138 		 * in */
   1139 		if (k_cfg->layoutSpecificSize) {
   1140 			if (k_cfg->layoutSpecificSize > 10000) {
   1141 				/* sanity check */
   1142 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1143 				retcode = EINVAL;
   1144 				goto no_config;
   1145 			}
   1146 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
   1147 			    (u_char *));
   1148 			if (specific_buf == NULL) {
   1149 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1150 				retcode = ENOMEM;
   1151 				goto no_config;
   1152 			}
   1153 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1154 			    k_cfg->layoutSpecificSize);
   1155 			if (retcode) {
   1156 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1157 				RF_Free(specific_buf,
   1158 					k_cfg->layoutSpecificSize);
   1159 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
   1160 					retcode));
   1161 				goto no_config;
   1162 			}
   1163 		} else
   1164 			specific_buf = NULL;
   1165 		k_cfg->layoutSpecific = specific_buf;
   1166 
   1167 		/* should do some kind of sanity check on the configuration.
   1168 		 * Store the sum of all the bytes in the last byte? */
   1169 
   1170 		/* configure the system */
   1171 
   1172 		/*
   1173 		 * Clear the entire RAID descriptor, just to make sure
   1174 		 *  there is no stale data left in the case of a
   1175 		 *  reconfiguration
   1176 		 */
   1177 		memset(raidPtr, 0, sizeof(*raidPtr));
   1178 		raidPtr->softc = rs;
   1179 		raidPtr->raidid = unit;
   1180 
   1181 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
   1182 
   1183 		if (retcode == 0) {
   1184 
   1185 			/* allow this many simultaneous IO's to
   1186 			   this RAID device */
   1187 			raidPtr->openings = RAIDOUTSTANDING;
   1188 
   1189 			raidinit(rs);
   1190 			raid_wakeup(raidPtr);
   1191 			rf_markalldirty(raidPtr);
   1192 		}
   1193 		/* free the buffers.  No return code here. */
   1194 		if (k_cfg->layoutSpecificSize) {
   1195 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1196 		}
   1197 		RF_Free(k_cfg, sizeof(RF_Config_t));
   1198 
   1199 	no_config:
   1200 		/*
   1201 		 * If configuration failed, set sc_flags so that we
   1202 		 * will detach the device when we close it.
   1203 		 */
   1204 		if (retcode != 0)
   1205 			rs->sc_flags |= RAIDF_SHUTDOWN;
   1206 		return (retcode);
   1207 
   1208 		/* shutdown the system */
   1209 	case RAIDFRAME_SHUTDOWN:
   1210 
   1211 		part = DISKPART(dev);
   1212 		pmask = (1 << part);
   1213 
   1214 		if ((error = raidlock(rs)) != 0)
   1215 			return (error);
   1216 
   1217 		if (DK_BUSY(dksc, pmask) ||
   1218 		    raidPtr->recon_in_progress != 0 ||
   1219 		    raidPtr->parity_rewrite_in_progress != 0 ||
   1220 		    raidPtr->copyback_in_progress != 0)
   1221 			retcode = EBUSY;
   1222 		else {
   1223 			/* detach and free on close */
   1224 			rs->sc_flags |= RAIDF_SHUTDOWN;
   1225 			retcode = 0;
   1226 		}
   1227 
   1228 		raidunlock(rs);
   1229 
   1230 		return (retcode);
   1231 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1232 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1233 		/* need to read the component label for the disk indicated
   1234 		   by row,column in clabel */
   1235 
   1236 		/*
   1237 		 * Perhaps there should be an option to skip the in-core
   1238 		 * copy and hit the disk, as with disklabel(8).
   1239 		 */
   1240 		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
   1241 
   1242 		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
   1243 
   1244 		if (retcode) {
   1245 			RF_Free(clabel, sizeof(*clabel));
   1246 			return retcode;
   1247 		}
   1248 
   1249 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1250 
   1251 		column = clabel->column;
   1252 
   1253 		if ((column < 0) || (column >= raidPtr->numCol +
   1254 		    raidPtr->numSpare)) {
   1255 			RF_Free(clabel, sizeof(*clabel));
   1256 			return EINVAL;
   1257 		}
   1258 
   1259 		RF_Free(clabel, sizeof(*clabel));
   1260 
   1261 		clabel = raidget_component_label(raidPtr, column);
   1262 
   1263 		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
   1264 
   1265 #if 0
   1266 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1267 		clabel = (RF_ComponentLabel_t *) data;
   1268 
   1269 		/* XXX check the label for valid stuff... */
   1270 		/* Note that some things *should not* get modified --
   1271 		   the user should be re-initing the labels instead of
   1272 		   trying to patch things.
   1273 		   */
   1274 
   1275 		raidid = raidPtr->raidid;
   1276 #ifdef DEBUG
   1277 		printf("raid%d: Got component label:\n", raidid);
   1278 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1279 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1280 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1281 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1282 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1283 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1284 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1285 #endif
   1286 		clabel->row = 0;
   1287 		column = clabel->column;
   1288 
   1289 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1290 			return(EINVAL);
   1291 		}
   1292 
   1293 		/* XXX this isn't allowed to do anything for now :-) */
   1294 
   1295 		/* XXX and before it is, we need to fill in the rest
   1296 		   of the fields!?!?!?! */
   1297 		memcpy(raidget_component_label(raidPtr, column),
   1298 		    clabel, sizeof(*clabel));
   1299 		raidflush_component_label(raidPtr, column);
   1300 		return (0);
   1301 #endif
   1302 
   1303 	case RAIDFRAME_INIT_LABELS:
   1304 		clabel = (RF_ComponentLabel_t *) data;
   1305 		/*
   1306 		   we only want the serial number from
   1307 		   the above.  We get all the rest of the information
   1308 		   from the config that was used to create this RAID
   1309 		   set.
   1310 		   */
   1311 
   1312 		raidPtr->serial_number = clabel->serial_number;
   1313 
   1314 		for(column=0;column<raidPtr->numCol;column++) {
   1315 			diskPtr = &raidPtr->Disks[column];
   1316 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1317 				ci_label = raidget_component_label(raidPtr,
   1318 				    column);
   1319 				/* Zeroing this is important. */
   1320 				memset(ci_label, 0, sizeof(*ci_label));
   1321 				raid_init_component_label(raidPtr, ci_label);
   1322 				ci_label->serial_number =
   1323 				    raidPtr->serial_number;
   1324 				ci_label->row = 0; /* we dont' pretend to support more */
   1325 				rf_component_label_set_partitionsize(ci_label,
   1326 				    diskPtr->partitionSize);
   1327 				ci_label->column = column;
   1328 				raidflush_component_label(raidPtr, column);
   1329 			}
   1330 			/* XXXjld what about the spares? */
   1331 		}
   1332 
   1333 		return (retcode);
   1334 	case RAIDFRAME_SET_AUTOCONFIG:
   1335 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1336 		printf("raid%d: New autoconfig value is: %d\n",
   1337 		       raidPtr->raidid, d);
   1338 		*(int *) data = d;
   1339 		return (retcode);
   1340 
   1341 	case RAIDFRAME_SET_ROOT:
   1342 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1343 		printf("raid%d: New rootpartition value is: %d\n",
   1344 		       raidPtr->raidid, d);
   1345 		*(int *) data = d;
   1346 		return (retcode);
   1347 
   1348 		/* initialize all parity */
   1349 	case RAIDFRAME_REWRITEPARITY:
   1350 
   1351 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1352 			/* Parity for RAID 0 is trivially correct */
   1353 			raidPtr->parity_good = RF_RAID_CLEAN;
   1354 			return(0);
   1355 		}
   1356 
   1357 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1358 			/* Re-write is already in progress! */
   1359 			return(EINVAL);
   1360 		}
   1361 
   1362 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1363 					   rf_RewriteParityThread,
   1364 					   raidPtr,"raid_parity");
   1365 		return (retcode);
   1366 
   1367 
   1368 	case RAIDFRAME_ADD_HOT_SPARE:
   1369 		sparePtr = (RF_SingleComponent_t *) data;
   1370 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
   1371 		retcode = rf_add_hot_spare(raidPtr, &component);
   1372 		return(retcode);
   1373 
   1374 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1375 		return(retcode);
   1376 
   1377 	case RAIDFRAME_DELETE_COMPONENT:
   1378 		componentPtr = (RF_SingleComponent_t *)data;
   1379 		memcpy( &component, componentPtr,
   1380 			sizeof(RF_SingleComponent_t));
   1381 		retcode = rf_delete_component(raidPtr, &component);
   1382 		return(retcode);
   1383 
   1384 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1385 		componentPtr = (RF_SingleComponent_t *)data;
   1386 		memcpy( &component, componentPtr,
   1387 			sizeof(RF_SingleComponent_t));
   1388 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1389 		return(retcode);
   1390 
   1391 	case RAIDFRAME_REBUILD_IN_PLACE:
   1392 
   1393 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1394 			/* Can't do this on a RAID 0!! */
   1395 			return(EINVAL);
   1396 		}
   1397 
   1398 		if (raidPtr->recon_in_progress == 1) {
   1399 			/* a reconstruct is already in progress! */
   1400 			return(EINVAL);
   1401 		}
   1402 
   1403 		componentPtr = (RF_SingleComponent_t *) data;
   1404 		memcpy( &component, componentPtr,
   1405 			sizeof(RF_SingleComponent_t));
   1406 		component.row = 0; /* we don't support any more */
   1407 		column = component.column;
   1408 
   1409 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1410 			return(EINVAL);
   1411 		}
   1412 
   1413 		rf_lock_mutex2(raidPtr->mutex);
   1414 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1415 		    (raidPtr->numFailures > 0)) {
   1416 			/* XXX 0 above shouldn't be constant!!! */
   1417 			/* some component other than this has failed.
   1418 			   Let's not make things worse than they already
   1419 			   are... */
   1420 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1421 			       raidPtr->raidid);
   1422 			printf("raid%d:     Col: %d   Too many failures.\n",
   1423 			       raidPtr->raidid, column);
   1424 			rf_unlock_mutex2(raidPtr->mutex);
   1425 			return (EINVAL);
   1426 		}
   1427 		if (raidPtr->Disks[column].status ==
   1428 		    rf_ds_reconstructing) {
   1429 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1430 			       raidPtr->raidid);
   1431 			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);
   1432 
   1433 			rf_unlock_mutex2(raidPtr->mutex);
   1434 			return (EINVAL);
   1435 		}
   1436 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1437 			rf_unlock_mutex2(raidPtr->mutex);
   1438 			return (EINVAL);
   1439 		}
   1440 		rf_unlock_mutex2(raidPtr->mutex);
   1441 
   1442 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1443 		if (rrcopy == NULL)
   1444 			return(ENOMEM);
   1445 
   1446 		rrcopy->raidPtr = (void *) raidPtr;
   1447 		rrcopy->col = column;
   1448 
   1449 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1450 					   rf_ReconstructInPlaceThread,
   1451 					   rrcopy,"raid_reconip");
   1452 		return(retcode);
   1453 
   1454 	case RAIDFRAME_GET_INFO:
   1455 		if (!raidPtr->valid)
   1456 			return (ENODEV);
   1457 		ucfgp = (RF_DeviceConfig_t **) data;
   1458 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1459 			  (RF_DeviceConfig_t *));
   1460 		if (d_cfg == NULL)
   1461 			return (ENOMEM);
   1462 		d_cfg->rows = 1; /* there is only 1 row now */
   1463 		d_cfg->cols = raidPtr->numCol;
   1464 		d_cfg->ndevs = raidPtr->numCol;
   1465 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1466 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1467 			return (ENOMEM);
   1468 		}
   1469 		d_cfg->nspares = raidPtr->numSpare;
   1470 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1471 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1472 			return (ENOMEM);
   1473 		}
   1474 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1475 		d = 0;
   1476 		for (j = 0; j < d_cfg->cols; j++) {
   1477 			d_cfg->devs[d] = raidPtr->Disks[j];
   1478 			d++;
   1479 		}
   1480 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1481 			d_cfg->spares[i] = raidPtr->Disks[j];
   1482 			if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
   1483 				/* XXX: raidctl(8) expects to see this as a used spare */
   1484 				d_cfg->spares[i].status = rf_ds_used_spare;
   1485 			}
   1486 		}
   1487 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1488 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1489 
   1490 		return (retcode);
   1491 
   1492 	case RAIDFRAME_CHECK_PARITY:
   1493 		*(int *) data = raidPtr->parity_good;
   1494 		return (0);
   1495 
   1496 	case RAIDFRAME_PARITYMAP_STATUS:
   1497 		if (rf_paritymap_ineligible(raidPtr))
   1498 			return EINVAL;
   1499 		rf_paritymap_status(raidPtr->parity_map,
   1500 		    (struct rf_pmstat *)data);
   1501 		return 0;
   1502 
   1503 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
   1504 		if (rf_paritymap_ineligible(raidPtr))
   1505 			return EINVAL;
   1506 		if (raidPtr->parity_map == NULL)
   1507 			return ENOENT; /* ??? */
   1508 		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
   1509 			(struct rf_pmparams *)data, 1))
   1510 			return EINVAL;
   1511 		return 0;
   1512 
   1513 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
   1514 		if (rf_paritymap_ineligible(raidPtr))
   1515 			return EINVAL;
   1516 		*(int *) data = rf_paritymap_get_disable(raidPtr);
   1517 		return 0;
   1518 
   1519 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
   1520 		if (rf_paritymap_ineligible(raidPtr))
   1521 			return EINVAL;
   1522 		rf_paritymap_set_disable(raidPtr, *(int *)data);
   1523 		/* XXX should errors be passed up? */
   1524 		return 0;
   1525 
   1526 	case RAIDFRAME_RESET_ACCTOTALS:
   1527 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1528 		return (0);
   1529 
   1530 	case RAIDFRAME_GET_ACCTOTALS:
   1531 		totals = (RF_AccTotals_t *) data;
   1532 		*totals = raidPtr->acc_totals;
   1533 		return (0);
   1534 
   1535 	case RAIDFRAME_KEEP_ACCTOTALS:
   1536 		raidPtr->keep_acc_totals = *(int *)data;
   1537 		return (0);
   1538 
   1539 	case RAIDFRAME_GET_SIZE:
   1540 		*(int *) data = raidPtr->totalSectors;
   1541 		return (0);
   1542 
   1543 		/* fail a disk & optionally start reconstruction */
   1544 	case RAIDFRAME_FAIL_DISK:
   1545 
   1546 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1547 			/* Can't do this on a RAID 0!! */
   1548 			return(EINVAL);
   1549 		}
   1550 
   1551 		rr = (struct rf_recon_req *) data;
   1552 		rr->row = 0;
   1553 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1554 			return (EINVAL);
   1555 
   1556 
   1557 		rf_lock_mutex2(raidPtr->mutex);
   1558 		if (raidPtr->status == rf_rs_reconstructing) {
   1559 			/* you can't fail a disk while we're reconstructing! */
   1560 			/* XXX wrong for RAID6 */
   1561 			rf_unlock_mutex2(raidPtr->mutex);
   1562 			return (EINVAL);
   1563 		}
   1564 		if ((raidPtr->Disks[rr->col].status ==
   1565 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1566 			/* some other component has failed.  Let's not make
   1567 			   things worse. XXX wrong for RAID6 */
   1568 			rf_unlock_mutex2(raidPtr->mutex);
   1569 			return (EINVAL);
   1570 		}
   1571 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1572 			/* Can't fail a spared disk! */
   1573 			rf_unlock_mutex2(raidPtr->mutex);
   1574 			return (EINVAL);
   1575 		}
   1576 		rf_unlock_mutex2(raidPtr->mutex);
   1577 
   1578 		/* make a copy of the recon request so that we don't rely on
   1579 		 * the user's buffer */
   1580 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1581 		if (rrcopy == NULL)
   1582 			return(ENOMEM);
   1583 		memcpy(rrcopy, rr, sizeof(*rr));
   1584 		rrcopy->raidPtr = (void *) raidPtr;
   1585 
   1586 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1587 					   rf_ReconThread,
   1588 					   rrcopy,"raid_recon");
   1589 		return (0);
   1590 
   1591 		/* invoke a copyback operation after recon on whatever disk
   1592 		 * needs it, if any */
   1593 	case RAIDFRAME_COPYBACK:
   1594 
   1595 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1596 			/* This makes no sense on a RAID 0!! */
   1597 			return(EINVAL);
   1598 		}
   1599 
   1600 		if (raidPtr->copyback_in_progress == 1) {
   1601 			/* Copyback is already in progress! */
   1602 			return(EINVAL);
   1603 		}
   1604 
   1605 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1606 					   rf_CopybackThread,
   1607 					   raidPtr,"raid_copyback");
   1608 		return (retcode);
   1609 
   1610 		/* return the percentage completion of reconstruction */
   1611 	case RAIDFRAME_CHECK_RECON_STATUS:
   1612 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1613 			/* This makes no sense on a RAID 0, so tell the
   1614 			   user it's done. */
   1615 			*(int *) data = 100;
   1616 			return(0);
   1617 		}
   1618 		if (raidPtr->status != rf_rs_reconstructing)
   1619 			*(int *) data = 100;
   1620 		else {
   1621 			if (raidPtr->reconControl->numRUsTotal > 0) {
   1622 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
   1623 			} else {
   1624 				*(int *) data = 0;
   1625 			}
   1626 		}
   1627 		return (0);
   1628 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1629 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1630 		if (raidPtr->status != rf_rs_reconstructing) {
   1631 			progressInfo.remaining = 0;
   1632 			progressInfo.completed = 100;
   1633 			progressInfo.total = 100;
   1634 		} else {
   1635 			progressInfo.total =
   1636 				raidPtr->reconControl->numRUsTotal;
   1637 			progressInfo.completed =
   1638 				raidPtr->reconControl->numRUsComplete;
   1639 			progressInfo.remaining = progressInfo.total -
   1640 				progressInfo.completed;
   1641 		}
   1642 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1643 				  sizeof(RF_ProgressInfo_t));
   1644 		return (retcode);
   1645 
   1646 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1647 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1648 			/* This makes no sense on a RAID 0, so tell the
   1649 			   user it's done. */
   1650 			*(int *) data = 100;
   1651 			return(0);
   1652 		}
   1653 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1654 			*(int *) data = 100 *
   1655 				raidPtr->parity_rewrite_stripes_done /
   1656 				raidPtr->Layout.numStripe;
   1657 		} else {
   1658 			*(int *) data = 100;
   1659 		}
   1660 		return (0);
   1661 
   1662 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1663 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1664 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1665 			progressInfo.total = raidPtr->Layout.numStripe;
   1666 			progressInfo.completed =
   1667 				raidPtr->parity_rewrite_stripes_done;
   1668 			progressInfo.remaining = progressInfo.total -
   1669 				progressInfo.completed;
   1670 		} else {
   1671 			progressInfo.remaining = 0;
   1672 			progressInfo.completed = 100;
   1673 			progressInfo.total = 100;
   1674 		}
   1675 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1676 				  sizeof(RF_ProgressInfo_t));
   1677 		return (retcode);
   1678 
   1679 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1680 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1681 			/* This makes no sense on a RAID 0 */
   1682 			*(int *) data = 100;
   1683 			return(0);
   1684 		}
   1685 		if (raidPtr->copyback_in_progress == 1) {
   1686 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1687 				raidPtr->Layout.numStripe;
   1688 		} else {
   1689 			*(int *) data = 100;
   1690 		}
   1691 		return (0);
   1692 
   1693 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1694 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1695 		if (raidPtr->copyback_in_progress == 1) {
   1696 			progressInfo.total = raidPtr->Layout.numStripe;
   1697 			progressInfo.completed =
   1698 				raidPtr->copyback_stripes_done;
   1699 			progressInfo.remaining = progressInfo.total -
   1700 				progressInfo.completed;
   1701 		} else {
   1702 			progressInfo.remaining = 0;
   1703 			progressInfo.completed = 100;
   1704 			progressInfo.total = 100;
   1705 		}
   1706 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1707 				  sizeof(RF_ProgressInfo_t));
   1708 		return (retcode);
   1709 
   1710 	case RAIDFRAME_SET_LAST_UNIT:
   1711 		for (column = 0; column < raidPtr->numCol; column++)
   1712 			if (raidPtr->Disks[column].status != rf_ds_optimal)
   1713 				return EBUSY;
   1714 
   1715 		for (column = 0; column < raidPtr->numCol; column++) {
   1716 			clabel = raidget_component_label(raidPtr, column);
   1717 			clabel->last_unit = *(int *)data;
   1718 			raidflush_component_label(raidPtr, column);
   1719 		}
   1720 		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
   1721 		return 0;
   1722 
   1723 		/* the sparetable daemon calls this to wait for the kernel to
   1724 		 * need a spare table. this ioctl does not return until a
   1725 		 * spare table is needed. XXX -- calling mpsleep here in the
   1726 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1727 		 * -- I should either compute the spare table in the kernel,
   1728 		 * or have a different -- XXX XXX -- interface (a different
   1729 		 * character device) for delivering the table     -- XXX */
   1730 #if 0
   1731 	case RAIDFRAME_SPARET_WAIT:
   1732 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1733 		while (!rf_sparet_wait_queue)
   1734 			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
   1735 		waitreq = rf_sparet_wait_queue;
   1736 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1737 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1738 
   1739 		/* structure assignment */
   1740 		*((RF_SparetWait_t *) data) = *waitreq;
   1741 
   1742 		RF_Free(waitreq, sizeof(*waitreq));
   1743 		return (0);
   1744 
   1745 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1746 		 * code in it that will cause the dameon to exit */
   1747 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1748 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1749 		waitreq->fcol = -1;
   1750 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1751 		waitreq->next = rf_sparet_wait_queue;
   1752 		rf_sparet_wait_queue = waitreq;
   1753 		rf_broadcast_conf2(rf_sparet_wait_cv);
   1754 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1755 		return (0);
   1756 
   1757 		/* used by the spare table daemon to deliver a spare table
   1758 		 * into the kernel */
   1759 	case RAIDFRAME_SEND_SPARET:
   1760 
   1761 		/* install the spare table */
   1762 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1763 
   1764 		/* respond to the requestor.  the return status of the spare
   1765 		 * table installation is passed in the "fcol" field */
   1766 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1767 		waitreq->fcol = retcode;
   1768 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1769 		waitreq->next = rf_sparet_resp_queue;
   1770 		rf_sparet_resp_queue = waitreq;
   1771 		rf_broadcast_cond2(rf_sparet_resp_cv);
   1772 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1773 
   1774 		return (retcode);
   1775 #endif
   1776 
   1777 	default:
   1778 		break; /* fall through to the os-specific code below */
   1779 
   1780 	}
   1781 
   1782 	if (!raidPtr->valid)
   1783 		return (EINVAL);
   1784 
   1785 	/*
   1786 	 * Add support for "regular" device ioctls here.
   1787 	 */
   1788 
   1789 	switch (cmd) {
   1790 	case DIOCCACHESYNC:
   1791 		retcode = rf_sync_component_caches(raidPtr);
   1792 
   1793 	default:
   1794 		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
   1795 	}
   1796 
   1797 	return (retcode);
   1798 
   1799 }
   1800 
   1801 
/* raidinit -- complete the rest of the initialization for the
   RAIDframe device: attach the pseudo-device, initialize and attach
   the dk(4)/disk(9) layers, and mark the unit usable.  Called after
   rf_Configure() succeeds. */


static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device (M_WAITOK: cannot fail) */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		/* Attach failed: release cf and leave RAIDF_INITED clear. */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	/* First-come-first-served queue, sorted by raw block number. */
	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	/* Probe for wedges (dk(4) partitions) on the new disk. */
	dkwedge_discover(&dksc->sc_dkdev);
}
   1861 
   1862 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   1863 /* wake up the daemon & tell it to get us a spare table
   1864  * XXX
   1865  * the entries in the queues should be tagged with the raidPtr
   1866  * so that in the extremely rare case that two recons happen at once,
   1867  * we know for which device were requesting a spare table
   1868  * XXX
   1869  *
   1870  * XXX This code is not currently used. GO
   1871  */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	/* post the request on the wait queue and wake the daemon */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* rf_wait_cond2 is expected to drop the mutex while sleeping and
	 * reacquire it before returning (the old comment referred to
	 * mpsleep) — so the queue is always re-checked with the mutex held */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	/* the daemon passes the installation status back in "fcol" */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
   1895 #endif
   1896 
   1897 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1898  * bp & passes it down.
   1899  * any calls originating in the kernel must use non-blocking I/O
   1900  * do some extra sanity checking to return "appropriate" error values for
   1901  * certain conditions (to make some standard utilities work)
   1902  *
   1903  * Formerly known as: rf_DoAccessKernel
   1904  */
/*
 * Kick the dk layer to start processing queued buffers for this RAID set.
 * Before doing so, push updated component labels out if any component
 * failed since the last call.
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the mutex across the label update — it may need to
		 * take locks of its own; note only one failure is consumed
		 * per call */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* refuse to start I/O on a unit that never finished raidinit() */
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	dk_start(dksc, NULL);
}
   1931 
   1932 static int
   1933 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
   1934 {
   1935 	RF_SectorCount_t num_blocks, pb, sum;
   1936 	RF_RaidAddr_t raid_addr;
   1937 	daddr_t blocknum;
   1938 	int     do_async;
   1939 	int rc;
   1940 
   1941 	rf_lock_mutex2(raidPtr->mutex);
   1942 	if (raidPtr->openings == 0) {
   1943 		rf_unlock_mutex2(raidPtr->mutex);
   1944 		return EAGAIN;
   1945 	}
   1946 	rf_unlock_mutex2(raidPtr->mutex);
   1947 
   1948 	blocknum = bp->b_rawblkno;
   1949 
   1950 	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1951 		    (int) blocknum));
   1952 
   1953 	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1954 	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1955 
   1956 	/* *THIS* is where we adjust what block we're going to...
   1957 	 * but DO NOT TOUCH bp->b_blkno!!! */
   1958 	raid_addr = blocknum;
   1959 
   1960 	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1961 	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1962 	sum = raid_addr + num_blocks + pb;
   1963 	if (1 || rf_debugKernelAccess) {
   1964 		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1965 			    (int) raid_addr, (int) sum, (int) num_blocks,
   1966 			    (int) pb, (int) bp->b_resid));
   1967 	}
   1968 	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1969 	    || (sum < num_blocks) || (sum < pb)) {
   1970 		rc = ENOSPC;
   1971 		goto done;
   1972 	}
   1973 	/*
   1974 	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1975 	 */
   1976 
   1977 	if (bp->b_bcount & raidPtr->sectorMask) {
   1978 		rc = ENOSPC;
   1979 		goto done;
   1980 	}
   1981 	db1_printf(("Calling DoAccess..\n"));
   1982 
   1983 
   1984 	rf_lock_mutex2(raidPtr->mutex);
   1985 	raidPtr->openings--;
   1986 	rf_unlock_mutex2(raidPtr->mutex);
   1987 
   1988 	/*
   1989 	 * Everything is async.
   1990 	 */
   1991 	do_async = 1;
   1992 
   1993 	/* don't ever condition on bp->b_flags & B_WRITE.
   1994 	 * always condition on B_READ instead */
   1995 
   1996 	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1997 			 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1998 			 do_async, raid_addr, num_blocks,
   1999 			 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   2000 
   2001 done:
   2002 	return rc;
   2003 }
   2004 
   2005 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   2006 
   2007 int
   2008 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
   2009 {
   2010 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   2011 	struct buf *bp;
   2012 
   2013 	req->queue = queue;
   2014 	bp = req->bp;
   2015 
   2016 	switch (req->type) {
   2017 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   2018 		/* XXX need to do something extra here.. */
   2019 		/* I'm leaving this in, as I've never actually seen it used,
   2020 		 * and I'd like folks to report it... GO */
   2021 		printf(("WAKEUP CALLED\n"));
   2022 		queue->numOutstanding++;
   2023 
   2024 		bp->b_flags = 0;
   2025 		bp->b_private = req;
   2026 
   2027 		KernelWakeupFunc(bp);
   2028 		break;
   2029 
   2030 	case RF_IO_TYPE_READ:
   2031 	case RF_IO_TYPE_WRITE:
   2032 #if RF_ACC_TRACE > 0
   2033 		if (req->tracerec) {
   2034 			RF_ETIMER_START(req->tracerec->timer);
   2035 		}
   2036 #endif
   2037 		InitBP(bp, queue->rf_cinfo->ci_vp,
   2038 		    op, queue->rf_cinfo->ci_dev,
   2039 		    req->sectorOffset, req->numSector,
   2040 		    req->buf, KernelWakeupFunc, (void *) req,
   2041 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   2042 
   2043 		if (rf_debugKernelAccess) {
   2044 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   2045 				(long) bp->b_blkno));
   2046 		}
   2047 		queue->numOutstanding++;
   2048 		queue->last_deq_sector = req->sectorOffset;
   2049 		/* acc wouldn't have been let in if there were any pending
   2050 		 * reqs at any other priority */
   2051 		queue->curPriority = req->priority;
   2052 
   2053 		db1_printf(("Going for %c to unit %d col %d\n",
   2054 			    req->type, queue->raidPtr->raidid,
   2055 			    queue->col));
   2056 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   2057 			(int) req->sectorOffset, (int) req->numSector,
   2058 			(int) (req->numSector <<
   2059 			    queue->raidPtr->logBytesPerSector),
   2060 			(int) queue->raidPtr->logBytesPerSector));
   2061 
   2062 		/*
   2063 		 * XXX: drop lock here since this can block at
   2064 		 * least with backing SCSI devices.  Retake it
   2065 		 * to minimize fuss with calling interfaces.
   2066 		 */
   2067 
   2068 		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
   2069 		bdev_strategy(bp);
   2070 		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
   2071 		break;
   2072 
   2073 	default:
   2074 		panic("bad req->type in rf_DispatchKernelIO");
   2075 	}
   2076 	db1_printf(("Exiting from DispatchKernelIO\n"));
   2077 
   2078 	return (0);
   2079 }
   2080 /* this is the callback function associated with a I/O invoked from
   2081    kernel code.
   2082  */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the originating request was stashed in b_private by InitBP() */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	/* everything below (failure accounting, iodone list, signal) is
	 * done under the array's iodone_lock */
	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		/* i.e. only fail a component that is currently optimal or
		 * an in-use spare, and only while the layout can still
		 * tolerate one more failure */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures triggers a label update from
			 * raidstart() */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
   2149 
   2150 
   2151 /*
   2152  * initialize a buf structure for doing an I/O in the kernel.
   2153  */
   2154 static void
   2155 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2156        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
   2157        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
   2158        struct proc *b_proc)
   2159 {
   2160 	/* bp->b_flags       = B_PHYS | rw_flag; */
   2161 	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
   2162 	bp->b_oflags = 0;
   2163 	bp->b_cflags = 0;
   2164 	bp->b_bcount = numSect << logBytesPerSector;
   2165 	bp->b_bufsize = bp->b_bcount;
   2166 	bp->b_error = 0;
   2167 	bp->b_dev = dev;
   2168 	bp->b_data = bf;
   2169 	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
   2170 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2171 	if (bp->b_bcount == 0) {
   2172 		panic("bp->b_bcount is zero in InitBP!!");
   2173 	}
   2174 	bp->b_proc = b_proc;
   2175 	bp->b_iodone = cbFunc;
   2176 	bp->b_private = cbArg;
   2177 }
   2178 
   2179 /*
   2180  * Wait interruptibly for an exclusive lock.
   2181  *
   2182  * XXX
   2183  * Several drivers do this; it should be abstracted and made MP-safe.
   2184  * (Hmm... where have we seen this warning before :->  GO )
   2185  */
   2186 static int
   2187 raidlock(struct raid_softc *rs)
   2188 {
   2189 	int     error;
   2190 
   2191 	error = 0;
   2192 	mutex_enter(&rs->sc_mutex);
   2193 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2194 		rs->sc_flags |= RAIDF_WANTED;
   2195 		error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
   2196 		if (error != 0)
   2197 			goto done;
   2198 	}
   2199 	rs->sc_flags |= RAIDF_LOCKED;
   2200 done:
   2201 	mutex_exit(&rs->sc_mutex);
   2202 	return (error);
   2203 }
   2204 /*
   2205  * Unlock and wake up any waiters.
   2206  */
   2207 static void
   2208 raidunlock(struct raid_softc *rs)
   2209 {
   2210 
   2211 	mutex_enter(&rs->sc_mutex);
   2212 	rs->sc_flags &= ~RAIDF_LOCKED;
   2213 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2214 		rs->sc_flags &= ~RAIDF_WANTED;
   2215 		cv_broadcast(&rs->sc_cv);
   2216 	}
   2217 	mutex_exit(&rs->sc_mutex);
   2218 }
   2219 
   2220 
   2221 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2222 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2223 #define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
   2224 
   2225 static daddr_t
   2226 rf_component_info_offset(void)
   2227 {
   2228 
   2229 	return RF_COMPONENT_INFO_OFFSET;
   2230 }
   2231 
   2232 static daddr_t
   2233 rf_component_info_size(unsigned secsize)
   2234 {
   2235 	daddr_t info_size;
   2236 
   2237 	KASSERT(secsize);
   2238 	if (secsize > RF_COMPONENT_INFO_SIZE)
   2239 		info_size = secsize;
   2240 	else
   2241 		info_size = RF_COMPONENT_INFO_SIZE;
   2242 
   2243 	return info_size;
   2244 }
   2245 
   2246 static daddr_t
   2247 rf_parity_map_offset(RF_Raid_t *raidPtr)
   2248 {
   2249 	daddr_t map_offset;
   2250 
   2251 	KASSERT(raidPtr->bytesPerSector);
   2252 	if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
   2253 		map_offset = raidPtr->bytesPerSector;
   2254 	else
   2255 		map_offset = RF_COMPONENT_INFO_SIZE;
   2256 	map_offset += rf_component_info_offset();
   2257 
   2258 	return map_offset;
   2259 }
   2260 
   2261 static daddr_t
   2262 rf_parity_map_size(RF_Raid_t *raidPtr)
   2263 {
   2264 	daddr_t map_size;
   2265 
   2266 	if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
   2267 		map_size = raidPtr->bytesPerSector;
   2268 	else
   2269 		map_size = RF_PARITY_MAP_SIZE;
   2270 
   2271 	return map_size;
   2272 }
   2273 
   2274 int
   2275 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2276 {
   2277 	RF_ComponentLabel_t *clabel;
   2278 
   2279 	clabel = raidget_component_label(raidPtr, col);
   2280 	clabel->clean = RF_RAID_CLEAN;
   2281 	raidflush_component_label(raidPtr, col);
   2282 	return(0);
   2283 }
   2284 
   2285 
   2286 int
   2287 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2288 {
   2289 	RF_ComponentLabel_t *clabel;
   2290 
   2291 	clabel = raidget_component_label(raidPtr, col);
   2292 	clabel->clean = RF_RAID_DIRTY;
   2293 	raidflush_component_label(raidPtr, col);
   2294 	return(0);
   2295 }
   2296 
   2297 int
   2298 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2299 {
   2300 	KASSERT(raidPtr->bytesPerSector);
   2301 	return raidread_component_label(raidPtr->bytesPerSector,
   2302 	    raidPtr->Disks[col].dev,
   2303 	    raidPtr->raid_cinfo[col].ci_vp,
   2304 	    &raidPtr->raid_cinfo[col].ci_label);
   2305 }
   2306 
   2307 RF_ComponentLabel_t *
   2308 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2309 {
   2310 	return &raidPtr->raid_cinfo[col].ci_label;
   2311 }
   2312 
   2313 int
   2314 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2315 {
   2316 	RF_ComponentLabel_t *label;
   2317 
   2318 	label = &raidPtr->raid_cinfo[col].ci_label;
   2319 	label->mod_counter = raidPtr->mod_counter;
   2320 #ifndef RF_NO_PARITY_MAP
   2321 	label->parity_map_modcount = label->mod_counter;
   2322 #endif
   2323 	return raidwrite_component_label(raidPtr->bytesPerSector,
   2324 	    raidPtr->Disks[col].dev,
   2325 	    raidPtr->raid_cinfo[col].ci_vp, label);
   2326 }
   2327 
   2328 
   2329 static int
   2330 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
   2331     RF_ComponentLabel_t *clabel)
   2332 {
   2333 	return raidread_component_area(dev, b_vp, clabel,
   2334 	    sizeof(RF_ComponentLabel_t),
   2335 	    rf_component_info_offset(),
   2336 	    rf_component_info_size(secsize));
   2337 }
   2338 
   2339 /* ARGSUSED */
/*
 * Read a raw component area (label or parity map) from a component.
 * Reads "dsize" bytes from byte offset "offset" on the device, and on
 * success copies the first "msize" bytes into "data".  Returns 0 or an
 * error from biowait().
 */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
 	bp->b_resid = dsize;

	/* synchronous read: dispatch then wait for completion */
	bdev_strategy(bp);
	error = biowait(bp);

	if (!error) {
		/* only msize bytes are meaningful to the caller */
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}
   2376 
   2377 
   2378 static int
   2379 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
   2380     RF_ComponentLabel_t *clabel)
   2381 {
   2382 	return raidwrite_component_area(dev, b_vp, clabel,
   2383 	    sizeof(RF_ComponentLabel_t),
   2384 	    rf_component_info_offset(),
   2385 	    rf_component_info_size(secsize), 0);
   2386 }
   2387 
   2388 /* ARGSUSED */
/*
 * Write a raw component area (label or parity map) to a component.
 * Copies "msize" bytes from "data" into a zero-padded "dsize"-byte
 * buffer and writes it at byte offset "offset".  When "asyncp" is set
 * the write is issued B_ASYNC and 0 is returned immediately without
 * waiting; otherwise the result of biowait() is returned.
 */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
 	bp->b_resid = dsize;

	/* zero-pad so the area beyond msize is deterministic on disk */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		/* NOTE(review): no brelse() here — presumably the async
		 * completion path releases the buffer; confirm */
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
   2422 
   2423 void
   2424 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
   2425 {
   2426 	int c;
   2427 
   2428 	for (c = 0; c < raidPtr->numCol; c++) {
   2429 		/* Skip dead disks. */
   2430 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
   2431 			continue;
   2432 		/* XXXjld: what if an error occurs here? */
   2433 		raidwrite_component_area(raidPtr->Disks[c].dev,
   2434 		    raidPtr->raid_cinfo[c].ci_vp, map,
   2435 		    RF_PARITYMAP_NBYTE,
   2436 		    rf_parity_map_offset(raidPtr),
   2437 		    rf_parity_map_size(raidPtr), 0);
   2438 	}
   2439 }
   2440 
   2441 void
   2442 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
   2443 {
   2444 	struct rf_paritymap_ondisk tmp;
   2445 	int c,first;
   2446 
   2447 	first=1;
   2448 	for (c = 0; c < raidPtr->numCol; c++) {
   2449 		/* Skip dead disks. */
   2450 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
   2451 			continue;
   2452 		raidread_component_area(raidPtr->Disks[c].dev,
   2453 		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
   2454 		    RF_PARITYMAP_NBYTE,
   2455 		    rf_parity_map_offset(raidPtr),
   2456 		    rf_parity_map_size(raidPtr));
   2457 		if (first) {
   2458 			memcpy(map, &tmp, sizeof(*map));
   2459 			first = 0;
   2460 		} else {
   2461 			rf_paritymap_merge(map, &tmp);
   2462 		}
   2463 	}
   2464 }
   2465 
/*
 * Mark every live component (and every in-use spare) of the set dirty,
 * bumping the array's modification counter first.  Called when the set
 * goes into use so an unclean shutdown is detectable.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column matched, scol keeps -1
			 * or the value from a previous spare's iteration —
			 * confirm this is intended */

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
   2525 
   2526 
/*
 * Flush fresh component labels to every optimal component and every
 * in-use spare, after bumping the array's modification counter.  When
 * "final" is RF_FINAL_COMPONENT_UPDATE and parity is known good, the
 * components are also marked clean (shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column matched, scol keeps -1
			 * or a previous iteration's value — confirm intended */

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
   2604 
   2605 void
   2606 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2607 {
   2608 
   2609 	if (vp != NULL) {
   2610 		if (auto_configured == 1) {
   2611 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2612 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2613 			vput(vp);
   2614 
   2615 		} else {
   2616 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
   2617 		}
   2618 	}
   2619 }
   2620 
   2621 
   2622 void
   2623 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2624 {
   2625 	int r,c;
   2626 	struct vnode *vp;
   2627 	int acd;
   2628 
   2629 
   2630 	/* We take this opportunity to close the vnodes like we should.. */
   2631 
   2632 	for (c = 0; c < raidPtr->numCol; c++) {
   2633 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2634 		acd = raidPtr->Disks[c].auto_configured;
   2635 		rf_close_component(raidPtr, vp, acd);
   2636 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2637 		raidPtr->Disks[c].auto_configured = 0;
   2638 	}
   2639 
   2640 	for (r = 0; r < raidPtr->numSpare; r++) {
   2641 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2642 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2643 		rf_close_component(raidPtr, vp, acd);
   2644 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2645 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2646 	}
   2647 }
   2648 
   2649 
   2650 void
   2651 rf_ReconThread(struct rf_recon_req *req)
   2652 {
   2653 	int     s;
   2654 	RF_Raid_t *raidPtr;
   2655 
   2656 	s = splbio();
   2657 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2658 	raidPtr->recon_in_progress = 1;
   2659 
   2660 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2661 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2662 
   2663 	RF_Free(req, sizeof(*req));
   2664 
   2665 	raidPtr->recon_in_progress = 0;
   2666 	splx(s);
   2667 
   2668 	/* That's all... */
   2669 	kthread_exit(0);	/* does not return */
   2670 }
   2671 
   2672 void
   2673 rf_RewriteParityThread(RF_Raid_t *raidPtr)
   2674 {
   2675 	int retcode;
   2676 	int s;
   2677 
   2678 	raidPtr->parity_rewrite_stripes_done = 0;
   2679 	raidPtr->parity_rewrite_in_progress = 1;
   2680 	s = splbio();
   2681 	retcode = rf_RewriteParity(raidPtr);
   2682 	splx(s);
   2683 	if (retcode) {
   2684 		printf("raid%d: Error re-writing parity (%d)!\n",
   2685 		    raidPtr->raidid, retcode);
   2686 	} else {
   2687 		/* set the clean bit!  If we shutdown correctly,
   2688 		   the clean bit on each component label will get
   2689 		   set */
   2690 		raidPtr->parity_good = RF_RAID_CLEAN;
   2691 	}
   2692 	raidPtr->parity_rewrite_in_progress = 0;
   2693 
   2694 	/* Anyone waiting for us to stop?  If so, inform them... */
   2695 	if (raidPtr->waitShutdown) {
   2696 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2697 	}
   2698 
   2699 	/* That's all... */
   2700 	kthread_exit(0);	/* does not return */
   2701 }
   2702 
   2703 
   2704 void
   2705 rf_CopybackThread(RF_Raid_t *raidPtr)
   2706 {
   2707 	int s;
   2708 
   2709 	raidPtr->copyback_in_progress = 1;
   2710 	s = splbio();
   2711 	rf_CopybackReconstructedData(raidPtr);
   2712 	splx(s);
   2713 	raidPtr->copyback_in_progress = 0;
   2714 
   2715 	/* That's all... */
   2716 	kthread_exit(0);	/* does not return */
   2717 }
   2718 
   2719 
   2720 void
   2721 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
   2722 {
   2723 	int s;
   2724 	RF_Raid_t *raidPtr;
   2725 
   2726 	s = splbio();
   2727 	raidPtr = req->raidPtr;
   2728 	raidPtr->recon_in_progress = 1;
   2729 	rf_ReconstructInPlace(raidPtr, req->col);
   2730 	RF_Free(req, sizeof(*req));
   2731 	raidPtr->recon_in_progress = 0;
   2732 	splx(s);
   2733 
   2734 	/* That's all... */
   2735 	kthread_exit(0);	/* does not return */
   2736 }
   2737 
/*
 * Probe one candidate component during auto-configuration: read its
 * component label and, if the label looks reasonable and fits the
 * partition, prepend a new RF_AutoConfig_t to "ac_list".  Otherwise the
 * vnode is closed and released.  Returns the (possibly extended) list,
 * or NULL after freeing the whole list on memory exhaustion.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    /* out of memory: tear down everything collected so far */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			/* clabel ownership moves into the list entry */
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: not a RAID component, release label and vnode */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
   2795 
/*
 * Scan every disk-class device in the system for RAIDframe components.
 *
 * Returns a linked list of RF_AutoConfig_t entries (one per component
 * found, each holding an open vnode and a copy of the component label),
 * or NULL if nothing was found.  The caller owns the list and the open
 * vnodes in it.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		     dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			/*
			 * Wedges ("dk") have a flat minor space; real disks
			 * encode the raw partition into the minor number.
			 */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				/*
				 * NOTE(review): opened with FREAD (|FSILENT)
				 * above but closed with FREAD|FWRITE here and
				 * below — verify the flag mismatch is benign
				 * for the device open-count bookkeeping.
				 */
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* Wedge pass: accept only RAIDframe-typed wedges. */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component takes ownership of vp on success. */
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists.  Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			/* Walk the disklabel looking for FS_RAID partitions. */
			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
   2999 
   3000 
   3001 int
   3002 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
   3003 {
   3004 
   3005 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   3006 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   3007 	    ((clabel->clean == RF_RAID_CLEAN) ||
   3008 	     (clabel->clean == RF_RAID_DIRTY)) &&
   3009 	    clabel->row >=0 &&
   3010 	    clabel->column >= 0 &&
   3011 	    clabel->num_rows > 0 &&
   3012 	    clabel->num_columns > 0 &&
   3013 	    clabel->row < clabel->num_rows &&
   3014 	    clabel->column < clabel->num_columns &&
   3015 	    clabel->blockSize > 0 &&
   3016 	    /*
   3017 	     * numBlocksHi may contain garbage, but it is ok since
   3018 	     * the type is unsigned.  If it is really garbage,
   3019 	     * rf_fix_old_label_size() will fix it.
   3020 	     */
   3021 	    rf_component_label_numblocks(clabel) > 0) {
   3022 		/*
   3023 		 * label looks reasonable enough...
   3024 		 * let's make sure it has no old garbage.
   3025 		 */
   3026 		if (numsecs)
   3027 			rf_fix_old_label_size(clabel, numsecs);
   3028 		return(1);
   3029 	}
   3030 	return(0);
   3031 }
   3032 
   3033 
   3034 /*
   3035  * For reasons yet unknown, some old component labels have garbage in
   3036  * the newer numBlocksHi region, and this causes lossage.  Since those
   3037  * disks will also have numsecs set to less than 32 bits of sectors,
   3038  * we can determine when this corruption has occurred, and fix it.
   3039  *
   3040  * The exact same problem, with the same unknown reason, happens to
   3041  * the partitionSizeHi member as well.
   3042  */
   3043 static void
   3044 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
   3045 {
   3046 
   3047 	if (numsecs < ((uint64_t)1 << 32)) {
   3048 		if (clabel->numBlocksHi) {
   3049 			printf("WARNING: total sectors < 32 bits, yet "
   3050 			       "numBlocksHi set\n"
   3051 			       "WARNING: resetting numBlocksHi to zero.\n");
   3052 			clabel->numBlocksHi = 0;
   3053 		}
   3054 
   3055 		if (clabel->partitionSizeHi) {
   3056 			printf("WARNING: total sectors < 32 bits, yet "
   3057 			       "partitionSizeHi set\n"
   3058 			       "WARNING: resetting partitionSizeHi to zero.\n");
   3059 			clabel->partitionSizeHi = 0;
   3060 		}
   3061 	}
   3062 }
   3063 
   3064 
   3065 #ifdef DEBUG
/*
 * Dump a component label to the console in human-readable form
 * (debug builds only).
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* Printable names for the 2-bit root_partition field. */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	/* mask to 2 bits so a corrupt value indexes "*invalid*" */
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
   3097 #endif
   3098 
   3099 RF_ConfigSet_t *
   3100 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   3101 {
   3102 	RF_AutoConfig_t *ac;
   3103 	RF_ConfigSet_t *config_sets;
   3104 	RF_ConfigSet_t *cset;
   3105 	RF_AutoConfig_t *ac_next;
   3106 
   3107 
   3108 	config_sets = NULL;
   3109 
   3110 	/* Go through the AutoConfig list, and figure out which components
   3111 	   belong to what sets.  */
   3112 	ac = ac_list;
   3113 	while(ac!=NULL) {
   3114 		/* we're going to putz with ac->next, so save it here
   3115 		   for use at the end of the loop */
   3116 		ac_next = ac->next;
   3117 
   3118 		if (config_sets == NULL) {
   3119 			/* will need at least this one... */
   3120 			config_sets = (RF_ConfigSet_t *)
   3121 				malloc(sizeof(RF_ConfigSet_t),
   3122 				       M_RAIDFRAME, M_NOWAIT);
   3123 			if (config_sets == NULL) {
   3124 				panic("rf_create_auto_sets: No memory!");
   3125 			}
   3126 			/* this one is easy :) */
   3127 			config_sets->ac = ac;
   3128 			config_sets->next = NULL;
   3129 			config_sets->rootable = 0;
   3130 			ac->next = NULL;
   3131 		} else {
   3132 			/* which set does this component fit into? */
   3133 			cset = config_sets;
   3134 			while(cset!=NULL) {
   3135 				if (rf_does_it_fit(cset, ac)) {
   3136 					/* looks like it matches... */
   3137 					ac->next = cset->ac;
   3138 					cset->ac = ac;
   3139 					break;
   3140 				}
   3141 				cset = cset->next;
   3142 			}
   3143 			if (cset==NULL) {
   3144 				/* didn't find a match above... new set..*/
   3145 				cset = (RF_ConfigSet_t *)
   3146 					malloc(sizeof(RF_ConfigSet_t),
   3147 					       M_RAIDFRAME, M_NOWAIT);
   3148 				if (cset == NULL) {
   3149 					panic("rf_create_auto_sets: No memory!");
   3150 				}
   3151 				cset->ac = ac;
   3152 				ac->next = NULL;
   3153 				cset->next = config_sets;
   3154 				cset->rootable = 0;
   3155 				config_sets = cset;
   3156 			}
   3157 		}
   3158 		ac = ac_next;
   3159 	}
   3160 
   3161 
   3162 	return(config_sets);
   3163 }
   3164 
   3165 static int
   3166 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3167 {
   3168 	RF_ComponentLabel_t *clabel1, *clabel2;
   3169 
   3170 	/* If this one matches the *first* one in the set, that's good
   3171 	   enough, since the other members of the set would have been
   3172 	   through here too... */
   3173 	/* note that we are not checking partitionSize here..
   3174 
   3175 	   Note that we are also not checking the mod_counters here.
   3176 	   If everything else matches except the mod_counter, that's
   3177 	   good enough for this test.  We will deal with the mod_counters
   3178 	   a little later in the autoconfiguration process.
   3179 
   3180 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3181 
   3182 	   The reason we don't check for this is that failed disks
   3183 	   will have lower modification counts.  If those disks are
   3184 	   not added to the set they used to belong to, then they will
   3185 	   form their own set, which may result in 2 different sets,
   3186 	   for example, competing to be configured at raid0, and
   3187 	   perhaps competing to be the root filesystem set.  If the
   3188 	   wrong ones get configured, or both attempt to become /,
   3189 	   weird behaviour and or serious lossage will occur.  Thus we
   3190 	   need to bring them into the fold here, and kick them out at
   3191 	   a later point.
   3192 
   3193 	*/
   3194 
   3195 	clabel1 = cset->ac->clabel;
   3196 	clabel2 = ac->clabel;
   3197 	if ((clabel1->version == clabel2->version) &&
   3198 	    (clabel1->serial_number == clabel2->serial_number) &&
   3199 	    (clabel1->num_rows == clabel2->num_rows) &&
   3200 	    (clabel1->num_columns == clabel2->num_columns) &&
   3201 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3202 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3203 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3204 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3205 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3206 	    (clabel1->blockSize == clabel2->blockSize) &&
   3207 	    rf_component_label_numblocks(clabel1) ==
   3208 	    rf_component_label_numblocks(clabel2) &&
   3209 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3210 	    (clabel1->root_partition == clabel2->root_partition) &&
   3211 	    (clabel1->last_unit == clabel2->last_unit) &&
   3212 	    (clabel1->config_order == clabel2->config_order)) {
   3213 		/* if it get's here, it almost *has* to be a match */
   3214 	} else {
   3215 		/* it's not consistent with somebody in the set..
   3216 		   punt */
   3217 		return(0);
   3218 	}
   3219 	/* all was fine.. it must fit... */
   3220 	return(1);
   3221 }
   3222 
/*
 * Decide whether a configuration set has enough live components to be
 * autoconfigured.  Returns 1 if the set can be configured, 0 if too
 * many components are missing or stale.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */
	/* The highest mod_counter in the set is taken as authoritative;
	   components with a lower count are treated as stale/failed. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	/* For each column, look for a component that is both present and
	   up-to-date (mod_counter matches the set's maximum). */
	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						/* both halves of a mirror
						   pair are gone: set is
						   unconfigurable */
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* Non-mirror levels: RAID 0 tolerates no missing components,
	   RAID 4/5 tolerate at most one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
   3325 
/*
 * Build an RF_Config_t from the component labels of an autoconfig
 * set, filling in geometry, layout, queueing parameters, and the
 * device name for each column.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	/* note: this also normalizes the label's num_rows to 1 */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* 2. Record the component device name for each column. */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* 3. No debug variables are carried over from autoconfig. */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
   3360 
   3361 int
   3362 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3363 {
   3364 	RF_ComponentLabel_t *clabel;
   3365 	int column;
   3366 	int sparecol;
   3367 
   3368 	raidPtr->autoconfigure = new_value;
   3369 
   3370 	for(column=0; column<raidPtr->numCol; column++) {
   3371 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3372 			clabel = raidget_component_label(raidPtr, column);
   3373 			clabel->autoconfigure = new_value;
   3374 			raidflush_component_label(raidPtr, column);
   3375 		}
   3376 	}
   3377 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3378 		sparecol = raidPtr->numCol + column;
   3379 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3380 			clabel = raidget_component_label(raidPtr, sparecol);
   3381 			clabel->autoconfigure = new_value;
   3382 			raidflush_component_label(raidPtr, sparecol);
   3383 		}
   3384 	}
   3385 	return(new_value);
   3386 }
   3387 
   3388 int
   3389 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3390 {
   3391 	RF_ComponentLabel_t *clabel;
   3392 	int column;
   3393 	int sparecol;
   3394 
   3395 	raidPtr->root_partition = new_value;
   3396 	for(column=0; column<raidPtr->numCol; column++) {
   3397 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3398 			clabel = raidget_component_label(raidPtr, column);
   3399 			clabel->root_partition = new_value;
   3400 			raidflush_component_label(raidPtr, column);
   3401 		}
   3402 	}
   3403 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3404 		sparecol = raidPtr->numCol + column;
   3405 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3406 			clabel = raidget_component_label(raidPtr, sparecol);
   3407 			clabel->root_partition = new_value;
   3408 			raidflush_component_label(raidPtr, sparecol);
   3409 		}
   3410 	}
   3411 	return(new_value);
   3412 }
   3413 
   3414 void
   3415 rf_release_all_vps(RF_ConfigSet_t *cset)
   3416 {
   3417 	RF_AutoConfig_t *ac;
   3418 
   3419 	ac = cset->ac;
   3420 	while(ac!=NULL) {
   3421 		/* Close the vp, and give it back */
   3422 		if (ac->vp) {
   3423 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3424 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
   3425 			vput(ac->vp);
   3426 			ac->vp = NULL;
   3427 		}
   3428 		ac = ac->next;
   3429 	}
   3430 }
   3431 
   3432 
   3433 void
   3434 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3435 {
   3436 	RF_AutoConfig_t *ac;
   3437 	RF_AutoConfig_t *next_ac;
   3438 
   3439 	ac = cset->ac;
   3440 	while(ac!=NULL) {
   3441 		next_ac = ac->next;
   3442 		/* nuke the label */
   3443 		free(ac->clabel, M_RAIDFRAME);
   3444 		/* cleanup the config structure */
   3445 		free(ac, M_RAIDFRAME);
   3446 		/* "next.." */
   3447 		ac = next_ac;
   3448 	}
   3449 	/* and, finally, nuke the config set */
   3450 	free(cset, M_RAIDFRAME);
   3451 }
   3452 
   3453 
/*
 * Initialize a component label from the current state of the RAID
 * set: identity, geometry, layout parameters, and autoconfig/root
 * preferences.  Per-component fields (row/column) are filled in by
 * the caller.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
   3486 
/*
 * Autoconfigure one configuration set: pick a raid unit, build an
 * RF_Config_t from the set's labels, and configure the array.
 * Returns the configured softc, or NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Start at the unit recorded in the label; if it is already in
	   use (valid), keep probing upward until a free unit is found. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* sc == NULL means the unit did not exist yet: create it now. */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
				/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed: give the unit back */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
   3570 
/*
 * Initialize one of RAIDframe's item pools: create the pool, cap it
 * at xmax items, pre-allocate xmin items, and keep at least xmin
 * available.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
   3580 
   3581 /*
   3582  * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
   3583  * to see if there is IO pending and if that IO could possibly be done
   3584  * for a given RAID set.  Returns 0 if IO is waiting and can be done, 1
   3585  * otherwise.
   3586  *
   3587  */
   3588 int
   3589 rf_buf_queue_check(RF_Raid_t *raidPtr)
   3590 {
   3591 	struct raid_softc *rs;
   3592 	struct dk_softc *dksc;
   3593 
   3594 	rs = raidPtr->softc;
   3595 	dksc = &rs->sc_dksc;
   3596 
   3597 	if ((rs->sc_flags & RAIDF_INITED) == 0)
   3598 		return 1;
   3599 
   3600 	if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
   3601 		/* there is work to do */
   3602 		return 0;
   3603 	}
   3604 	/* default is nothing to do */
   3605 	return 1;
   3606 }
   3607 
   3608 int
   3609 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
   3610 {
   3611 	uint64_t numsecs;
   3612 	unsigned secsize;
   3613 	int error;
   3614 
   3615 	error = getdisksize(vp, &numsecs, &secsize);
   3616 	if (error == 0) {
   3617 		diskPtr->blockSize = secsize;
   3618 		diskPtr->numBlocks = numsecs - rf_protectedSectors;
   3619 		diskPtr->partitionSize = numsecs;
   3620 		return 0;
   3621 	}
   3622 	return error;
   3623 }
   3624 
/* Autoconf match: raid pseudo-devices always match. */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
   3630 
/* Autoconf attach: nothing to do; real setup happens at configure time. */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
   3635 
   3636 
   3637 static int
   3638 raid_detach(device_t self, int flags)
   3639 {
   3640 	int error;
   3641 	struct raid_softc *rs = raidsoftc(self);
   3642 
   3643 	if (rs == NULL)
   3644 		return ENXIO;
   3645 
   3646 	if ((error = raidlock(rs)) != 0)
   3647 		return (error);
   3648 
   3649 	error = raid_detach_unlocked(rs);
   3650 
   3651 	raidunlock(rs);
   3652 
   3653 	/* XXX raid can be referenced here */
   3654 
   3655 	if (error)
   3656 		return error;
   3657 
   3658 	/* Free the softc */
   3659 	raidput(rs);
   3660 
   3661 	return 0;
   3662 }
   3663 
/*
 * Publish a synthetic disk geometry for the raid unit based on the
 * array's layout, then hand it to the disk subsystem.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* fabricated track count; the geometry is synthetic anyway */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
   3679 
   3680 /*
   3681  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
   3682  * We end up returning whatever error was returned by the first cache flush
   3683  * that fails.
   3684  */
   3685 
   3686 int
   3687 rf_sync_component_caches(RF_Raid_t *raidPtr)
   3688 {
   3689 	int c, sparecol;
   3690 	int e,error;
   3691 	int force = 1;
   3692 
   3693 	error = 0;
   3694 	for (c = 0; c < raidPtr->numCol; c++) {
   3695 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   3696 			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
   3697 					  &force, FWRITE, NOCRED);
   3698 			if (e) {
   3699 				if (e != ENODEV)
   3700 					printf("raid%d: cache flush to component %s failed.\n",
   3701 					       raidPtr->raidid, raidPtr->Disks[c].devname);
   3702 				if (error == 0) {
   3703 					error = e;
   3704 				}
   3705 			}
   3706 		}
   3707 	}
   3708 
   3709 	for( c = 0; c < raidPtr->numSpare ; c++) {
   3710 		sparecol = raidPtr->numCol + c;
   3711 		/* Need to ensure that the reconstruct actually completed! */
   3712 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3713 			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
   3714 					  DIOCCACHESYNC, &force, FWRITE, NOCRED);
   3715 			if (e) {
   3716 				if (e != ENODEV)
   3717 					printf("raid%d: cache flush to component %s failed.\n",
   3718 					       raidPtr->raidid, raidPtr->Disks[sparecol].devname);
   3719 				if (error == 0) {
   3720 					error = e;
   3721 				}
   3722 			}
   3723 		}
   3724 	}
   3725 	return error;
   3726 }
   3727 
   3728 /*
   3729  * Module interface
   3730  */
   3731 
   3732 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr");
   3733 
   3734 #ifdef _MODULE
   3735 CFDRIVER_DECL(raid, DV_DISK, NULL);
   3736 #endif
   3737 
   3738 static int raid_modcmd(modcmd_t, void *);
   3739 static int raid_modcmd_init(void);
   3740 static int raid_modcmd_fini(void);
   3741 
   3742 static int
   3743 raid_modcmd(modcmd_t cmd, void *data)
   3744 {
   3745 	int error;
   3746 
   3747 	error = 0;
   3748 	switch (cmd) {
   3749 	case MODULE_CMD_INIT:
   3750 		error = raid_modcmd_init();
   3751 		break;
   3752 	case MODULE_CMD_FINI:
   3753 		error = raid_modcmd_fini();
   3754 		break;
   3755 	default:
   3756 		error = ENOTTY;
   3757 		break;
   3758 	}
   3759 	return error;
   3760 }
   3761 
/*
 * Attach the raid pseudo-device driver: create the driver lock,
 * attach the block/character devsw entries and the autoconf
 * driver/attachment glue, boot the RAIDframe core, and register a
 * config finalizer that auto-configures RAID sets once all real
 * hardware devices have been found.  Every failure path unwinds
 * whatever was attached before it.  Returns 0 or an errno value.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Synchronization state for handing spare-table requests around. */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 asks devsw_attach() to choose the major numbers; EEXIST
	 * (devsw already attached, e.g. compiled-in driver) is tolerated. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* undo the devsw attach done above */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* undo the cfdriver/devsw attaches done above */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is necessarily 0 here (all failures returned above). */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Non-fatal: proceed without auto-configuration. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
   3832 
/*
 * Detach the raid pseudo-device driver.  Refuses with EBUSY while any
 * raid unit still exists.  Detach steps are performed in the reverse
 * order of raid_modcmd_init(); when a step fails, the steps already
 * completed are re-attached so the module remains usable.  Returns 0
 * or an errno value.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist.  */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* roll back: re-attach the cfattach detached above */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* roll back: re-attach cfdriver and cfattach */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* Shut down the RAIDframe core and destroy remaining sync state. */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
   3882