Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.335
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.335 2016/01/03 08:17:24 mlelstv Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Greg Oster; Jason R. Thorpe.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright (c) 1988 University of Utah.
     34  * Copyright (c) 1990, 1993
     35  *      The Regents of the University of California.  All rights reserved.
     36  *
     37  * This code is derived from software contributed to Berkeley by
     38  * the Systems Programming Group of the University of Utah Computer
     39  * Science Department.
     40  *
     41  * Redistribution and use in source and binary forms, with or without
     42  * modification, are permitted provided that the following conditions
     43  * are met:
     44  * 1. Redistributions of source code must retain the above copyright
     45  *    notice, this list of conditions and the following disclaimer.
     46  * 2. Redistributions in binary form must reproduce the above copyright
     47  *    notice, this list of conditions and the following disclaimer in the
     48  *    documentation and/or other materials provided with the distribution.
     49  * 3. Neither the name of the University nor the names of its contributors
     50  *    may be used to endorse or promote products derived from this software
     51  *    without specific prior written permission.
     52  *
     53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     63  * SUCH DAMAGE.
     64  *
     65  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     66  *
     67  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     68  */
     69 
     70 /*
     71  * Copyright (c) 1995 Carnegie-Mellon University.
     72  * All rights reserved.
     73  *
     74  * Authors: Mark Holland, Jim Zelenka
     75  *
     76  * Permission to use, copy, modify and distribute this software and
     77  * its documentation is hereby granted, provided that both the copyright
     78  * notice and this permission notice appear in all copies of the
     79  * software, derivative works or modified versions, and any portions
     80  * thereof, and that both notices appear in supporting documentation.
     81  *
     82  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     83  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     84  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     85  *
     86  * Carnegie Mellon requests users of this software to return to
     87  *
     88  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     89  *  School of Computer Science
     90  *  Carnegie Mellon University
     91  *  Pittsburgh PA 15213-3890
     92  *
     93  * any improvements or extensions that they make and grant Carnegie the
     94  * rights to redistribute these changes.
     95  */
     96 
     97 /***********************************************************
     98  *
     99  * rf_kintf.c -- the kernel interface routines for RAIDframe
    100  *
    101  ***********************************************************/
    102 
    103 #include <sys/cdefs.h>
    104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.335 2016/01/03 08:17:24 mlelstv Exp $");
    105 
    106 #ifdef _KERNEL_OPT
    107 #include "opt_compat_netbsd.h"
    108 #include "opt_raid_autoconfig.h"
    109 #endif
    110 
    111 #include <sys/param.h>
    112 #include <sys/errno.h>
    113 #include <sys/pool.h>
    114 #include <sys/proc.h>
    115 #include <sys/queue.h>
    116 #include <sys/disk.h>
    117 #include <sys/device.h>
    118 #include <sys/stat.h>
    119 #include <sys/ioctl.h>
    120 #include <sys/fcntl.h>
    121 #include <sys/systm.h>
    122 #include <sys/vnode.h>
    123 #include <sys/disklabel.h>
    124 #include <sys/conf.h>
    125 #include <sys/buf.h>
    126 #include <sys/bufq.h>
    127 #include <sys/reboot.h>
    128 #include <sys/kauth.h>
    129 #include <sys/module.h>
    130 
    131 #include <prop/proplib.h>
    132 
    133 #include <dev/raidframe/raidframevar.h>
    134 #include <dev/raidframe/raidframeio.h>
    135 #include <dev/raidframe/rf_paritymap.h>
    136 
    137 #include "rf_raid.h"
    138 #include "rf_copyback.h"
    139 #include "rf_dag.h"
    140 #include "rf_dagflags.h"
    141 #include "rf_desc.h"
    142 #include "rf_diskqueue.h"
    143 #include "rf_etimer.h"
    144 #include "rf_general.h"
    145 #include "rf_kintf.h"
    146 #include "rf_options.h"
    147 #include "rf_driver.h"
    148 #include "rf_parityscan.h"
    149 #include "rf_threadstuff.h"
    150 
    151 #ifdef COMPAT_50
    152 #include "rf_compat50.h"
    153 #endif
    154 
    155 #include "ioconf.h"
    156 
    157 #ifdef DEBUG
    158 int     rf_kdebug_level = 0;
    159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    160 #else				/* DEBUG */
    161 #define db1_printf(a) { }
    162 #endif				/* DEBUG */
    163 
    164 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
    165 static rf_declare_mutex2(rf_sparet_wait_mutex);
    166 static rf_declare_cond2(rf_sparet_wait_cv);
    167 static rf_declare_cond2(rf_sparet_resp_cv);
    168 
    169 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    170 						 * spare table */
    171 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    172 						 * installation process */
    173 #endif
    174 
    175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    176 
    177 /* prototypes */
    178 static void KernelWakeupFunc(struct buf *);
    179 static void InitBP(struct buf *, struct vnode *, unsigned,
    180     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    181     void *, int, struct proc *);
    182 struct raid_softc;
    183 static void raidinit(struct raid_softc *);
    184 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
    185 
    186 static int raid_match(device_t, cfdata_t, void *);
    187 static void raid_attach(device_t, device_t, void *);
    188 static int raid_detach(device_t, int);
    189 
    190 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    191     daddr_t, daddr_t);
    192 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    193     daddr_t, daddr_t, int);
    194 
    195 static int raidwrite_component_label(unsigned,
    196     dev_t, struct vnode *, RF_ComponentLabel_t *);
    197 static int raidread_component_label(unsigned,
    198     dev_t, struct vnode *, RF_ComponentLabel_t *);
    199 
    200 static int raid_diskstart(device_t, struct buf *bp);
    201 static int raid_dumpblocks(device_t, void *, daddr_t, int);
    202 static int raid_lastclose(device_t);
    203 
    204 static dev_type_open(raidopen);
    205 static dev_type_close(raidclose);
    206 static dev_type_read(raidread);
    207 static dev_type_write(raidwrite);
    208 static dev_type_ioctl(raidioctl);
    209 static dev_type_strategy(raidstrategy);
    210 static dev_type_dump(raiddump);
    211 static dev_type_size(raidsize);
    212 
    213 const struct bdevsw raid_bdevsw = {
    214 	.d_open = raidopen,
    215 	.d_close = raidclose,
    216 	.d_strategy = raidstrategy,
    217 	.d_ioctl = raidioctl,
    218 	.d_dump = raiddump,
    219 	.d_psize = raidsize,
    220 	.d_discard = nodiscard,
    221 	.d_flag = D_DISK
    222 };
    223 
    224 const struct cdevsw raid_cdevsw = {
    225 	.d_open = raidopen,
    226 	.d_close = raidclose,
    227 	.d_read = raidread,
    228 	.d_write = raidwrite,
    229 	.d_ioctl = raidioctl,
    230 	.d_stop = nostop,
    231 	.d_tty = notty,
    232 	.d_poll = nopoll,
    233 	.d_mmap = nommap,
    234 	.d_kqfilter = nokqfilter,
    235 	.d_discard = nodiscard,
    236 	.d_flag = D_DISK
    237 };
    238 
    239 static struct dkdriver rf_dkdriver = {
    240 	.d_open = raidopen,
    241 	.d_close = raidclose,
    242 	.d_strategy = raidstrategy,
    243 	.d_diskstart = raid_diskstart,
    244 	.d_dumpblocks = raid_dumpblocks,
    245 	.d_lastclose = raid_lastclose,
    246 	.d_minphys = minphys
    247 };
    248 
/*
 * Per-unit software state for a RAID pseudo-device.  Linked on the
 * global "raids" list, protected by raid_lock.
 */
struct raid_softc {
	struct dk_softc sc_dksc;	/* generic disk driver state; must be first */
	int	sc_unit;	/* raid unit number (key on the global list) */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	kmutex_t sc_mutex;	/* interlock mutex */
	kcondvar_t sc_cv;	/* and the condvar */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	RF_Raid_t sc_r;		/* RAIDframe state for this set */
	LIST_ENTRY(raid_softc) sc_link;	/* entry on the global "raids" list */
};
    261 /* sc_flags */
    262 #define RAIDF_INITED	0x01	/* unit has been initialized */
    263 #define RAIDF_WLABEL	0x02	/* label area is writable */
    264 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    265 #define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
    266 #define RAIDF_DETACH  	0x10	/* detach after final close */
    267 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    268 #define RAIDF_LOCKED	0x80	/* unit is locked */
    269 
    270 #define	raidunit(x)	DISKUNIT(x)
    271 #define	raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)
    272 
    273 extern struct cfdriver raid_cd;
    274 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    275     raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    276     DVF_DETACH_SHUTDOWN);
    277 
    278 /*
    279  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    280  * Be aware that large numbers can allow the driver to consume a lot of
    281  * kernel memory, especially on writes, and in degraded mode reads.
    282  *
    283  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    284  * a single 64K write will typically require 64K for the old data,
    285  * 64K for the old parity, and 64K for the new parity, for a total
    286  * of 192K (if the parity buffer is not re-used immediately).
    287  * Even it if is used immediately, that's still 128K, which when multiplied
    288  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    289  *
    290  * Now in degraded mode, for example, a 64K read on the above setup may
    291  * require data reconstruction, which will require *all* of the 4 remaining
    292  * disks to participate -- 4 * 32K/disk == 128K again.
    293  */
    294 
    295 #ifndef RAIDOUTSTANDING
    296 #define RAIDOUTSTANDING   6
    297 #endif
    298 
    299 #define RAIDLABELDEV(dev)	\
    300 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    301 
    302 /* declared here, and made public, for the benefit of KVM stuff.. */
    303 
    304 static int raidlock(struct raid_softc *);
    305 static void raidunlock(struct raid_softc *);
    306 
    307 static int raid_detach_unlocked(struct raid_softc *);
    308 
    309 static void rf_markalldirty(RF_Raid_t *);
    310 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
    311 
    312 void rf_ReconThread(struct rf_recon_req *);
    313 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    314 void rf_CopybackThread(RF_Raid_t *raidPtr);
    315 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    316 int rf_autoconfig(device_t);
    317 void rf_buildroothack(RF_ConfigSet_t *);
    318 
    319 RF_AutoConfig_t *rf_find_raid_components(void);
    320 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    321 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    322 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
    323 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    324 int rf_set_autoconfig(RF_Raid_t *, int);
    325 int rf_set_rootpartition(RF_Raid_t *, int);
    326 void rf_release_all_vps(RF_ConfigSet_t *);
    327 void rf_cleanup_config_set(RF_ConfigSet_t *);
    328 int rf_have_enough_components(RF_ConfigSet_t *);
    329 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
    330 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
    331 
    332 /*
    333  * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
    334  * Note that this is overridden by having RAID_AUTOCONFIG as an option
    335  * in the kernel config file.
    336  */
    337 #ifdef RAID_AUTOCONFIG
    338 int raidautoconfig = 1;
    339 #else
    340 int raidautoconfig = 0;
    341 #endif
    342 static bool raidautoconfigdone = false;
    343 
    344 struct RF_Pools_s rf_pools;
    345 
    346 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
    347 static kmutex_t raid_lock;
    348 
/*
 * Allocate and minimally initialize a raid_softc for the given unit.
 * The caller is responsible for inserting it on the global list.
 */
static struct raid_softc *
raidcreate(int unit) {
	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
	/*
	 * NOTE(review): kmem_zalloc() with KM_SLEEP does not return
	 * NULL (it sleeps until memory is available per kmem(9)), so
	 * this check appears to be dead code — confirm before removing.
	 */
	if (sc == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: out of memory\n", __func__);
#endif
		return NULL;
	}
	sc->sc_unit = unit;
	cv_init(&sc->sc_cv, "raidunit");
	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
	return sc;
}
    363 
    364 static void
    365 raiddestroy(struct raid_softc *sc) {
    366 	cv_destroy(&sc->sc_cv);
    367 	mutex_destroy(&sc->sc_mutex);
    368 	kmem_free(sc, sizeof(*sc));
    369 }
    370 
    371 static struct raid_softc *
    372 raidget(int unit, bool create) {
    373 	struct raid_softc *sc;
    374 	if (unit < 0) {
    375 #ifdef DIAGNOSTIC
    376 		panic("%s: unit %d!", __func__, unit);
    377 #endif
    378 		return NULL;
    379 	}
    380 	mutex_enter(&raid_lock);
    381 	LIST_FOREACH(sc, &raids, sc_link) {
    382 		if (sc->sc_unit == unit) {
    383 			mutex_exit(&raid_lock);
    384 			return sc;
    385 		}
    386 	}
    387 	mutex_exit(&raid_lock);
    388 	if (!create)
    389 		return NULL;
    390 	if ((sc = raidcreate(unit)) == NULL)
    391 		return NULL;
    392 	mutex_enter(&raid_lock);
    393 	LIST_INSERT_HEAD(&raids, sc, sc_link);
    394 	mutex_exit(&raid_lock);
    395 	return sc;
    396 }
    397 
/*
 * Remove a softc from the global list (under raid_lock) and free it.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
    405 
/*
 * Legacy pseudo-device attach entry point.  Intentionally empty:
 * device attachment and associated initialization now occurs as part
 * of the module initialization.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
    415 
    416 int
    417 rf_autoconfig(device_t self)
    418 {
    419 	RF_AutoConfig_t *ac_list;
    420 	RF_ConfigSet_t *config_sets;
    421 
    422 	if (!raidautoconfig || raidautoconfigdone == true)
    423 		return (0);
    424 
    425 	/* XXX This code can only be run once. */
    426 	raidautoconfigdone = true;
    427 
    428 #ifdef __HAVE_CPU_BOOTCONF
    429 	/*
    430 	 * 0. find the boot device if needed first so we can use it later
    431 	 * this needs to be done before we autoconfigure any raid sets,
    432 	 * because if we use wedges we are not going to be able to open
    433 	 * the boot device later
    434 	 */
    435 	if (booted_device == NULL)
    436 		cpu_bootconf();
    437 #endif
    438 	/* 1. locate all RAID components on the system */
    439 	aprint_debug("Searching for RAID components...\n");
    440 	ac_list = rf_find_raid_components();
    441 
    442 	/* 2. Sort them into their respective sets. */
    443 	config_sets = rf_create_auto_sets(ac_list);
    444 
    445 	/*
    446 	 * 3. Evaluate each set and configure the valid ones.
    447 	 * This gets done in rf_buildroothack().
    448 	 */
    449 	rf_buildroothack(config_sets);
    450 
    451 	return 1;
    452 }
    453 
    454 static int
    455 rf_containsboot(RF_Raid_t *r, device_t bdv) {
    456 	const char *bootname = device_xname(bdv);
    457 	size_t len = strlen(bootname);
    458 
    459 	for (int col = 0; col < r->numCol; col++) {
    460 		const char *devname = r->Disks[col].devname;
    461 		devname += sizeof("/dev/") - 1;
    462 		if (strncmp(devname, "dk", 2) == 0) {
    463 			const char *parent =
    464 			    dkwedge_get_parent_name(r->Disks[col].dev);
    465 			if (parent != NULL)
    466 				devname = parent;
    467 		}
    468 		if (strncmp(devname, bootname, len) == 0) {
    469 			struct raid_softc *sc = r->softc;
    470 			aprint_debug("raid%d includes boot device %s\n",
    471 			    sc->sc_unit, devname);
    472 			return 1;
    473 		}
    474 	}
    475 	return 0;
    476 }
    477 
    478 void
    479 rf_buildroothack(RF_ConfigSet_t *config_sets)
    480 {
    481 	RF_ConfigSet_t *cset;
    482 	RF_ConfigSet_t *next_cset;
    483 	int num_root;
    484 	struct raid_softc *sc, *rsc;
    485 	struct dk_softc *dksc;
    486 
    487 	sc = rsc = NULL;
    488 	num_root = 0;
    489 	cset = config_sets;
    490 	while (cset != NULL) {
    491 		next_cset = cset->next;
    492 		if (rf_have_enough_components(cset) &&
    493 		    cset->ac->clabel->autoconfigure == 1) {
    494 			sc = rf_auto_config_set(cset);
    495 			if (sc != NULL) {
    496 				aprint_debug("raid%d: configured ok\n",
    497 				    sc->sc_unit);
    498 				if (cset->rootable) {
    499 					rsc = sc;
    500 					num_root++;
    501 				}
    502 			} else {
    503 				/* The autoconfig didn't work :( */
    504 				aprint_debug("Autoconfig failed\n");
    505 				rf_release_all_vps(cset);
    506 			}
    507 		} else {
    508 			/* we're not autoconfiguring this set...
    509 			   release the associated resources */
    510 			rf_release_all_vps(cset);
    511 		}
    512 		/* cleanup */
    513 		rf_cleanup_config_set(cset);
    514 		cset = next_cset;
    515 	}
    516 	dksc = &rsc->sc_dksc;
    517 
    518 	/* if the user has specified what the root device should be
    519 	   then we don't touch booted_device or boothowto... */
    520 
    521 	if (rootspec != NULL)
    522 		return;
    523 
    524 	/* we found something bootable... */
    525 
    526 	/*
    527 	 * XXX: The following code assumes that the root raid
    528 	 * is the first ('a') partition. This is about the best
    529 	 * we can do with a BSD disklabel, but we might be able
    530 	 * to do better with a GPT label, by setting a specified
    531 	 * attribute to indicate the root partition. We can then
    532 	 * stash the partition number in the r->root_partition
    533 	 * high bits (the bottom 2 bits are already used). For
    534 	 * now we just set booted_partition to 0 when we override
    535 	 * root.
    536 	 */
    537 	if (num_root == 1) {
    538 		device_t candidate_root;
    539 		if (dksc->sc_dkdev.dk_nwedges != 0) {
    540 			char cname[sizeof(cset->ac->devname)];
    541 			/* XXX: assume 'a' */
    542 			snprintf(cname, sizeof(cname), "%s%c",
    543 			    device_xname(dksc->sc_dev), 'a');
    544 			candidate_root = dkwedge_find_by_wname(cname);
    545 		} else
    546 			candidate_root = dksc->sc_dev;
    547 		if (booted_device == NULL ||
    548 		    rsc->sc_r.root_partition == 1 ||
    549 		    rf_containsboot(&rsc->sc_r, booted_device)) {
    550 			booted_device = candidate_root;
    551 			booted_partition = 0;	/* XXX assume 'a' */
    552 		}
    553 	} else if (num_root > 1) {
    554 
    555 		/*
    556 		 * Maybe the MD code can help. If it cannot, then
    557 		 * setroot() will discover that we have no
    558 		 * booted_device and will ask the user if nothing was
    559 		 * hardwired in the kernel config file
    560 		 */
    561 		if (booted_device == NULL)
    562 			return;
    563 
    564 		num_root = 0;
    565 		mutex_enter(&raid_lock);
    566 		LIST_FOREACH(sc, &raids, sc_link) {
    567 			RF_Raid_t *r = &sc->sc_r;
    568 			if (r->valid == 0)
    569 				continue;
    570 
    571 			if (r->root_partition == 0)
    572 				continue;
    573 
    574 			if (rf_containsboot(r, booted_device)) {
    575 				num_root++;
    576 				rsc = sc;
    577 				dksc = &rsc->sc_dksc;
    578 			}
    579 		}
    580 		mutex_exit(&raid_lock);
    581 
    582 		if (num_root == 1) {
    583 			booted_device = dksc->sc_dev;
    584 			booted_partition = 0;	/* XXX assume 'a' */
    585 		} else {
    586 			/* we can't guess.. require the user to answer... */
    587 			boothowto |= RB_ASKNAME;
    588 		}
    589 	}
    590 }
    591 
    592 static int
    593 raidsize(dev_t dev)
    594 {
    595 	struct raid_softc *rs;
    596 	struct dk_softc *dksc;
    597 	unsigned int unit;
    598 
    599 	unit = raidunit(dev);
    600 	if ((rs = raidget(unit, false)) == NULL)
    601 		return ENXIO;
    602 	dksc = &rs->sc_dksc;
    603 
    604 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    605 		return (ENODEV);
    606 
    607 	return dk_size(dksc, dev);
    608 }
    609 
    610 static int
    611 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
    612 {
    613 	unsigned int unit;
    614 	struct raid_softc *rs;
    615 	struct dk_softc *dksc;
    616 
    617 	unit = raidunit(dev);
    618 	if ((rs = raidget(unit, false)) == NULL)
    619 		return ENXIO;
    620 	dksc = &rs->sc_dksc;
    621 
    622 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    623 		return ENODEV;
    624 
    625 	return dk_dump(dksc, dev, blkno, va, size);
    626 }
    627 
    628 static int
    629 raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
    630 {
    631 	struct raid_softc *rs = raidsoftc(dev);
    632 	const struct bdevsw *bdev;
    633 	RF_Raid_t *raidPtr;
    634 	int     c, sparecol, j, scol, dumpto;
    635 	int     error = 0;
    636 
    637 	raidPtr = &rs->sc_r;
    638 
    639 	/* we only support dumping to RAID 1 sets */
    640 	if (raidPtr->Layout.numDataCol != 1 ||
    641 	    raidPtr->Layout.numParityCol != 1)
    642 		return EINVAL;
    643 
    644 	if ((error = raidlock(rs)) != 0)
    645 		return error;
    646 
    647 	/* figure out what device is alive.. */
    648 
    649 	/*
    650 	   Look for a component to dump to.  The preference for the
    651 	   component to dump to is as follows:
    652 	   1) the master
    653 	   2) a used_spare of the master
    654 	   3) the slave
    655 	   4) a used_spare of the slave
    656 	*/
    657 
    658 	dumpto = -1;
    659 	for (c = 0; c < raidPtr->numCol; c++) {
    660 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
    661 			/* this might be the one */
    662 			dumpto = c;
    663 			break;
    664 		}
    665 	}
    666 
    667 	/*
    668 	   At this point we have possibly selected a live master or a
    669 	   live slave.  We now check to see if there is a spared
    670 	   master (or a spared slave), if we didn't find a live master
    671 	   or a live slave.
    672 	*/
    673 
    674 	for (c = 0; c < raidPtr->numSpare; c++) {
    675 		sparecol = raidPtr->numCol + c;
    676 		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
    677 			/* How about this one? */
    678 			scol = -1;
    679 			for(j=0;j<raidPtr->numCol;j++) {
    680 				if (raidPtr->Disks[j].spareCol == sparecol) {
    681 					scol = j;
    682 					break;
    683 				}
    684 			}
    685 			if (scol == 0) {
    686 				/*
    687 				   We must have found a spared master!
    688 				   We'll take that over anything else
    689 				   found so far.  (We couldn't have
    690 				   found a real master before, since
    691 				   this is a used spare, and it's
    692 				   saying that it's replacing the
    693 				   master.)  On reboot (with
    694 				   autoconfiguration turned on)
    695 				   sparecol will become the 1st
    696 				   component (component0) of this set.
    697 				*/
    698 				dumpto = sparecol;
    699 				break;
    700 			} else if (scol != -1) {
    701 				/*
    702 				   Must be a spared slave.  We'll dump
    703 				   to that if we havn't found anything
    704 				   else so far.
    705 				*/
    706 				if (dumpto == -1)
    707 					dumpto = sparecol;
    708 			}
    709 		}
    710 	}
    711 
    712 	if (dumpto == -1) {
    713 		/* we couldn't find any live components to dump to!?!?
    714 		 */
    715 		error = EINVAL;
    716 		goto out;
    717 	}
    718 
    719 	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
    720 
    721 	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
    722 				blkno, va, nblk);
    723 
    724 out:
    725 	raidunlock(rs);
    726 
    727 	return error;
    728 }
    729 
    730 /* ARGSUSED */
    731 static int
    732 raidopen(dev_t dev, int flags, int fmt,
    733     struct lwp *l)
    734 {
    735 	int     unit = raidunit(dev);
    736 	struct raid_softc *rs;
    737 	struct dk_softc *dksc;
    738 	int     error = 0;
    739 	int     part, pmask;
    740 
    741 	if ((rs = raidget(unit, true)) == NULL)
    742 		return ENXIO;
    743 	if ((error = raidlock(rs)) != 0)
    744 		return (error);
    745 
    746 	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
    747 		error = EBUSY;
    748 		goto bad;
    749 	}
    750 
    751 	dksc = &rs->sc_dksc;
    752 
    753 	part = DISKPART(dev);
    754 	pmask = (1 << part);
    755 
    756 	if (!DK_BUSY(dksc, pmask) &&
    757 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    758 		/* First one... mark things as dirty... Note that we *MUST*
    759 		 have done a configure before this.  I DO NOT WANT TO BE
    760 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    761 		 THAT THEY BELONG TOGETHER!!!!! */
    762 		/* XXX should check to see if we're only open for reading
    763 		   here... If so, we needn't do this, but then need some
    764 		   other way of keeping track of what's happened.. */
    765 
    766 		rf_markalldirty(&rs->sc_r);
    767 	}
    768 
    769 	if ((rs->sc_flags & RAIDF_INITED) != 0)
    770 		error = dk_open(dksc, dev, flags, fmt, l);
    771 
    772 bad:
    773 	raidunlock(rs);
    774 
    775 	return (error);
    776 
    777 
    778 }
    779 
    780 static int
    781 raid_lastclose(device_t self)
    782 {
    783 	struct raid_softc *rs = raidsoftc(self);
    784 
    785 	/* Last one... device is not unconfigured yet.
    786 	   Device shutdown has taken care of setting the
    787 	   clean bits if RAIDF_INITED is not set
    788 	   mark things as clean... */
    789 
    790 	rf_update_component_labels(&rs->sc_r,
    791 	    RF_FINAL_COMPONENT_UPDATE);
    792 
    793 	/* pass to unlocked code */
    794 	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
    795 		rs->sc_flags |= RAIDF_DETACH;
    796 
    797 	return 0;
    798 }
    799 
/*
 * Close the raid device.  On the final close, dk_close() invokes
 * raid_lastclose() which may set RAIDF_DETACH; the actual detach (and
 * any softc teardown) is performed here after dropping the unit lock,
 * since config_detach() must not run with it held.
 */
/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int     error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		/* raid_lastclose() may have requested a detach */
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		/* never configured; just dispose of the softc */
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return (error);

}
    839 
/*
 * Wake a waiter on the raid set's iodone condvar so queued I/O gets
 * processed.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
    847 
    848 static void
    849 raidstrategy(struct buf *bp)
    850 {
    851 	unsigned int unit;
    852 	struct raid_softc *rs;
    853 	struct dk_softc *dksc;
    854 	RF_Raid_t *raidPtr;
    855 
    856 	unit = raidunit(bp->b_dev);
    857 	if ((rs = raidget(unit, false)) == NULL) {
    858 		bp->b_error = ENXIO;
    859 		goto fail;
    860 	}
    861 	if ((rs->sc_flags & RAIDF_INITED) == 0) {
    862 		bp->b_error = ENXIO;
    863 		goto fail;
    864 	}
    865 	dksc = &rs->sc_dksc;
    866 	raidPtr = &rs->sc_r;
    867 
    868 	/* Queue IO only */
    869 	if (dk_strategy_defer(dksc, bp))
    870 		goto done;
    871 
    872 	/* schedule the IO to happen at the next convenient time */
    873 	raid_wakeup(raidPtr);
    874 
    875 done:
    876 	return;
    877 
    878 fail:
    879 	bp->b_resid = bp->b_bcount;
    880 	biodone(bp);
    881 }
    882 
    883 static int
    884 raid_diskstart(device_t dev, struct buf *bp)
    885 {
    886 	struct raid_softc *rs = raidsoftc(dev);
    887 	RF_Raid_t *raidPtr;
    888 
    889 	raidPtr = &rs->sc_r;
    890 	if (!raidPtr->valid) {
    891 		db1_printf(("raid is not valid..\n"));
    892 		return ENODEV;
    893 	}
    894 
    895 	/* XXX */
    896 	bp->b_resid = 0;
    897 
    898 	return raiddoaccess(raidPtr, bp);
    899 }
    900 
    901 void
    902 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
    903 {
    904 	struct raid_softc *rs;
    905 	struct dk_softc *dksc;
    906 
    907 	rs = raidPtr->softc;
    908 	dksc = &rs->sc_dksc;
    909 
    910 	dk_done(dksc, bp);
    911 
    912 	rf_lock_mutex2(raidPtr->mutex);
    913 	raidPtr->openings++;
    914 	rf_unlock_mutex2(raidPtr->mutex);
    915 
    916 	/* schedule more IO */
    917 	raid_wakeup(raidPtr);
    918 }
    919 
    920 /* ARGSUSED */
    921 static int
    922 raidread(dev_t dev, struct uio *uio, int flags)
    923 {
    924 	int     unit = raidunit(dev);
    925 	struct raid_softc *rs;
    926 
    927 	if ((rs = raidget(unit, false)) == NULL)
    928 		return ENXIO;
    929 
    930 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    931 		return (ENXIO);
    932 
    933 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    934 
    935 }
    936 
    937 /* ARGSUSED */
    938 static int
    939 raidwrite(dev_t dev, struct uio *uio, int flags)
    940 {
    941 	int     unit = raidunit(dev);
    942 	struct raid_softc *rs;
    943 
    944 	if ((rs = raidget(unit, false)) == NULL)
    945 		return ENXIO;
    946 
    947 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    948 		return (ENXIO);
    949 
    950 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    951 
    952 }
    953 
/*
 * Tear down a configured RAID set: shut the RAIDframe engine down and
 * detach the dk(9)/disk(9) state.  Returns EBUSY if any partition is
 * still open, 0 on success or if the unit was never configured,
 * otherwise the error from rf_Shutdown().
 * NOTE(review): the "_unlocked" suffix suggests the caller already
 * holds the raid lock (cf. raidlock()) -- confirm at call sites.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* Refuse to detach while any partition is open. */
	if (DK_BUSY(dksc, 0))
		return EBUSY;

	/* Never configured: nothing to tear down. */
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	/* NOTE(review): SHUTDOWN is cleared before rf_Shutdown(); on
	 * failure below the flag stays cleared -- confirm intended. */
	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
    988 
    989 static int
    990 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
    991 {
    992 	int     unit = raidunit(dev);
    993 	int     error = 0;
    994 	int     part, pmask;
    995 	struct raid_softc *rs;
    996 	struct dk_softc *dksc;
    997 	RF_Config_t *k_cfg, *u_cfg;
    998 	RF_Raid_t *raidPtr;
    999 	RF_RaidDisk_t *diskPtr;
   1000 	RF_AccTotals_t *totals;
   1001 	RF_DeviceConfig_t *d_cfg, **ucfgp;
   1002 	u_char *specific_buf;
   1003 	int retcode = 0;
   1004 	int column;
   1005 /*	int raidid; */
   1006 	struct rf_recon_req *rrcopy, *rr;
   1007 	RF_ComponentLabel_t *clabel;
   1008 	RF_ComponentLabel_t *ci_label;
   1009 	RF_ComponentLabel_t **clabel_ptr;
   1010 	RF_SingleComponent_t *sparePtr,*componentPtr;
   1011 	RF_SingleComponent_t component;
   1012 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
   1013 	int i, j, d;
   1014 
   1015 	if ((rs = raidget(unit, false)) == NULL)
   1016 		return ENXIO;
   1017 	dksc = &rs->sc_dksc;
   1018 	raidPtr = &rs->sc_r;
   1019 
   1020 	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
   1021 		(int) DISKPART(dev), (int) unit, cmd));
   1022 
   1023 	/* Must be initialized for these... */
   1024 	switch (cmd) {
   1025 	case RAIDFRAME_REWRITEPARITY:
   1026 	case RAIDFRAME_GET_INFO:
   1027 	case RAIDFRAME_RESET_ACCTOTALS:
   1028 	case RAIDFRAME_GET_ACCTOTALS:
   1029 	case RAIDFRAME_KEEP_ACCTOTALS:
   1030 	case RAIDFRAME_GET_SIZE:
   1031 	case RAIDFRAME_FAIL_DISK:
   1032 	case RAIDFRAME_COPYBACK:
   1033 	case RAIDFRAME_CHECK_RECON_STATUS:
   1034 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1035 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1036 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1037 	case RAIDFRAME_ADD_HOT_SPARE:
   1038 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1039 	case RAIDFRAME_INIT_LABELS:
   1040 	case RAIDFRAME_REBUILD_IN_PLACE:
   1041 	case RAIDFRAME_CHECK_PARITY:
   1042 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1043 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1044 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1045 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1046 	case RAIDFRAME_SET_AUTOCONFIG:
   1047 	case RAIDFRAME_SET_ROOT:
   1048 	case RAIDFRAME_DELETE_COMPONENT:
   1049 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1050 	case RAIDFRAME_PARITYMAP_STATUS:
   1051 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
   1052 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
   1053 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
   1054 		if ((rs->sc_flags & RAIDF_INITED) == 0)
   1055 			return (ENXIO);
   1056 	}
   1057 
   1058 	switch (cmd) {
   1059 #ifdef COMPAT_50
   1060 	case RAIDFRAME_GET_INFO50:
   1061 		return rf_get_info50(raidPtr, data);
   1062 
   1063 	case RAIDFRAME_CONFIGURE50:
   1064 		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
   1065 			return retcode;
   1066 		goto config;
   1067 #endif
   1068 		/* configure the system */
   1069 	case RAIDFRAME_CONFIGURE:
   1070 
   1071 		if (raidPtr->valid) {
   1072 			/* There is a valid RAID set running on this unit! */
   1073 			printf("raid%d: Device already configured!\n",unit);
   1074 			return(EINVAL);
   1075 		}
   1076 
   1077 		/* copy-in the configuration information */
   1078 		/* data points to a pointer to the configuration structure */
   1079 
   1080 		u_cfg = *((RF_Config_t **) data);
   1081 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
   1082 		if (k_cfg == NULL) {
   1083 			return (ENOMEM);
   1084 		}
   1085 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
   1086 		if (retcode) {
   1087 			RF_Free(k_cfg, sizeof(RF_Config_t));
   1088 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
   1089 				retcode));
   1090 			goto no_config;
   1091 		}
   1092 		goto config;
   1093 	config:
   1094 		rs->sc_flags &= ~RAIDF_SHUTDOWN;
   1095 
   1096 		/* allocate a buffer for the layout-specific data, and copy it
   1097 		 * in */
   1098 		if (k_cfg->layoutSpecificSize) {
   1099 			if (k_cfg->layoutSpecificSize > 10000) {
   1100 				/* sanity check */
   1101 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1102 				retcode = EINVAL;
   1103 				goto no_config;
   1104 			}
   1105 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
   1106 			    (u_char *));
   1107 			if (specific_buf == NULL) {
   1108 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1109 				retcode = ENOMEM;
   1110 				goto no_config;
   1111 			}
   1112 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1113 			    k_cfg->layoutSpecificSize);
   1114 			if (retcode) {
   1115 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1116 				RF_Free(specific_buf,
   1117 					k_cfg->layoutSpecificSize);
   1118 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
   1119 					retcode));
   1120 				goto no_config;
   1121 			}
   1122 		} else
   1123 			specific_buf = NULL;
   1124 		k_cfg->layoutSpecific = specific_buf;
   1125 
   1126 		/* should do some kind of sanity check on the configuration.
   1127 		 * Store the sum of all the bytes in the last byte? */
   1128 
   1129 		/* configure the system */
   1130 
   1131 		/*
   1132 		 * Clear the entire RAID descriptor, just to make sure
   1133 		 *  there is no stale data left in the case of a
   1134 		 *  reconfiguration
   1135 		 */
   1136 		memset(raidPtr, 0, sizeof(*raidPtr));
   1137 		raidPtr->softc = rs;
   1138 		raidPtr->raidid = unit;
   1139 
   1140 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
   1141 
   1142 		if (retcode == 0) {
   1143 
   1144 			/* allow this many simultaneous IO's to
   1145 			   this RAID device */
   1146 			raidPtr->openings = RAIDOUTSTANDING;
   1147 
   1148 			raidinit(rs);
   1149 			raid_wakeup(raidPtr);
   1150 			rf_markalldirty(raidPtr);
   1151 		}
   1152 		/* free the buffers.  No return code here. */
   1153 		if (k_cfg->layoutSpecificSize) {
   1154 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1155 		}
   1156 		RF_Free(k_cfg, sizeof(RF_Config_t));
   1157 
   1158 	no_config:
   1159 		/*
   1160 		 * If configuration failed, set sc_flags so that we
   1161 		 * will detach the device when we close it.
   1162 		 */
   1163 		if (retcode != 0)
   1164 			rs->sc_flags |= RAIDF_SHUTDOWN;
   1165 		return (retcode);
   1166 
   1167 		/* shutdown the system */
   1168 	case RAIDFRAME_SHUTDOWN:
   1169 
   1170 		part = DISKPART(dev);
   1171 		pmask = (1 << part);
   1172 
   1173 		if ((error = raidlock(rs)) != 0)
   1174 			return (error);
   1175 
   1176 		if (DK_BUSY(dksc, pmask))
   1177 			retcode = EBUSY;
   1178 		else {
   1179 			/* detach and free on close */
   1180 			rs->sc_flags |= RAIDF_SHUTDOWN;
   1181 			retcode = 0;
   1182 		}
   1183 
   1184 		raidunlock(rs);
   1185 
   1186 		return (retcode);
   1187 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1188 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1189 		/* need to read the component label for the disk indicated
   1190 		   by row,column in clabel */
   1191 
   1192 		/*
   1193 		 * Perhaps there should be an option to skip the in-core
   1194 		 * copy and hit the disk, as with disklabel(8).
   1195 		 */
   1196 		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
   1197 
   1198 		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
   1199 
   1200 		if (retcode) {
   1201 			RF_Free(clabel, sizeof(*clabel));
   1202 			return retcode;
   1203 		}
   1204 
   1205 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1206 
   1207 		column = clabel->column;
   1208 
   1209 		if ((column < 0) || (column >= raidPtr->numCol +
   1210 		    raidPtr->numSpare)) {
   1211 			RF_Free(clabel, sizeof(*clabel));
   1212 			return EINVAL;
   1213 		}
   1214 
   1215 		RF_Free(clabel, sizeof(*clabel));
   1216 
   1217 		clabel = raidget_component_label(raidPtr, column);
   1218 
   1219 		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
   1220 
   1221 #if 0
   1222 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1223 		clabel = (RF_ComponentLabel_t *) data;
   1224 
   1225 		/* XXX check the label for valid stuff... */
   1226 		/* Note that some things *should not* get modified --
   1227 		   the user should be re-initing the labels instead of
   1228 		   trying to patch things.
   1229 		   */
   1230 
   1231 		raidid = raidPtr->raidid;
   1232 #ifdef DEBUG
   1233 		printf("raid%d: Got component label:\n", raidid);
   1234 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1235 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1236 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1237 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1238 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1239 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1240 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1241 #endif
   1242 		clabel->row = 0;
   1243 		column = clabel->column;
   1244 
   1245 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1246 			return(EINVAL);
   1247 		}
   1248 
   1249 		/* XXX this isn't allowed to do anything for now :-) */
   1250 
   1251 		/* XXX and before it is, we need to fill in the rest
   1252 		   of the fields!?!?!?! */
   1253 		memcpy(raidget_component_label(raidPtr, column),
   1254 		    clabel, sizeof(*clabel));
   1255 		raidflush_component_label(raidPtr, column);
   1256 		return (0);
   1257 #endif
   1258 
   1259 	case RAIDFRAME_INIT_LABELS:
   1260 		clabel = (RF_ComponentLabel_t *) data;
   1261 		/*
   1262 		   we only want the serial number from
   1263 		   the above.  We get all the rest of the information
   1264 		   from the config that was used to create this RAID
   1265 		   set.
   1266 		   */
   1267 
   1268 		raidPtr->serial_number = clabel->serial_number;
   1269 
   1270 		for(column=0;column<raidPtr->numCol;column++) {
   1271 			diskPtr = &raidPtr->Disks[column];
   1272 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1273 				ci_label = raidget_component_label(raidPtr,
   1274 				    column);
   1275 				/* Zeroing this is important. */
   1276 				memset(ci_label, 0, sizeof(*ci_label));
   1277 				raid_init_component_label(raidPtr, ci_label);
   1278 				ci_label->serial_number =
   1279 				    raidPtr->serial_number;
   1280 				ci_label->row = 0; /* we dont' pretend to support more */
   1281 				rf_component_label_set_partitionsize(ci_label,
   1282 				    diskPtr->partitionSize);
   1283 				ci_label->column = column;
   1284 				raidflush_component_label(raidPtr, column);
   1285 			}
   1286 			/* XXXjld what about the spares? */
   1287 		}
   1288 
   1289 		return (retcode);
   1290 	case RAIDFRAME_SET_AUTOCONFIG:
   1291 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1292 		printf("raid%d: New autoconfig value is: %d\n",
   1293 		       raidPtr->raidid, d);
   1294 		*(int *) data = d;
   1295 		return (retcode);
   1296 
   1297 	case RAIDFRAME_SET_ROOT:
   1298 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1299 		printf("raid%d: New rootpartition value is: %d\n",
   1300 		       raidPtr->raidid, d);
   1301 		*(int *) data = d;
   1302 		return (retcode);
   1303 
   1304 		/* initialize all parity */
   1305 	case RAIDFRAME_REWRITEPARITY:
   1306 
   1307 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1308 			/* Parity for RAID 0 is trivially correct */
   1309 			raidPtr->parity_good = RF_RAID_CLEAN;
   1310 			return(0);
   1311 		}
   1312 
   1313 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1314 			/* Re-write is already in progress! */
   1315 			return(EINVAL);
   1316 		}
   1317 
   1318 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1319 					   rf_RewriteParityThread,
   1320 					   raidPtr,"raid_parity");
   1321 		return (retcode);
   1322 
   1323 
   1324 	case RAIDFRAME_ADD_HOT_SPARE:
   1325 		sparePtr = (RF_SingleComponent_t *) data;
   1326 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
   1327 		retcode = rf_add_hot_spare(raidPtr, &component);
   1328 		return(retcode);
   1329 
   1330 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1331 		return(retcode);
   1332 
   1333 	case RAIDFRAME_DELETE_COMPONENT:
   1334 		componentPtr = (RF_SingleComponent_t *)data;
   1335 		memcpy( &component, componentPtr,
   1336 			sizeof(RF_SingleComponent_t));
   1337 		retcode = rf_delete_component(raidPtr, &component);
   1338 		return(retcode);
   1339 
   1340 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1341 		componentPtr = (RF_SingleComponent_t *)data;
   1342 		memcpy( &component, componentPtr,
   1343 			sizeof(RF_SingleComponent_t));
   1344 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1345 		return(retcode);
   1346 
   1347 	case RAIDFRAME_REBUILD_IN_PLACE:
   1348 
   1349 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1350 			/* Can't do this on a RAID 0!! */
   1351 			return(EINVAL);
   1352 		}
   1353 
   1354 		if (raidPtr->recon_in_progress == 1) {
   1355 			/* a reconstruct is already in progress! */
   1356 			return(EINVAL);
   1357 		}
   1358 
   1359 		componentPtr = (RF_SingleComponent_t *) data;
   1360 		memcpy( &component, componentPtr,
   1361 			sizeof(RF_SingleComponent_t));
   1362 		component.row = 0; /* we don't support any more */
   1363 		column = component.column;
   1364 
   1365 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1366 			return(EINVAL);
   1367 		}
   1368 
   1369 		rf_lock_mutex2(raidPtr->mutex);
   1370 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1371 		    (raidPtr->numFailures > 0)) {
   1372 			/* XXX 0 above shouldn't be constant!!! */
   1373 			/* some component other than this has failed.
   1374 			   Let's not make things worse than they already
   1375 			   are... */
   1376 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1377 			       raidPtr->raidid);
   1378 			printf("raid%d:     Col: %d   Too many failures.\n",
   1379 			       raidPtr->raidid, column);
   1380 			rf_unlock_mutex2(raidPtr->mutex);
   1381 			return (EINVAL);
   1382 		}
   1383 		if (raidPtr->Disks[column].status ==
   1384 		    rf_ds_reconstructing) {
   1385 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1386 			       raidPtr->raidid);
   1387 			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);
   1388 
   1389 			rf_unlock_mutex2(raidPtr->mutex);
   1390 			return (EINVAL);
   1391 		}
   1392 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1393 			rf_unlock_mutex2(raidPtr->mutex);
   1394 			return (EINVAL);
   1395 		}
   1396 		rf_unlock_mutex2(raidPtr->mutex);
   1397 
   1398 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1399 		if (rrcopy == NULL)
   1400 			return(ENOMEM);
   1401 
   1402 		rrcopy->raidPtr = (void *) raidPtr;
   1403 		rrcopy->col = column;
   1404 
   1405 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1406 					   rf_ReconstructInPlaceThread,
   1407 					   rrcopy,"raid_reconip");
   1408 		return(retcode);
   1409 
   1410 	case RAIDFRAME_GET_INFO:
   1411 		if (!raidPtr->valid)
   1412 			return (ENODEV);
   1413 		ucfgp = (RF_DeviceConfig_t **) data;
   1414 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1415 			  (RF_DeviceConfig_t *));
   1416 		if (d_cfg == NULL)
   1417 			return (ENOMEM);
   1418 		d_cfg->rows = 1; /* there is only 1 row now */
   1419 		d_cfg->cols = raidPtr->numCol;
   1420 		d_cfg->ndevs = raidPtr->numCol;
   1421 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1422 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1423 			return (ENOMEM);
   1424 		}
   1425 		d_cfg->nspares = raidPtr->numSpare;
   1426 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1427 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1428 			return (ENOMEM);
   1429 		}
   1430 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1431 		d = 0;
   1432 		for (j = 0; j < d_cfg->cols; j++) {
   1433 			d_cfg->devs[d] = raidPtr->Disks[j];
   1434 			d++;
   1435 		}
   1436 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1437 			d_cfg->spares[i] = raidPtr->Disks[j];
   1438 			if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
   1439 				/* XXX: raidctl(8) expects to see this as a used spare */
   1440 				d_cfg->spares[i].status = rf_ds_used_spare;
   1441 			}
   1442 		}
   1443 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1444 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1445 
   1446 		return (retcode);
   1447 
   1448 	case RAIDFRAME_CHECK_PARITY:
   1449 		*(int *) data = raidPtr->parity_good;
   1450 		return (0);
   1451 
   1452 	case RAIDFRAME_PARITYMAP_STATUS:
   1453 		if (rf_paritymap_ineligible(raidPtr))
   1454 			return EINVAL;
   1455 		rf_paritymap_status(raidPtr->parity_map,
   1456 		    (struct rf_pmstat *)data);
   1457 		return 0;
   1458 
   1459 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
   1460 		if (rf_paritymap_ineligible(raidPtr))
   1461 			return EINVAL;
   1462 		if (raidPtr->parity_map == NULL)
   1463 			return ENOENT; /* ??? */
   1464 		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
   1465 			(struct rf_pmparams *)data, 1))
   1466 			return EINVAL;
   1467 		return 0;
   1468 
   1469 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
   1470 		if (rf_paritymap_ineligible(raidPtr))
   1471 			return EINVAL;
   1472 		*(int *) data = rf_paritymap_get_disable(raidPtr);
   1473 		return 0;
   1474 
   1475 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
   1476 		if (rf_paritymap_ineligible(raidPtr))
   1477 			return EINVAL;
   1478 		rf_paritymap_set_disable(raidPtr, *(int *)data);
   1479 		/* XXX should errors be passed up? */
   1480 		return 0;
   1481 
   1482 	case RAIDFRAME_RESET_ACCTOTALS:
   1483 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1484 		return (0);
   1485 
   1486 	case RAIDFRAME_GET_ACCTOTALS:
   1487 		totals = (RF_AccTotals_t *) data;
   1488 		*totals = raidPtr->acc_totals;
   1489 		return (0);
   1490 
   1491 	case RAIDFRAME_KEEP_ACCTOTALS:
   1492 		raidPtr->keep_acc_totals = *(int *)data;
   1493 		return (0);
   1494 
   1495 	case RAIDFRAME_GET_SIZE:
   1496 		*(int *) data = raidPtr->totalSectors;
   1497 		return (0);
   1498 
   1499 		/* fail a disk & optionally start reconstruction */
   1500 	case RAIDFRAME_FAIL_DISK:
   1501 
   1502 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1503 			/* Can't do this on a RAID 0!! */
   1504 			return(EINVAL);
   1505 		}
   1506 
   1507 		rr = (struct rf_recon_req *) data;
   1508 		rr->row = 0;
   1509 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1510 			return (EINVAL);
   1511 
   1512 
   1513 		rf_lock_mutex2(raidPtr->mutex);
   1514 		if (raidPtr->status == rf_rs_reconstructing) {
   1515 			/* you can't fail a disk while we're reconstructing! */
   1516 			/* XXX wrong for RAID6 */
   1517 			rf_unlock_mutex2(raidPtr->mutex);
   1518 			return (EINVAL);
   1519 		}
   1520 		if ((raidPtr->Disks[rr->col].status ==
   1521 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1522 			/* some other component has failed.  Let's not make
   1523 			   things worse. XXX wrong for RAID6 */
   1524 			rf_unlock_mutex2(raidPtr->mutex);
   1525 			return (EINVAL);
   1526 		}
   1527 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1528 			/* Can't fail a spared disk! */
   1529 			rf_unlock_mutex2(raidPtr->mutex);
   1530 			return (EINVAL);
   1531 		}
   1532 		rf_unlock_mutex2(raidPtr->mutex);
   1533 
   1534 		/* make a copy of the recon request so that we don't rely on
   1535 		 * the user's buffer */
   1536 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1537 		if (rrcopy == NULL)
   1538 			return(ENOMEM);
   1539 		memcpy(rrcopy, rr, sizeof(*rr));
   1540 		rrcopy->raidPtr = (void *) raidPtr;
   1541 
   1542 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1543 					   rf_ReconThread,
   1544 					   rrcopy,"raid_recon");
   1545 		return (0);
   1546 
   1547 		/* invoke a copyback operation after recon on whatever disk
   1548 		 * needs it, if any */
   1549 	case RAIDFRAME_COPYBACK:
   1550 
   1551 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1552 			/* This makes no sense on a RAID 0!! */
   1553 			return(EINVAL);
   1554 		}
   1555 
   1556 		if (raidPtr->copyback_in_progress == 1) {
   1557 			/* Copyback is already in progress! */
   1558 			return(EINVAL);
   1559 		}
   1560 
   1561 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1562 					   rf_CopybackThread,
   1563 					   raidPtr,"raid_copyback");
   1564 		return (retcode);
   1565 
   1566 		/* return the percentage completion of reconstruction */
   1567 	case RAIDFRAME_CHECK_RECON_STATUS:
   1568 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1569 			/* This makes no sense on a RAID 0, so tell the
   1570 			   user it's done. */
   1571 			*(int *) data = 100;
   1572 			return(0);
   1573 		}
   1574 		if (raidPtr->status != rf_rs_reconstructing)
   1575 			*(int *) data = 100;
   1576 		else {
   1577 			if (raidPtr->reconControl->numRUsTotal > 0) {
   1578 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
   1579 			} else {
   1580 				*(int *) data = 0;
   1581 			}
   1582 		}
   1583 		return (0);
   1584 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1585 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1586 		if (raidPtr->status != rf_rs_reconstructing) {
   1587 			progressInfo.remaining = 0;
   1588 			progressInfo.completed = 100;
   1589 			progressInfo.total = 100;
   1590 		} else {
   1591 			progressInfo.total =
   1592 				raidPtr->reconControl->numRUsTotal;
   1593 			progressInfo.completed =
   1594 				raidPtr->reconControl->numRUsComplete;
   1595 			progressInfo.remaining = progressInfo.total -
   1596 				progressInfo.completed;
   1597 		}
   1598 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1599 				  sizeof(RF_ProgressInfo_t));
   1600 		return (retcode);
   1601 
   1602 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1603 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1604 			/* This makes no sense on a RAID 0, so tell the
   1605 			   user it's done. */
   1606 			*(int *) data = 100;
   1607 			return(0);
   1608 		}
   1609 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1610 			*(int *) data = 100 *
   1611 				raidPtr->parity_rewrite_stripes_done /
   1612 				raidPtr->Layout.numStripe;
   1613 		} else {
   1614 			*(int *) data = 100;
   1615 		}
   1616 		return (0);
   1617 
   1618 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1619 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1620 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1621 			progressInfo.total = raidPtr->Layout.numStripe;
   1622 			progressInfo.completed =
   1623 				raidPtr->parity_rewrite_stripes_done;
   1624 			progressInfo.remaining = progressInfo.total -
   1625 				progressInfo.completed;
   1626 		} else {
   1627 			progressInfo.remaining = 0;
   1628 			progressInfo.completed = 100;
   1629 			progressInfo.total = 100;
   1630 		}
   1631 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1632 				  sizeof(RF_ProgressInfo_t));
   1633 		return (retcode);
   1634 
   1635 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1636 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1637 			/* This makes no sense on a RAID 0 */
   1638 			*(int *) data = 100;
   1639 			return(0);
   1640 		}
   1641 		if (raidPtr->copyback_in_progress == 1) {
   1642 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1643 				raidPtr->Layout.numStripe;
   1644 		} else {
   1645 			*(int *) data = 100;
   1646 		}
   1647 		return (0);
   1648 
   1649 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1650 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1651 		if (raidPtr->copyback_in_progress == 1) {
   1652 			progressInfo.total = raidPtr->Layout.numStripe;
   1653 			progressInfo.completed =
   1654 				raidPtr->copyback_stripes_done;
   1655 			progressInfo.remaining = progressInfo.total -
   1656 				progressInfo.completed;
   1657 		} else {
   1658 			progressInfo.remaining = 0;
   1659 			progressInfo.completed = 100;
   1660 			progressInfo.total = 100;
   1661 		}
   1662 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1663 				  sizeof(RF_ProgressInfo_t));
   1664 		return (retcode);
   1665 
   1666 		/* the sparetable daemon calls this to wait for the kernel to
   1667 		 * need a spare table. this ioctl does not return until a
   1668 		 * spare table is needed. XXX -- calling mpsleep here in the
   1669 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1670 		 * -- I should either compute the spare table in the kernel,
   1671 		 * or have a different -- XXX XXX -- interface (a different
   1672 		 * character device) for delivering the table     -- XXX */
   1673 #if 0
   1674 	case RAIDFRAME_SPARET_WAIT:
   1675 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1676 		while (!rf_sparet_wait_queue)
   1677 			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
   1678 		waitreq = rf_sparet_wait_queue;
   1679 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1680 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1681 
   1682 		/* structure assignment */
   1683 		*((RF_SparetWait_t *) data) = *waitreq;
   1684 
   1685 		RF_Free(waitreq, sizeof(*waitreq));
   1686 		return (0);
   1687 
   1688 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1689 		 * code in it that will cause the dameon to exit */
   1690 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1691 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1692 		waitreq->fcol = -1;
   1693 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1694 		waitreq->next = rf_sparet_wait_queue;
   1695 		rf_sparet_wait_queue = waitreq;
   1696 		rf_broadcast_conf2(rf_sparet_wait_cv);
   1697 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1698 		return (0);
   1699 
   1700 		/* used by the spare table daemon to deliver a spare table
   1701 		 * into the kernel */
   1702 	case RAIDFRAME_SEND_SPARET:
   1703 
   1704 		/* install the spare table */
   1705 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1706 
   1707 		/* respond to the requestor.  the return status of the spare
   1708 		 * table installation is passed in the "fcol" field */
   1709 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1710 		waitreq->fcol = retcode;
   1711 		rf_lock_mutex2(rf_sparet_wait_mutex);
   1712 		waitreq->next = rf_sparet_resp_queue;
   1713 		rf_sparet_resp_queue = waitreq;
   1714 		rf_broadcast_cond2(rf_sparet_resp_cv);
   1715 		rf_unlock_mutex2(rf_sparet_wait_mutex);
   1716 
   1717 		return (retcode);
   1718 #endif
   1719 
   1720 	default:
   1721 		break; /* fall through to the os-specific code below */
   1722 
   1723 	}
   1724 
   1725 	if (!raidPtr->valid)
   1726 		return (EINVAL);
   1727 
   1728 	/*
   1729 	 * Add support for "regular" device ioctls here.
   1730 	 */
   1731 
   1732 	error = dk_ioctl(dksc, dev, cmd, data, flag, l);
   1733 	if (error != EPASSTHROUGH)
   1734 		return (error);
   1735 
   1736 	switch (cmd) {
   1737 	case DIOCCACHESYNC:
   1738 		return rf_sync_component_caches(raidPtr);
   1739 
   1740 	default:
   1741 		retcode = ENOTTY;
   1742 	}
   1743 	return (retcode);
   1744 
   1745 }
   1746 
   1747 
   1748 /* raidinit -- complete the rest of the initialization for the
   1749    RAIDframe device.  */
   1750 
   1751 
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		/* Attach failed: free the cfdata and bail without
		 * setting RAIDF_INITED (no error is propagated). */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	/* Look for wedges (gpt partitions etc.) on the new disk. */
	dkwedge_discover(&dksc->sc_dkdev);
}
   1807 
   1808 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   1809 /* wake up the daemon & tell it to get us a spare table
   1810  * XXX
   1811  * the entries in the queues should be tagged with the raidPtr
   1812  * so that in the extremely rare case that two recons happen at once,
   1813  * we know for which device were requesting a spare table
   1814  * XXX
   1815  *
   1816  * XXX This code is not currently used. GO
   1817  */
/*
 * Hand a spare-table request to the userland daemon and sleep until
 * the response arrives.  Returns the status the daemon placed in the
 * response's fcol field, and frees the response structure.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	/* Post the request on the wait queue and wake the daemon. */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* rf_wait_cond2 releases the mutex while asleep and re-takes it
	 * before returning; loop until a response has been queued. */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
   1841 #endif
   1842 
   1843 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1844  * bp & passes it down.
   1845  * any calls originating in the kernel must use non-blocking I/O
   1846  * do some extra sanity checking to return "appropriate" error values for
   1847  * certain conditions (to make some standard utilities work)
   1848  *
   1849  * Formerly known as: rf_DoAccessKernel
   1850  */
   1851 void
   1852 raidstart(RF_Raid_t *raidPtr)
   1853 {
   1854 	struct raid_softc *rs;
   1855 	struct dk_softc *dksc;
   1856 
   1857 	rs = raidPtr->softc;
   1858 	dksc = &rs->sc_dksc;
   1859 	/* quick check to see if anything has died recently */
   1860 	rf_lock_mutex2(raidPtr->mutex);
   1861 	if (raidPtr->numNewFailures > 0) {
   1862 		rf_unlock_mutex2(raidPtr->mutex);
   1863 		rf_update_component_labels(raidPtr,
   1864 					   RF_NORMAL_COMPONENT_UPDATE);
   1865 		rf_lock_mutex2(raidPtr->mutex);
   1866 		raidPtr->numNewFailures--;
   1867 	}
   1868 	rf_unlock_mutex2(raidPtr->mutex);
   1869 
   1870 	if ((rs->sc_flags & RAIDF_INITED) == 0) {
   1871 		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
   1872 		return;
   1873 	}
   1874 
   1875 	dk_start(dksc, NULL);
   1876 }
   1877 
   1878 static int
   1879 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
   1880 {
   1881 	RF_SectorCount_t num_blocks, pb, sum;
   1882 	RF_RaidAddr_t raid_addr;
   1883 	daddr_t blocknum;
   1884 	int     do_async;
   1885 	int rc;
   1886 
   1887 	rf_lock_mutex2(raidPtr->mutex);
   1888 	if (raidPtr->openings == 0) {
   1889 		rf_unlock_mutex2(raidPtr->mutex);
   1890 		return EAGAIN;
   1891 	}
   1892 	rf_unlock_mutex2(raidPtr->mutex);
   1893 
   1894 	blocknum = bp->b_rawblkno;
   1895 
   1896 	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1897 		    (int) blocknum));
   1898 
   1899 	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1900 	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1901 
   1902 	/* *THIS* is where we adjust what block we're going to...
   1903 	 * but DO NOT TOUCH bp->b_blkno!!! */
   1904 	raid_addr = blocknum;
   1905 
   1906 	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1907 	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1908 	sum = raid_addr + num_blocks + pb;
   1909 	if (1 || rf_debugKernelAccess) {
   1910 		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1911 			    (int) raid_addr, (int) sum, (int) num_blocks,
   1912 			    (int) pb, (int) bp->b_resid));
   1913 	}
   1914 	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1915 	    || (sum < num_blocks) || (sum < pb)) {
   1916 		rc = ENOSPC;
   1917 		goto done;
   1918 	}
   1919 	/*
   1920 	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1921 	 */
   1922 
   1923 	if (bp->b_bcount & raidPtr->sectorMask) {
   1924 		rc = ENOSPC;
   1925 		goto done;
   1926 	}
   1927 	db1_printf(("Calling DoAccess..\n"));
   1928 
   1929 
   1930 	rf_lock_mutex2(raidPtr->mutex);
   1931 	raidPtr->openings--;
   1932 	rf_unlock_mutex2(raidPtr->mutex);
   1933 
   1934 	/*
   1935 	 * Everything is async.
   1936 	 */
   1937 	do_async = 1;
   1938 
   1939 	/* don't ever condition on bp->b_flags & B_WRITE.
   1940 	 * always condition on B_READ instead */
   1941 
   1942 	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1943 			 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1944 			 do_async, raid_addr, num_blocks,
   1945 			 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1946 
   1947 done:
   1948 	return rc;
   1949 }
   1950 
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): the extra parentheses make this a plain
		 * printf of a parenthesized expression -- possibly meant
		 * to be db1_printf; confirm before changing. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* Fake an immediate completion so the normal iodone
		 * path (KernelWakeupFunc) runs for this NOP. */
		bp->b_flags = 0;
		bp->b_private = req;

		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		/* Start timing the physical I/O; stopped in
		 * KernelWakeupFunc. */
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the buf for the component device; completion
		 * comes back through KernelWakeupFunc via b_iodone. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.
   Runs as the buf's b_iodone handler: records tracing data, marks a
   component failed on I/O error (if the set can tolerate it), and hands
   the request to the raidio thread via the iodone queue.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The originating request was stashed in b_private by
	 * rf_DispatchKernelIO/InitBP. */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	/* Close out the disk-wait timer started at dispatch. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		/* Only fail a component that is currently optimal or an
		 * in-use spare, and only while the layout can still
		 * tolerate another failure. */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures triggers a label update in
			 * raidstart. */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
   2095 
   2096 
   2097 /*
   2098  * initialize a buf structure for doing an I/O in the kernel.
   2099  */
   2100 static void
   2101 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2102        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
   2103        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
   2104        struct proc *b_proc)
   2105 {
   2106 	/* bp->b_flags       = B_PHYS | rw_flag; */
   2107 	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
   2108 	bp->b_oflags = 0;
   2109 	bp->b_cflags = 0;
   2110 	bp->b_bcount = numSect << logBytesPerSector;
   2111 	bp->b_bufsize = bp->b_bcount;
   2112 	bp->b_error = 0;
   2113 	bp->b_dev = dev;
   2114 	bp->b_data = bf;
   2115 	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
   2116 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2117 	if (bp->b_bcount == 0) {
   2118 		panic("bp->b_bcount is zero in InitBP!!");
   2119 	}
   2120 	bp->b_proc = b_proc;
   2121 	bp->b_iodone = cbFunc;
   2122 	bp->b_private = cbArg;
   2123 }
   2124 
   2125 /*
   2126  * Wait interruptibly for an exclusive lock.
   2127  *
   2128  * XXX
   2129  * Several drivers do this; it should be abstracted and made MP-safe.
   2130  * (Hmm... where have we seen this warning before :->  GO )
   2131  */
   2132 static int
   2133 raidlock(struct raid_softc *rs)
   2134 {
   2135 	int     error;
   2136 
   2137 	error = 0;
   2138 	mutex_enter(&rs->sc_mutex);
   2139 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2140 		rs->sc_flags |= RAIDF_WANTED;
   2141 		error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
   2142 		if (error != 0)
   2143 			goto done;
   2144 	}
   2145 	rs->sc_flags |= RAIDF_LOCKED;
   2146 done:
   2147 	mutex_exit(&rs->sc_mutex);
   2148 	return (error);
   2149 }
   2150 /*
   2151  * Unlock and wake up any waiters.
   2152  */
   2153 static void
   2154 raidunlock(struct raid_softc *rs)
   2155 {
   2156 
   2157 	mutex_enter(&rs->sc_mutex);
   2158 	rs->sc_flags &= ~RAIDF_LOCKED;
   2159 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2160 		rs->sc_flags &= ~RAIDF_WANTED;
   2161 		cv_broadcast(&rs->sc_cv);
   2162 	}
   2163 	mutex_exit(&rs->sc_mutex);
   2164 }
   2165 
   2166 
   2167 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2168 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2169 #define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
   2170 
/* Byte offset on each component where the component label area lives. */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}
   2177 
   2178 static daddr_t
   2179 rf_component_info_size(unsigned secsize)
   2180 {
   2181 	daddr_t info_size;
   2182 
   2183 	KASSERT(secsize);
   2184 	if (secsize > RF_COMPONENT_INFO_SIZE)
   2185 		info_size = secsize;
   2186 	else
   2187 		info_size = RF_COMPONENT_INFO_SIZE;
   2188 
   2189 	return info_size;
   2190 }
   2191 
   2192 static daddr_t
   2193 rf_parity_map_offset(RF_Raid_t *raidPtr)
   2194 {
   2195 	daddr_t map_offset;
   2196 
   2197 	KASSERT(raidPtr->bytesPerSector);
   2198 	if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
   2199 		map_offset = raidPtr->bytesPerSector;
   2200 	else
   2201 		map_offset = RF_COMPONENT_INFO_SIZE;
   2202 	map_offset += rf_component_info_offset();
   2203 
   2204 	return map_offset;
   2205 }
   2206 
   2207 static daddr_t
   2208 rf_parity_map_size(RF_Raid_t *raidPtr)
   2209 {
   2210 	daddr_t map_size;
   2211 
   2212 	if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
   2213 		map_size = raidPtr->bytesPerSector;
   2214 	else
   2215 		map_size = RF_PARITY_MAP_SIZE;
   2216 
   2217 	return map_size;
   2218 }
   2219 
   2220 int
   2221 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2222 {
   2223 	RF_ComponentLabel_t *clabel;
   2224 
   2225 	clabel = raidget_component_label(raidPtr, col);
   2226 	clabel->clean = RF_RAID_CLEAN;
   2227 	raidflush_component_label(raidPtr, col);
   2228 	return(0);
   2229 }
   2230 
   2231 
   2232 int
   2233 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2234 {
   2235 	RF_ComponentLabel_t *clabel;
   2236 
   2237 	clabel = raidget_component_label(raidPtr, col);
   2238 	clabel->clean = RF_RAID_DIRTY;
   2239 	raidflush_component_label(raidPtr, col);
   2240 	return(0);
   2241 }
   2242 
/* Read column 'col's component label from disk into the in-core copy
 * (raid_cinfo[col].ci_label).  Returns the error from the read. */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
   2252 
/* Return a pointer to the in-core component label for column 'col'.
 * Callers mutate it in place and then raidflush_component_label(). */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
   2258 
   2259 int
   2260 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2261 {
   2262 	RF_ComponentLabel_t *label;
   2263 
   2264 	label = &raidPtr->raid_cinfo[col].ci_label;
   2265 	label->mod_counter = raidPtr->mod_counter;
   2266 #ifndef RF_NO_PARITY_MAP
   2267 	label->parity_map_modcount = label->mod_counter;
   2268 #endif
   2269 	return raidwrite_component_label(raidPtr->bytesPerSector,
   2270 	    raidPtr->Disks[col].dev,
   2271 	    raidPtr->raid_cinfo[col].ci_vp, label);
   2272 }
   2273 
   2274 
/* Convenience wrapper: read a component label from the standard label
 * area of the given component device. */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
   2284 
   2285 /* ARGSUSED */
   2286 static int
   2287 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
   2288     size_t msize, daddr_t offset, daddr_t dsize)
   2289 {
   2290 	struct buf *bp;
   2291 	int error;
   2292 
   2293 	/* XXX should probably ensure that we don't try to do this if
   2294 	   someone has changed rf_protected_sectors. */
   2295 
   2296 	if (b_vp == NULL) {
   2297 		/* For whatever reason, this component is not valid.
   2298 		   Don't try to read a component label from it. */
   2299 		return(EINVAL);
   2300 	}
   2301 
   2302 	/* get a block of the appropriate size... */
   2303 	bp = geteblk((int)dsize);
   2304 	bp->b_dev = dev;
   2305 
   2306 	/* get our ducks in a row for the read */
   2307 	bp->b_blkno = offset / DEV_BSIZE;
   2308 	bp->b_bcount = dsize;
   2309 	bp->b_flags |= B_READ;
   2310  	bp->b_resid = dsize;
   2311 
   2312 	bdev_strategy(bp);
   2313 	error = biowait(bp);
   2314 
   2315 	if (!error) {
   2316 		memcpy(data, bp->b_data, msize);
   2317 	}
   2318 
   2319 	brelse(bp, 0);
   2320 	return(error);
   2321 }
   2322 
   2323 
/* Convenience wrapper: write a component label to the standard label
 * area of the given component device (synchronously). */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
   2333 
/* ARGSUSED */
/*
 * Write 'msize' bytes from 'data' (zero-padded to 'dsize') at byte
 * 'offset' on the component device.  With asyncp set the write is
 * issued B_ASYNC and 0 is returned immediately; otherwise we wait for
 * completion and return the I/O error, if any.
 */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
 	bp->b_resid = dsize;

	/* Zero the tail so the area past msize is deterministic. */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		/* NOTE(review): no brelse here -- presumably the async
		 * completion path releases the B_ASYNC buffer; confirm. */
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}
   2368 
   2369 void
   2370 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
   2371 {
   2372 	int c;
   2373 
   2374 	for (c = 0; c < raidPtr->numCol; c++) {
   2375 		/* Skip dead disks. */
   2376 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
   2377 			continue;
   2378 		/* XXXjld: what if an error occurs here? */
   2379 		raidwrite_component_area(raidPtr->Disks[c].dev,
   2380 		    raidPtr->raid_cinfo[c].ci_vp, map,
   2381 		    RF_PARITYMAP_NBYTE,
   2382 		    rf_parity_map_offset(raidPtr),
   2383 		    rf_parity_map_size(raidPtr), 0);
   2384 	}
   2385 }
   2386 
   2387 void
   2388 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
   2389 {
   2390 	struct rf_paritymap_ondisk tmp;
   2391 	int c,first;
   2392 
   2393 	first=1;
   2394 	for (c = 0; c < raidPtr->numCol; c++) {
   2395 		/* Skip dead disks. */
   2396 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
   2397 			continue;
   2398 		raidread_component_area(raidPtr->Disks[c].dev,
   2399 		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
   2400 		    RF_PARITYMAP_NBYTE,
   2401 		    rf_parity_map_offset(raidPtr),
   2402 		    rf_parity_map_size(raidPtr));
   2403 		if (first) {
   2404 			memcpy(map, &tmp, sizeof(*map));
   2405 			first = 0;
   2406 		} else {
   2407 			rf_paritymap_merge(map, &tmp);
   2408 		}
   2409 	}
   2410 }
   2411 
/*
 * Mark every live component (and every in-use spare) of the array as
 * dirty on disk, bumping the array's modification counter first.
 * Called so that an unclean shutdown can be detected later.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	/* Now handle the spares: an in-use spare gets a label naming the
	 * column it stands in for. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which data column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
   2471 
   2472 
/*
 * Rewrite the component labels of all optimal components and in-use
 * spares, bumping the modification counter.  If 'final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, also mark the
 * components clean (the shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which data column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
   2547 
   2548 void
   2549 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2550 {
   2551 
   2552 	if (vp != NULL) {
   2553 		if (auto_configured == 1) {
   2554 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2555 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2556 			vput(vp);
   2557 
   2558 		} else {
   2559 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
   2560 		}
   2561 	}
   2562 }
   2563 
   2564 
   2565 void
   2566 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2567 {
   2568 	int r,c;
   2569 	struct vnode *vp;
   2570 	int acd;
   2571 
   2572 
   2573 	/* We take this opportunity to close the vnodes like we should.. */
   2574 
   2575 	for (c = 0; c < raidPtr->numCol; c++) {
   2576 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2577 		acd = raidPtr->Disks[c].auto_configured;
   2578 		rf_close_component(raidPtr, vp, acd);
   2579 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2580 		raidPtr->Disks[c].auto_configured = 0;
   2581 	}
   2582 
   2583 	for (r = 0; r < raidPtr->numSpare; r++) {
   2584 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2585 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2586 		rf_close_component(raidPtr, vp, acd);
   2587 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2588 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2589 	}
   2590 }
   2591 
   2592 
   2593 void
   2594 rf_ReconThread(struct rf_recon_req *req)
   2595 {
   2596 	int     s;
   2597 	RF_Raid_t *raidPtr;
   2598 
   2599 	s = splbio();
   2600 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2601 	raidPtr->recon_in_progress = 1;
   2602 
   2603 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2604 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2605 
   2606 	RF_Free(req, sizeof(*req));
   2607 
   2608 	raidPtr->recon_in_progress = 0;
   2609 	splx(s);
   2610 
   2611 	/* That's all... */
   2612 	kthread_exit(0);	/* does not return */
   2613 }
   2614 
/*
 * Kernel thread body: rewrite all parity on the array.  On success the
 * in-core parity state is marked clean; on failure the error is logged.
 * Wakes anyone in shutdown waiting on parity_rewrite_in_progress.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
   2645 
   2646 
   2647 void
   2648 rf_CopybackThread(RF_Raid_t *raidPtr)
   2649 {
   2650 	int s;
   2651 
   2652 	raidPtr->copyback_in_progress = 1;
   2653 	s = splbio();
   2654 	rf_CopybackReconstructedData(raidPtr);
   2655 	splx(s);
   2656 	raidPtr->copyback_in_progress = 0;
   2657 
   2658 	/* That's all... */
   2659 	kthread_exit(0);	/* does not return */
   2660 }
   2661 
   2662 
   2663 void
   2664 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
   2665 {
   2666 	int s;
   2667 	RF_Raid_t *raidPtr;
   2668 
   2669 	s = splbio();
   2670 	raidPtr = req->raidPtr;
   2671 	raidPtr->recon_in_progress = 1;
   2672 	rf_ReconstructInPlace(raidPtr, req->col);
   2673 	RF_Free(req, sizeof(*req));
   2674 	raidPtr->recon_in_progress = 0;
   2675 	splx(s);
   2676 
   2677 	/* That's all... */
   2678 	kthread_exit(0);	/* does not return */
   2679 }
   2680 
/*
 * Try to read a RAIDframe component label from the device behind 'vp'.
 * If the label is present and sane, prepend a new RF_AutoConfig_t to
 * 'ac_list' (taking ownership of vp and the label) and return the new
 * list head.  Otherwise the vnode is closed and the original list is
 * returned unchanged.  On allocation failure the whole list is freed
 * and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    /* Out of memory: tear down everything collected so far. */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		/* No usable label: drop the label buffer and close the
		 * vnode we were handed. */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
   2738 
   2739 RF_AutoConfig_t *
   2740 rf_find_raid_components(void)
   2741 {
   2742 	struct vnode *vp;
   2743 	struct disklabel label;
   2744 	device_t dv;
   2745 	deviter_t di;
   2746 	dev_t dev;
   2747 	int bmajor, bminor, wedge, rf_part_found;
   2748 	int error;
   2749 	int i;
   2750 	RF_AutoConfig_t *ac_list;
   2751 	uint64_t numsecs;
   2752 	unsigned secsize;
   2753 	int dowedges;
   2754 
   2755 	/* initialize the AutoConfig list */
   2756 	ac_list = NULL;
   2757 
   2758 	/*
   2759 	 * we begin by trolling through *all* the devices on the system *twice*
   2760 	 * first we scan for wedges, second for other devices. This avoids
   2761 	 * using a raw partition instead of a wedge that covers the whole disk
   2762 	 */
   2763 
   2764 	for (dowedges=1; dowedges>=0; --dowedges) {
   2765 		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
   2766 		     dv = deviter_next(&di)) {
   2767 
   2768 			/* we are only interested in disks... */
   2769 			if (device_class(dv) != DV_DISK)
   2770 				continue;
   2771 
   2772 			/* we don't care about floppies... */
   2773 			if (device_is_a(dv, "fd")) {
   2774 				continue;
   2775 			}
   2776 
   2777 			/* we don't care about CD's... */
   2778 			if (device_is_a(dv, "cd")) {
   2779 				continue;
   2780 			}
   2781 
   2782 			/* we don't care about md's... */
   2783 			if (device_is_a(dv, "md")) {
   2784 				continue;
   2785 			}
   2786 
   2787 			/* hdfd is the Atari/Hades floppy driver */
   2788 			if (device_is_a(dv, "hdfd")) {
   2789 				continue;
   2790 			}
   2791 
   2792 			/* fdisa is the Atari/Milan floppy driver */
   2793 			if (device_is_a(dv, "fdisa")) {
   2794 				continue;
   2795 			}
   2796 
   2797 			/* are we in the wedges pass ? */
   2798 			wedge = device_is_a(dv, "dk");
   2799 			if (wedge != dowedges) {
   2800 				continue;
   2801 			}
   2802 
   2803 			/* need to find the device_name_to_block_device_major stuff */
   2804 			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
   2805 
   2806 			rf_part_found = 0; /*No raid partition as yet*/
   2807 
   2808 			/* get a vnode for the raw partition of this disk */
   2809 			bminor = minor(device_unit(dv));
   2810 			dev = wedge ? makedev(bmajor, bminor) :
   2811 			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
   2812 			if (bdevvp(dev, &vp))
   2813 				panic("RAID can't alloc vnode");
   2814 
   2815 			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
   2816 
   2817 			if (error) {
   2818 				/* "Who cares."  Continue looking
   2819 				   for something that exists*/
   2820 				vput(vp);
   2821 				continue;
   2822 			}
   2823 
   2824 			error = getdisksize(vp, &numsecs, &secsize);
   2825 			if (error) {
   2826 				printf("RAIDframe: can't get disk size for "
   2827 				    "dev %s (%d)\n", device_xname(dv), error);
   2828 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2829 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2830 				vput(vp);
   2831 				continue;
   2832 			}
   2833 			if (wedge) {
   2834 				struct dkwedge_info dkw;
   2835 				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
   2836 				    NOCRED);
   2837 				if (error) {
   2838 					printf("RAIDframe: can't get wedge info for "
   2839 					    "dev %s (%d)\n", device_xname(dv), error);
   2840 					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2841 					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2842 					vput(vp);
   2843 					continue;
   2844 				}
   2845 
   2846 				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
   2847 					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2848 					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2849 					vput(vp);
   2850 					continue;
   2851 				}
   2852 
   2853 				ac_list = rf_get_component(ac_list, dev, vp,
   2854 				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
   2855 				rf_part_found = 1; /*There is a raid component on this disk*/
   2856 				continue;
   2857 			}
   2858 
   2859 			/* Ok, the disk exists.  Go get the disklabel. */
   2860 			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
   2861 			if (error) {
   2862 				/*
   2863 				 * XXX can't happen - open() would
   2864 				 * have errored out (or faked up one)
   2865 				 */
   2866 				if (error != ENOTTY)
   2867 					printf("RAIDframe: can't get label for dev "
   2868 					    "%s (%d)\n", device_xname(dv), error);
   2869 			}
   2870 
   2871 			/* don't need this any more.  We'll allocate it again
   2872 			   a little later if we really do... */
   2873 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2874 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2875 			vput(vp);
   2876 
   2877 			if (error)
   2878 				continue;
   2879 
   2880 			rf_part_found = 0; /*No raid partitions yet*/
   2881 			for (i = 0; i < label.d_npartitions; i++) {
   2882 				char cname[sizeof(ac_list->devname)];
   2883 
   2884 				/* We only support partitions marked as RAID */
   2885 				if (label.d_partitions[i].p_fstype != FS_RAID)
   2886 					continue;
   2887 
   2888 				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
   2889 				if (bdevvp(dev, &vp))
   2890 					panic("RAID can't alloc vnode");
   2891 
   2892 				error = VOP_OPEN(vp, FREAD, NOCRED);
   2893 				if (error) {
   2894 					/* Whatever... */
   2895 					vput(vp);
   2896 					continue;
   2897 				}
   2898 				snprintf(cname, sizeof(cname), "%s%c",
   2899 				    device_xname(dv), 'a' + i);
   2900 				ac_list = rf_get_component(ac_list, dev, vp, cname,
   2901 					label.d_partitions[i].p_size, numsecs, secsize);
   2902 				rf_part_found = 1; /*There is at least one raid partition on this disk*/
   2903 			}
   2904 
   2905 			/*
   2906 			 *If there is no raid component on this disk, either in a
   2907 			 *disklabel or inside a wedge, check the raw partition as well,
   2908 			 *as it is possible to configure raid components on raw disk
   2909 			 *devices.
   2910 			 */
   2911 
   2912 			if (!rf_part_found) {
   2913 				char cname[sizeof(ac_list->devname)];
   2914 
   2915 				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
   2916 				if (bdevvp(dev, &vp))
   2917 					panic("RAID can't alloc vnode");
   2918 
   2919 				error = VOP_OPEN(vp, FREAD, NOCRED);
   2920 				if (error) {
   2921 					/* Whatever... */
   2922 					vput(vp);
   2923 					continue;
   2924 				}
   2925 				snprintf(cname, sizeof(cname), "%s%c",
   2926 				    device_xname(dv), 'a' + RAW_PART);
   2927 				ac_list = rf_get_component(ac_list, dev, vp, cname,
   2928 					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
   2929 			}
   2930 		}
   2931 		deviter_release(&di);
   2932 	}
   2933 	return ac_list;
   2934 }
   2935 
   2936 
   2937 int
   2938 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
   2939 {
   2940 
   2941 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2942 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2943 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2944 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2945 	    clabel->row >=0 &&
   2946 	    clabel->column >= 0 &&
   2947 	    clabel->num_rows > 0 &&
   2948 	    clabel->num_columns > 0 &&
   2949 	    clabel->row < clabel->num_rows &&
   2950 	    clabel->column < clabel->num_columns &&
   2951 	    clabel->blockSize > 0 &&
   2952 	    /*
   2953 	     * numBlocksHi may contain garbage, but it is ok since
   2954 	     * the type is unsigned.  If it is really garbage,
   2955 	     * rf_fix_old_label_size() will fix it.
   2956 	     */
   2957 	    rf_component_label_numblocks(clabel) > 0) {
   2958 		/*
   2959 		 * label looks reasonable enough...
   2960 		 * let's make sure it has no old garbage.
   2961 		 */
   2962 		if (numsecs)
   2963 			rf_fix_old_label_size(clabel, numsecs);
   2964 		return(1);
   2965 	}
   2966 	return(0);
   2967 }
   2968 
   2969 
   2970 /*
   2971  * For reasons yet unknown, some old component labels have garbage in
   2972  * the newer numBlocksHi region, and this causes lossage.  Since those
   2973  * disks will also have numsecs set to less than 32 bits of sectors,
   2974  * we can determine when this corruption has occurred, and fix it.
   2975  *
   2976  * The exact same problem, with the same unknown reason, happens to
   2977  * the partitionSizeHi member as well.
   2978  */
   2979 static void
   2980 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
   2981 {
   2982 
   2983 	if (numsecs < ((uint64_t)1 << 32)) {
   2984 		if (clabel->numBlocksHi) {
   2985 			printf("WARNING: total sectors < 32 bits, yet "
   2986 			       "numBlocksHi set\n"
   2987 			       "WARNING: resetting numBlocksHi to zero.\n");
   2988 			clabel->numBlocksHi = 0;
   2989 		}
   2990 
   2991 		if (clabel->partitionSizeHi) {
   2992 			printf("WARNING: total sectors < 32 bits, yet "
   2993 			       "partitionSizeHi set\n"
   2994 			       "WARNING: resetting partitionSizeHi to zero.\n");
   2995 			clabel->partitionSizeHi = 0;
   2996 		}
   2997 	}
   2998 }
   2999 
   3000 
#ifdef DEBUG
/*
 * Dump the contents of a component label to the console in
 * human-readable form.  Debug builds only.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* printable names for root_partition values; the "& 3" mask
	   below keeps any out-of-range value inside this table */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
   3034 
   3035 RF_ConfigSet_t *
   3036 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   3037 {
   3038 	RF_AutoConfig_t *ac;
   3039 	RF_ConfigSet_t *config_sets;
   3040 	RF_ConfigSet_t *cset;
   3041 	RF_AutoConfig_t *ac_next;
   3042 
   3043 
   3044 	config_sets = NULL;
   3045 
   3046 	/* Go through the AutoConfig list, and figure out which components
   3047 	   belong to what sets.  */
   3048 	ac = ac_list;
   3049 	while(ac!=NULL) {
   3050 		/* we're going to putz with ac->next, so save it here
   3051 		   for use at the end of the loop */
   3052 		ac_next = ac->next;
   3053 
   3054 		if (config_sets == NULL) {
   3055 			/* will need at least this one... */
   3056 			config_sets = (RF_ConfigSet_t *)
   3057 				malloc(sizeof(RF_ConfigSet_t),
   3058 				       M_RAIDFRAME, M_NOWAIT);
   3059 			if (config_sets == NULL) {
   3060 				panic("rf_create_auto_sets: No memory!");
   3061 			}
   3062 			/* this one is easy :) */
   3063 			config_sets->ac = ac;
   3064 			config_sets->next = NULL;
   3065 			config_sets->rootable = 0;
   3066 			ac->next = NULL;
   3067 		} else {
   3068 			/* which set does this component fit into? */
   3069 			cset = config_sets;
   3070 			while(cset!=NULL) {
   3071 				if (rf_does_it_fit(cset, ac)) {
   3072 					/* looks like it matches... */
   3073 					ac->next = cset->ac;
   3074 					cset->ac = ac;
   3075 					break;
   3076 				}
   3077 				cset = cset->next;
   3078 			}
   3079 			if (cset==NULL) {
   3080 				/* didn't find a match above... new set..*/
   3081 				cset = (RF_ConfigSet_t *)
   3082 					malloc(sizeof(RF_ConfigSet_t),
   3083 					       M_RAIDFRAME, M_NOWAIT);
   3084 				if (cset == NULL) {
   3085 					panic("rf_create_auto_sets: No memory!");
   3086 				}
   3087 				cset->ac = ac;
   3088 				ac->next = NULL;
   3089 				cset->next = config_sets;
   3090 				cset->rootable = 0;
   3091 				config_sets = cset;
   3092 			}
   3093 		}
   3094 		ac = ac_next;
   3095 	}
   3096 
   3097 
   3098 	return(config_sets);
   3099 }
   3100 
   3101 static int
   3102 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3103 {
   3104 	RF_ComponentLabel_t *clabel1, *clabel2;
   3105 
   3106 	/* If this one matches the *first* one in the set, that's good
   3107 	   enough, since the other members of the set would have been
   3108 	   through here too... */
   3109 	/* note that we are not checking partitionSize here..
   3110 
   3111 	   Note that we are also not checking the mod_counters here.
   3112 	   If everything else matches except the mod_counter, that's
   3113 	   good enough for this test.  We will deal with the mod_counters
   3114 	   a little later in the autoconfiguration process.
   3115 
   3116 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3117 
   3118 	   The reason we don't check for this is that failed disks
   3119 	   will have lower modification counts.  If those disks are
   3120 	   not added to the set they used to belong to, then they will
   3121 	   form their own set, which may result in 2 different sets,
   3122 	   for example, competing to be configured at raid0, and
   3123 	   perhaps competing to be the root filesystem set.  If the
   3124 	   wrong ones get configured, or both attempt to become /,
   3125 	   weird behaviour and or serious lossage will occur.  Thus we
   3126 	   need to bring them into the fold here, and kick them out at
   3127 	   a later point.
   3128 
   3129 	*/
   3130 
   3131 	clabel1 = cset->ac->clabel;
   3132 	clabel2 = ac->clabel;
   3133 	if ((clabel1->version == clabel2->version) &&
   3134 	    (clabel1->serial_number == clabel2->serial_number) &&
   3135 	    (clabel1->num_rows == clabel2->num_rows) &&
   3136 	    (clabel1->num_columns == clabel2->num_columns) &&
   3137 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3138 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3139 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3140 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3141 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3142 	    (clabel1->blockSize == clabel2->blockSize) &&
   3143 	    rf_component_label_numblocks(clabel1) ==
   3144 	    rf_component_label_numblocks(clabel2) &&
   3145 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3146 	    (clabel1->root_partition == clabel2->root_partition) &&
   3147 	    (clabel1->last_unit == clabel2->last_unit) &&
   3148 	    (clabel1->config_order == clabel2->config_order)) {
   3149 		/* if it get's here, it almost *has* to be a match */
   3150 	} else {
   3151 		/* it's not consistent with somebody in the set..
   3152 		   punt */
   3153 		return(0);
   3154 	}
   3155 	/* all was fine.. it must fit... */
   3156 	return(1);
   3157 }
   3158 
/*
 * Decide whether a config set has enough live components to be
 * configured.  Returns 1 if so, 0 if too many components are missing.
 *
 * A component counts as "live" only if its mod_counter matches the
 * highest mod_counter seen in the set (stale components were allowed
 * into the set by rf_does_it_fit() and are filtered out here).
 * RAID 1 gets special treatment: components are mirrored in even/odd
 * pairs, and the set fails only if both halves of some pair are gone.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set:
	   the maximum over all members. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For each column, look for a component with a current
	   mod_counter occupying it. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just finished the odd half of a
				   mirror pair without bailing.. reset
				   the even_pair_failed flag, and go on
				   to the next pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
   3261 
   3262 void
   3263 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
   3264 			RF_Raid_t *raidPtr)
   3265 {
   3266 	RF_ComponentLabel_t *clabel;
   3267 	int i;
   3268 
   3269 	clabel = ac->clabel;
   3270 
   3271 	/* 1. Fill in the common stuff */
   3272 	config->numRow = clabel->num_rows = 1;
   3273 	config->numCol = clabel->num_columns;
   3274 	config->numSpare = 0; /* XXX should this be set here? */
   3275 	config->sectPerSU = clabel->sectPerSU;
   3276 	config->SUsPerPU = clabel->SUsPerPU;
   3277 	config->SUsPerRU = clabel->SUsPerRU;
   3278 	config->parityConfig = clabel->parityConfig;
   3279 	/* XXX... */
   3280 	strcpy(config->diskQueueType,"fifo");
   3281 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3282 	config->layoutSpecificSize = 0; /* XXX ?? */
   3283 
   3284 	while(ac!=NULL) {
   3285 		/* row/col values will be in range due to the checks
   3286 		   in reasonable_label() */
   3287 		strcpy(config->devnames[0][ac->clabel->column],
   3288 		       ac->devname);
   3289 		ac = ac->next;
   3290 	}
   3291 
   3292 	for(i=0;i<RF_MAXDBGV;i++) {
   3293 		config->debugVars[i][0] = 0;
   3294 	}
   3295 }
   3296 
   3297 int
   3298 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3299 {
   3300 	RF_ComponentLabel_t *clabel;
   3301 	int column;
   3302 	int sparecol;
   3303 
   3304 	raidPtr->autoconfigure = new_value;
   3305 
   3306 	for(column=0; column<raidPtr->numCol; column++) {
   3307 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3308 			clabel = raidget_component_label(raidPtr, column);
   3309 			clabel->autoconfigure = new_value;
   3310 			raidflush_component_label(raidPtr, column);
   3311 		}
   3312 	}
   3313 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3314 		sparecol = raidPtr->numCol + column;
   3315 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3316 			clabel = raidget_component_label(raidPtr, sparecol);
   3317 			clabel->autoconfigure = new_value;
   3318 			raidflush_component_label(raidPtr, sparecol);
   3319 		}
   3320 	}
   3321 	return(new_value);
   3322 }
   3323 
   3324 int
   3325 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3326 {
   3327 	RF_ComponentLabel_t *clabel;
   3328 	int column;
   3329 	int sparecol;
   3330 
   3331 	raidPtr->root_partition = new_value;
   3332 	for(column=0; column<raidPtr->numCol; column++) {
   3333 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3334 			clabel = raidget_component_label(raidPtr, column);
   3335 			clabel->root_partition = new_value;
   3336 			raidflush_component_label(raidPtr, column);
   3337 		}
   3338 	}
   3339 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3340 		sparecol = raidPtr->numCol + column;
   3341 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3342 			clabel = raidget_component_label(raidPtr, sparecol);
   3343 			clabel->root_partition = new_value;
   3344 			raidflush_component_label(raidPtr, sparecol);
   3345 		}
   3346 	}
   3347 	return(new_value);
   3348 }
   3349 
   3350 void
   3351 rf_release_all_vps(RF_ConfigSet_t *cset)
   3352 {
   3353 	RF_AutoConfig_t *ac;
   3354 
   3355 	ac = cset->ac;
   3356 	while(ac!=NULL) {
   3357 		/* Close the vp, and give it back */
   3358 		if (ac->vp) {
   3359 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3360 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
   3361 			vput(ac->vp);
   3362 			ac->vp = NULL;
   3363 		}
   3364 		ac = ac->next;
   3365 	}
   3366 }
   3367 
   3368 
   3369 void
   3370 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3371 {
   3372 	RF_AutoConfig_t *ac;
   3373 	RF_AutoConfig_t *next_ac;
   3374 
   3375 	ac = cset->ac;
   3376 	while(ac!=NULL) {
   3377 		next_ac = ac->next;
   3378 		/* nuke the label */
   3379 		free(ac->clabel, M_RAIDFRAME);
   3380 		/* cleanup the config structure */
   3381 		free(ac, M_RAIDFRAME);
   3382 		/* "next.." */
   3383 		ac = next_ac;
   3384 	}
   3385 	/* and, finally, nuke the config set */
   3386 	free(cset, M_RAIDFRAME);
   3387 }
   3388 
   3389 
/*
 * Populate a component label from the current state of the RAID set:
 * geometry, serial/mod counters, and configuration preferences.
 * The caller is responsible for writing the label to disk.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* splits the sector count into numBlocks/numBlocksHi */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
   3422 
/*
 * Autoconfigure one config set: find a free raid unit (preferring the
 * unit recorded in the component labels), build an RF_Config_t from
 * the labels, and configure the RAID set.  On success the set's
 * root-eligibility is recorded in cset->rootable and the softc is
 * returned; on any failure NULL is returned.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* scan upward from last_unit until we find a unit that is
	   either unused or not yet valid */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* no existing softc at that unit: allocate one */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
				/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed: give the softc back */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
   3506 
/*
 * Initialize a pool of fixed-size objects for RAIDframe use, primed
 * with xmin objects and with low/high watermarks of xmin/xmax.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
   3516 
   3517 /*
   3518  * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
   3519  * to see if there is IO pending and if that IO could possibly be done
   3520  * for a given RAID set.  Returns 0 if IO is waiting and can be done, 1
   3521  * otherwise.
   3522  *
   3523  */
   3524 int
   3525 rf_buf_queue_check(RF_Raid_t *raidPtr)
   3526 {
   3527 	struct raid_softc *rs;
   3528 	struct dk_softc *dksc;
   3529 
   3530 	rs = raidPtr->softc;
   3531 	dksc = &rs->sc_dksc;
   3532 
   3533 	if ((rs->sc_flags & RAIDF_INITED) == 0)
   3534 		return 1;
   3535 
   3536 	if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
   3537 		/* there is work to do */
   3538 		return 0;
   3539 	}
   3540 	/* default is nothing to do */
   3541 	return 1;
   3542 }
   3543 
   3544 int
   3545 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
   3546 {
   3547 	uint64_t numsecs;
   3548 	unsigned secsize;
   3549 	int error;
   3550 
   3551 	error = getdisksize(vp, &numsecs, &secsize);
   3552 	if (error == 0) {
   3553 		diskPtr->blockSize = secsize;
   3554 		diskPtr->numBlocks = numsecs - rf_protectedSectors;
   3555 		diskPtr->partitionSize = numsecs;
   3556 		return 0;
   3557 	}
   3558 	return error;
   3559 }
   3560 
/* Pseudo-device autoconf match: always succeeds. */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
   3566 
/* Pseudo-device autoconf attach: nothing to do at attach time. */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
   3571 
   3572 
   3573 static int
   3574 raid_detach(device_t self, int flags)
   3575 {
   3576 	int error;
   3577 	struct raid_softc *rs = raidsoftc(self);
   3578 
   3579 	if (rs == NULL)
   3580 		return ENXIO;
   3581 
   3582 	if ((error = raidlock(rs)) != 0)
   3583 		return (error);
   3584 
   3585 	error = raid_detach_unlocked(rs);
   3586 
   3587 	raidunlock(rs);
   3588 
   3589 	/* XXX raid can be referenced here */
   3590 
   3591 	if (error)
   3592 		return error;
   3593 
   3594 	/* Free the softc */
   3595 	raidput(rs);
   3596 
   3597 	return 0;
   3598 }
   3599 
/*
 * Publish a synthetic disk geometry for the raid device based on the
 * set's layout, and register it with the disk subsystem.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* fabricated track count (no physical meaning for RAID) */
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
   3615 
   3616 /*
   3617  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
   3618  * We end up returning whatever error was returned by the first cache flush
   3619  * that fails.
   3620  */
   3621 
   3622 int
   3623 rf_sync_component_caches(RF_Raid_t *raidPtr)
   3624 {
   3625 	int c, sparecol;
   3626 	int e,error;
   3627 	int force = 1;
   3628 
   3629 	error = 0;
   3630 	for (c = 0; c < raidPtr->numCol; c++) {
   3631 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   3632 			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
   3633 					  &force, FWRITE, NOCRED);
   3634 			if (e) {
   3635 				if (e != ENODEV)
   3636 					printf("raid%d: cache flush to component %s failed.\n",
   3637 					       raidPtr->raidid, raidPtr->Disks[c].devname);
   3638 				if (error == 0) {
   3639 					error = e;
   3640 				}
   3641 			}
   3642 		}
   3643 	}
   3644 
   3645 	for( c = 0; c < raidPtr->numSpare ; c++) {
   3646 		sparecol = raidPtr->numCol + c;
   3647 		/* Need to ensure that the reconstruct actually completed! */
   3648 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3649 			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
   3650 					  DIOCCACHESYNC, &force, FWRITE, NOCRED);
   3651 			if (e) {
   3652 				if (e != ENODEV)
   3653 					printf("raid%d: cache flush to component %s failed.\n",
   3654 					       raidPtr->raidid, raidPtr->Disks[sparecol].devname);
   3655 				if (error == 0) {
   3656 					error = e;
   3657 				}
   3658 			}
   3659 		}
   3660 	}
   3661 	return error;
   3662 }
   3663 
   3664 /*
   3665  * Module interface
   3666  */
   3667 
/* Declare the raid driver module; depends on the dk_subr module. */
MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr");

#ifdef _MODULE
/* When built as a loadable module, also declare the cfdriver here. */
CFDRIVER_DECL(raid, DV_DISK, NULL);
#endif

/* Module command handlers. */
static int raid_modcmd(modcmd_t, void *);
static int raid_modcmd_init(void);
static int raid_modcmd_fini(void);
   3677 
   3678 static int
   3679 raid_modcmd(modcmd_t cmd, void *data)
   3680 {
   3681 	int error;
   3682 
   3683 	error = 0;
   3684 	switch (cmd) {
   3685 	case MODULE_CMD_INIT:
   3686 		error = raid_modcmd_init();
   3687 		break;
   3688 	case MODULE_CMD_FINI:
   3689 		error = raid_modcmd_fini();
   3690 		break;
   3691 	default:
   3692 		error = ENOTTY;
   3693 		break;
   3694 	}
   3695 	return error;
   3696 }
   3697 
/*
 * Module initialization: attach the block/char devsw, the cfdriver
 * (module builds only) and cfattach, boot the RAIDframe core, and
 * register a config finalizer that autoconfigures RAID sets once all
 * real hardware has been found.  On any attach failure the earlier
 * steps are rolled back before returning the error.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* let devsw_attach pick the major numbers */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	/* EEXIST is tolerated: the devsw may already be attached
	   (e.g. driver built into the kernel) */
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* NOTE(review): error is always 0 here (a possible EEXIST was
	   overwritten by the attach calls above) — defensive check */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
   3768 
/*
 * Module teardown: refuse to unload while raid devices exist, then
 * detach the cfattach, cfdriver (module builds only) and devsw in
 * reverse order of attachment, re-attaching the earlier pieces if a
 * later detach fails, and finally shut down the RAIDframe core.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist.  */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* roll back: re-attach the cfattach before failing */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* roll back: re-attach cfdriver and cfattach */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
   3818