Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.224.2.1
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.224.2.1 2007/12/19 20:07:53 ghen Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1990, 1993
     40  *      The Regents of the University of California.  All rights reserved.
     41  *
     42  * This code is derived from software contributed to Berkeley by
     43  * the Systems Programming Group of the University of Utah Computer
     44  * Science Department.
     45  *
     46  * Redistribution and use in source and binary forms, with or without
     47  * modification, are permitted provided that the following conditions
     48  * are met:
     49  * 1. Redistributions of source code must retain the above copyright
     50  *    notice, this list of conditions and the following disclaimer.
     51  * 2. Redistributions in binary form must reproduce the above copyright
     52  *    notice, this list of conditions and the following disclaimer in the
     53  *    documentation and/or other materials provided with the distribution.
     54  * 3. Neither the name of the University nor the names of its contributors
     55  *    may be used to endorse or promote products derived from this software
     56  *    without specific prior written permission.
     57  *
     58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     68  * SUCH DAMAGE.
     69  *
     70  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     71  *
     72  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     73  */
     74 
     75 /*
     76  * Copyright (c) 1988 University of Utah.
     77  *
     78  * This code is derived from software contributed to Berkeley by
     79  * the Systems Programming Group of the University of Utah Computer
     80  * Science Department.
     81  *
     82  * Redistribution and use in source and binary forms, with or without
     83  * modification, are permitted provided that the following conditions
     84  * are met:
     85  * 1. Redistributions of source code must retain the above copyright
     86  *    notice, this list of conditions and the following disclaimer.
     87  * 2. Redistributions in binary form must reproduce the above copyright
     88  *    notice, this list of conditions and the following disclaimer in the
     89  *    documentation and/or other materials provided with the distribution.
     90  * 3. All advertising materials mentioning features or use of this software
     91  *    must display the following acknowledgement:
     92  *      This product includes software developed by the University of
     93  *      California, Berkeley and its contributors.
     94  * 4. Neither the name of the University nor the names of its contributors
     95  *    may be used to endorse or promote products derived from this software
     96  *    without specific prior written permission.
     97  *
     98  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     99  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    100  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    101  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    102  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    103  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    104  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    105  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    106  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    107  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    108  * SUCH DAMAGE.
    109  *
    110  * from: Utah $Hdr: cd.c 1.6 90/11/28$
    111  *
    112  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
    113  */
    114 
    115 /*
    116  * Copyright (c) 1995 Carnegie-Mellon University.
    117  * All rights reserved.
    118  *
    119  * Authors: Mark Holland, Jim Zelenka
    120  *
    121  * Permission to use, copy, modify and distribute this software and
    122  * its documentation is hereby granted, provided that both the copyright
    123  * notice and this permission notice appear in all copies of the
    124  * software, derivative works or modified versions, and any portions
    125  * thereof, and that both notices appear in supporting documentation.
    126  *
    127  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
    128  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
    129  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
    130  *
    131  * Carnegie Mellon requests users of this software to return to
    132  *
    133  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    134  *  School of Computer Science
    135  *  Carnegie Mellon University
    136  *  Pittsburgh PA 15213-3890
    137  *
    138  * any improvements or extensions that they make and grant Carnegie the
    139  * rights to redistribute these changes.
    140  */
    141 
    142 /***********************************************************
    143  *
    144  * rf_kintf.c -- the kernel interface routines for RAIDframe
    145  *
    146  ***********************************************************/
    147 
    148 #include <sys/cdefs.h>
    149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.224.2.1 2007/12/19 20:07:53 ghen Exp $");
    150 
    151 #include <sys/param.h>
    152 #include <sys/errno.h>
    153 #include <sys/pool.h>
    154 #include <sys/proc.h>
    155 #include <sys/queue.h>
    156 #include <sys/disk.h>
    157 #include <sys/device.h>
    158 #include <sys/stat.h>
    159 #include <sys/ioctl.h>
    160 #include <sys/fcntl.h>
    161 #include <sys/systm.h>
    162 #include <sys/namei.h>
    163 #include <sys/vnode.h>
    164 #include <sys/disklabel.h>
    165 #include <sys/conf.h>
    166 #include <sys/lock.h>
    167 #include <sys/buf.h>
    168 #include <sys/bufq.h>
    169 #include <sys/user.h>
    170 #include <sys/reboot.h>
    171 #include <sys/kauth.h>
    172 
    173 #include <dev/raidframe/raidframevar.h>
    174 #include <dev/raidframe/raidframeio.h>
    175 #include "raid.h"
    176 #include "opt_raid_autoconfig.h"
    177 #include "rf_raid.h"
    178 #include "rf_copyback.h"
    179 #include "rf_dag.h"
    180 #include "rf_dagflags.h"
    181 #include "rf_desc.h"
    182 #include "rf_diskqueue.h"
    183 #include "rf_etimer.h"
    184 #include "rf_general.h"
    185 #include "rf_kintf.h"
    186 #include "rf_options.h"
    187 #include "rf_driver.h"
    188 #include "rf_parityscan.h"
    189 #include "rf_threadstuff.h"
    190 
/*
 * Debug tracing.  db1_printf() takes a parenthesised printf argument list,
 * e.g. db1_printf(("unit %d\n", unit));  Output is produced only in DEBUG
 * kernels, and only while rf_kdebug_level is greater than zero.
 *
 * Both variants expand to a single do { } while (0) statement so that the
 * macro can be used as the sole body of an if/else: a bare "if" expansion
 * would capture a following "else", and a bare "{ }" followed by the
 * caller's semicolon would terminate the enclosing "if" early.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) \
	do { if (rf_kdebug_level > 0) printf a; } while (/*CONSTCOND*/ 0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (/*CONSTCOND*/ 0)
#endif				/* DEBUG */
    197 
    198 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    199 
    200 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    201 
    202 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    203 						 * spare table */
    204 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    205 						 * installation process */
    206 
    207 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    208 
    209 /* prototypes */
    210 static void KernelWakeupFunc(struct buf *);
    211 static void InitBP(struct buf *, struct vnode *, unsigned,
    212     dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
    213     void *, int, struct proc *);
    214 static void raidinit(RF_Raid_t *);
    215 
    216 void raidattach(int);
    217 static int raid_match(struct device *, struct cfdata *, void *);
    218 static void raid_attach(struct device *, struct device *, void *);
    219 static int raid_detach(struct device *, int);
    220 
    221 dev_type_open(raidopen);
    222 dev_type_close(raidclose);
    223 dev_type_read(raidread);
    224 dev_type_write(raidwrite);
    225 dev_type_ioctl(raidioctl);
    226 dev_type_strategy(raidstrategy);
    227 dev_type_dump(raiddump);
    228 dev_type_size(raidsize);
    229 
    230 const struct bdevsw raid_bdevsw = {
    231 	raidopen, raidclose, raidstrategy, raidioctl,
    232 	raiddump, raidsize, D_DISK
    233 };
    234 
    235 const struct cdevsw raid_cdevsw = {
    236 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    237 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
    238 };
    239 
    240 /* XXX Not sure if the following should be replacing the raidPtrs above,
    241    or if it should be used in conjunction with that...
    242 */
    243 
    244 struct raid_softc {
    245 	struct device *sc_dev;
    246 	int     sc_flags;	/* flags */
    247 	int     sc_cflags;	/* configuration flags */
    248 	uint64_t sc_size;	/* size of the raid device */
    249 	char    sc_xname[20];	/* XXX external name */
    250 	struct disk sc_dkdev;	/* generic disk device info */
    251 	struct bufq_state *buf_queue;	/* used for the device queue */
    252 };
    253 /* sc_flags */
    254 #define RAIDF_INITED	0x01	/* unit has been initialized */
    255 #define RAIDF_WLABEL	0x02	/* label area is writable */
    256 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    257 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    258 #define RAIDF_LOCKED	0x80	/* unit is locked */
    259 
    260 #define	raidunit(x)	DISKUNIT(x)
    261 int numraid = 0;
    262 
    263 extern struct cfdriver raid_cd;
    264 CFATTACH_DECL(raid, sizeof(struct raid_softc),
    265     raid_match, raid_attach, raid_detach, NULL);
    266 
    267 /*
    268  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    269  * Be aware that large numbers can allow the driver to consume a lot of
    270  * kernel memory, especially on writes, and in degraded mode reads.
    271  *
    272  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    273  * a single 64K write will typically require 64K for the old data,
    274  * 64K for the old parity, and 64K for the new parity, for a total
    275  * of 192K (if the parity buffer is not re-used immediately).
    276  * Even if it is used immediately, that's still 128K, which when multiplied
    277  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    278  *
    279  * Now in degraded mode, for example, a 64K read on the above setup may
    280  * require data reconstruction, which will require *all* of the 4 remaining
    281  * disks to participate -- 4 * 32K/disk == 128K again.
    282  */
    283 
    284 #ifndef RAIDOUTSTANDING
    285 #define RAIDOUTSTANDING   6
    286 #endif
    287 
    288 #define RAIDLABELDEV(dev)	\
    289 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    290 
    291 /* declared here, and made public, for the benefit of KVM stuff.. */
    292 struct raid_softc *raid_softc;
    293 
    294 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    295 				     struct disklabel *);
    296 static void raidgetdisklabel(dev_t);
    297 static void raidmakedisklabel(struct raid_softc *);
    298 
    299 static int raidlock(struct raid_softc *);
    300 static void raidunlock(struct raid_softc *);
    301 
    302 static void rf_markalldirty(RF_Raid_t *);
    303 
    304 void rf_ReconThread(struct rf_recon_req *);
    305 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    306 void rf_CopybackThread(RF_Raid_t *raidPtr);
    307 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    308 int rf_autoconfig(struct device *self);
    309 void rf_buildroothack(RF_ConfigSet_t *);
    310 
    311 RF_AutoConfig_t *rf_find_raid_components(void);
    312 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    313 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    314 static int rf_reasonable_label(RF_ComponentLabel_t *);
    315 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    316 int rf_set_autoconfig(RF_Raid_t *, int);
    317 int rf_set_rootpartition(RF_Raid_t *, int);
    318 void rf_release_all_vps(RF_ConfigSet_t *);
    319 void rf_cleanup_config_set(RF_ConfigSet_t *);
    320 int rf_have_enough_components(RF_ConfigSet_t *);
    321 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    322 
    323 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    324 				  allow autoconfig to take place.
    325 				  Note that this is overridden by having
    326 				  RAID_AUTOCONFIG as an option in the
    327 				  kernel config file.  */
    328 
    329 struct RF_Pools_s rf_pools;
    330 
    331 void
    332 raidattach(int num)
    333 {
    334 	int raidID;
    335 	int i, rc;
    336 
    337 #ifdef DEBUG
    338 	printf("raidattach: Asked for %d units\n", num);
    339 #endif
    340 
    341 	if (num <= 0) {
    342 #ifdef DIAGNOSTIC
    343 		panic("raidattach: count <= 0");
    344 #endif
    345 		return;
    346 	}
    347 	/* This is where all the initialization stuff gets done. */
    348 
    349 	numraid = num;
    350 
    351 	/* Make some space for requested number of units... */
    352 
    353 	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
    354 	if (raidPtrs == NULL) {
    355 		panic("raidPtrs is NULL!!");
    356 	}
    357 
    358 	rf_mutex_init(&rf_sparet_wait_mutex);
    359 
    360 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    361 
    362 	for (i = 0; i < num; i++)
    363 		raidPtrs[i] = NULL;
    364 	rc = rf_BootRaidframe();
    365 	if (rc == 0)
    366 		printf("Kernelized RAIDframe activated\n");
    367 	else
    368 		panic("Serious error booting RAID!!");
    369 
    370 	/* put together some datastructures like the CCD device does.. This
    371 	 * lets us lock the device and what-not when it gets opened. */
    372 
    373 	raid_softc = (struct raid_softc *)
    374 		malloc(num * sizeof(struct raid_softc),
    375 		       M_RAIDFRAME, M_NOWAIT);
    376 	if (raid_softc == NULL) {
    377 		printf("WARNING: no memory for RAIDframe driver\n");
    378 		return;
    379 	}
    380 
    381 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    382 
    383 	for (raidID = 0; raidID < num; raidID++) {
    384 		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
    385 
    386 		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
    387 			  (RF_Raid_t *));
    388 		if (raidPtrs[raidID] == NULL) {
    389 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    390 			numraid = raidID;
    391 			return;
    392 		}
    393 	}
    394 
    395 	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
    396 		printf("config_cfattach_attach failed?\n");
    397 	}
    398 
    399 #ifdef RAID_AUTOCONFIG
    400 	raidautoconfig = 1;
    401 #endif
    402 
    403 	/*
    404 	 * Register a finalizer which will be used to auto-config RAID
    405 	 * sets once all real hardware devices have been found.
    406 	 */
    407 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
    408 		printf("WARNING: unable to register RAIDframe finalizer\n");
    409 }
    410 
    411 int
    412 rf_autoconfig(struct device *self)
    413 {
    414 	RF_AutoConfig_t *ac_list;
    415 	RF_ConfigSet_t *config_sets;
    416 	int i;
    417 
    418 	if (raidautoconfig == 0)
    419 		return (0);
    420 
    421 	/* XXX This code can only be run once. */
    422 	raidautoconfig = 0;
    423 
    424 	/* 1. locate all RAID components on the system */
    425 #ifdef DEBUG
    426 	printf("Searching for RAID components...\n");
    427 #endif
    428 	ac_list = rf_find_raid_components();
    429 
    430 	/* 2. Sort them into their respective sets. */
    431 	config_sets = rf_create_auto_sets(ac_list);
    432 
    433 	/*
    434 	 * 3. Evaluate each set andconfigure the valid ones.
    435 	 * This gets done in rf_buildroothack().
    436 	 */
    437 	rf_buildroothack(config_sets);
    438 
    439 	for (i = 0; i < numraid; i++)
    440 		if (raidPtrs[i] != NULL && raidPtrs[i]->valid)
    441 			dkwedge_discover(&raid_softc[i].sc_dkdev);
    442 
    443 	return 1;
    444 }
    445 
    446 void
    447 rf_buildroothack(RF_ConfigSet_t *config_sets)
    448 {
    449 	RF_ConfigSet_t *cset;
    450 	RF_ConfigSet_t *next_cset;
    451 	int retcode;
    452 	int raidID;
    453 	int rootID;
    454 	int num_root;
    455 
    456 	rootID = 0;
    457 	num_root = 0;
    458 	cset = config_sets;
    459 	while(cset != NULL ) {
    460 		next_cset = cset->next;
    461 		if (rf_have_enough_components(cset) &&
    462 		    cset->ac->clabel->autoconfigure==1) {
    463 			retcode = rf_auto_config_set(cset,&raidID);
    464 			if (!retcode) {
    465 #ifdef DEBUG
    466 				printf("raid%d: configured ok\n", raidID);
    467 #endif
    468 				if (cset->rootable) {
    469 					rootID = raidID;
    470 					num_root++;
    471 				}
    472 			} else {
    473 				/* The autoconfig didn't work :( */
    474 #ifdef DEBUG
    475 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    476 #endif
    477 				rf_release_all_vps(cset);
    478 			}
    479 		} else {
    480 #ifdef DEBUG
    481 			printf("raid%d: not enough components\n", raidID);
    482 #endif
    483 			/* we're not autoconfiguring this set...
    484 			   release the associated resources */
    485 			rf_release_all_vps(cset);
    486 		}
    487 		/* cleanup */
    488 		rf_cleanup_config_set(cset);
    489 		cset = next_cset;
    490 	}
    491 
    492 	/* if the user has specified what the root device should be
    493 	   then we don't touch booted_device or boothowto... */
    494 
    495 	if (rootspec != NULL)
    496 		return;
    497 
    498 	/* we found something bootable... */
    499 
    500 	if (num_root == 1) {
    501 		booted_device = raid_softc[rootID].sc_dev;
    502 	} else if (num_root > 1) {
    503 		/* we can't guess.. require the user to answer... */
    504 		boothowto |= RB_ASKNAME;
    505 	}
    506 }
    507 
    508 
    509 int
    510 raidsize(dev_t dev)
    511 {
    512 	struct raid_softc *rs;
    513 	struct disklabel *lp;
    514 	int     part, unit, omask, size;
    515 
    516 	unit = raidunit(dev);
    517 	if (unit >= numraid)
    518 		return (-1);
    519 	rs = &raid_softc[unit];
    520 
    521 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    522 		return (-1);
    523 
    524 	part = DISKPART(dev);
    525 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    526 	lp = rs->sc_dkdev.dk_label;
    527 
    528 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
    529 		return (-1);
    530 
    531 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    532 		size = -1;
    533 	else
    534 		size = lp->d_partitions[part].p_size *
    535 		    (lp->d_secsize / DEV_BSIZE);
    536 
    537 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
    538 		return (-1);
    539 
    540 	return (size);
    541 
    542 }
    543 
    544 int
    545 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
    546 {
    547 	int     unit = raidunit(dev);
    548 	struct raid_softc *rs;
    549 	const struct bdevsw *bdev;
    550 	struct disklabel *lp;
    551 	RF_Raid_t *raidPtr;
    552 	daddr_t offset;
    553 	int     part, c, sparecol, j, scol, dumpto;
    554 	int     error = 0;
    555 
    556 	if (unit >= numraid)
    557 		return (ENXIO);
    558 
    559 	rs = &raid_softc[unit];
    560 	raidPtr = raidPtrs[unit];
    561 
    562 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    563 		return ENXIO;
    564 
    565 	/* we only support dumping to RAID 1 sets */
    566 	if (raidPtr->Layout.numDataCol != 1 ||
    567 	    raidPtr->Layout.numParityCol != 1)
    568 		return EINVAL;
    569 
    570 
    571 	if ((error = raidlock(rs)) != 0)
    572 		return error;
    573 
    574 	if (size % DEV_BSIZE != 0) {
    575 		error = EINVAL;
    576 		goto out;
    577 	}
    578 
    579 	if (blkno + size / DEV_BSIZE > rs->sc_size) {
    580 		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
    581 		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
    582 		    size / DEV_BSIZE, rs->sc_size);
    583 		error = EINVAL;
    584 		goto out;
    585 	}
    586 
    587 	part = DISKPART(dev);
    588 	lp = rs->sc_dkdev.dk_label;
    589 	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
    590 
    591 	/* figure out what device is alive.. */
    592 
    593 	/*
    594 	   Look for a component to dump to.  The preference for the
    595 	   component to dump to is as follows:
    596 	   1) the master
    597 	   2) a used_spare of the master
    598 	   3) the slave
    599 	   4) a used_spare of the slave
    600 	*/
    601 
    602 	dumpto = -1;
    603 	for (c = 0; c < raidPtr->numCol; c++) {
    604 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
    605 			/* this might be the one */
    606 			dumpto = c;
    607 			break;
    608 		}
    609 	}
    610 
    611 	/*
    612 	   At this point we have possibly selected a live master or a
    613 	   live slave.  We now check to see if there is a spared
    614 	   master (or a spared slave), if we didn't find a live master
    615 	   or a live slave.
    616 	*/
    617 
    618 	for (c = 0; c < raidPtr->numSpare; c++) {
    619 		sparecol = raidPtr->numCol + c;
    620 		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
    621 			/* How about this one? */
    622 			scol = -1;
    623 			for(j=0;j<raidPtr->numCol;j++) {
    624 				if (raidPtr->Disks[j].spareCol == sparecol) {
    625 					scol = j;
    626 					break;
    627 				}
    628 			}
    629 			if (scol == 0) {
    630 				/*
    631 				   We must have found a spared master!
    632 				   We'll take that over anything else
    633 				   found so far.  (We couldn't have
    634 				   found a real master before, since
    635 				   this is a used spare, and it's
    636 				   saying that it's replacing the
    637 				   master.)  On reboot (with
    638 				   autoconfiguration turned on)
    639 				   sparecol will become the 1st
    640 				   component (component0) of this set.
    641 				*/
    642 				dumpto = sparecol;
    643 				break;
    644 			} else if (scol != -1) {
    645 				/*
    646 				   Must be a spared slave.  We'll dump
    647 				   to that if we havn't found anything
    648 				   else so far.
    649 				*/
    650 				if (dumpto == -1)
    651 					dumpto = sparecol;
    652 			}
    653 		}
    654 	}
    655 
    656 	if (dumpto == -1) {
    657 		/* we couldn't find any live components to dump to!?!?
    658 		 */
    659 		error = EINVAL;
    660 		goto out;
    661 	}
    662 
    663 	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
    664 
    665 	/*
    666 	   Note that blkno is relative to this particular partition.
    667 	   By adding the offset of this partition in the RAID
    668 	   set, and also adding RF_PROTECTED_SECTORS, we get a
    669 	   value that is relative to the partition used for the
    670 	   underlying component.
    671 	*/
    672 
    673 	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
    674 				blkno + offset, va, size);
    675 
    676 out:
    677 	raidunlock(rs);
    678 
    679 	return error;
    680 }
    681 /* ARGSUSED */
    682 int
    683 raidopen(dev_t dev, int flags, int fmt,
    684     struct lwp *l)
    685 {
    686 	int     unit = raidunit(dev);
    687 	struct raid_softc *rs;
    688 	struct disklabel *lp;
    689 	int     part, pmask;
    690 	int     error = 0;
    691 
    692 	if (unit >= numraid)
    693 		return (ENXIO);
    694 	rs = &raid_softc[unit];
    695 
    696 	if ((error = raidlock(rs)) != 0)
    697 		return (error);
    698 	lp = rs->sc_dkdev.dk_label;
    699 
    700 	part = DISKPART(dev);
    701 
    702 	/*
    703 	 * If there are wedges, and this is not RAW_PART, then we
    704 	 * need to fail.
    705 	 */
    706 	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
    707 		error = EBUSY;
    708 		goto bad;
    709 	}
    710 	pmask = (1 << part);
    711 
    712 	if ((rs->sc_flags & RAIDF_INITED) &&
    713 	    (rs->sc_dkdev.dk_openmask == 0))
    714 		raidgetdisklabel(dev);
    715 
    716 	/* make sure that this partition exists */
    717 
    718 	if (part != RAW_PART) {
    719 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    720 		    ((part >= lp->d_npartitions) ||
    721 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    722 			error = ENXIO;
    723 			goto bad;
    724 		}
    725 	}
    726 	/* Prevent this unit from being unconfigured while open. */
    727 	switch (fmt) {
    728 	case S_IFCHR:
    729 		rs->sc_dkdev.dk_copenmask |= pmask;
    730 		break;
    731 
    732 	case S_IFBLK:
    733 		rs->sc_dkdev.dk_bopenmask |= pmask;
    734 		break;
    735 	}
    736 
    737 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    738 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    739 		/* First one... mark things as dirty... Note that we *MUST*
    740 		 have done a configure before this.  I DO NOT WANT TO BE
    741 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    742 		 THAT THEY BELONG TOGETHER!!!!! */
    743 		/* XXX should check to see if we're only open for reading
    744 		   here... If so, we needn't do this, but then need some
    745 		   other way of keeping track of what's happened.. */
    746 
    747 		rf_markalldirty( raidPtrs[unit] );
    748 	}
    749 
    750 
    751 	rs->sc_dkdev.dk_openmask =
    752 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    753 
    754 bad:
    755 	raidunlock(rs);
    756 
    757 	return (error);
    758 
    759 
    760 }
    761 /* ARGSUSED */
    762 int
    763 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
    764 {
    765 	int     unit = raidunit(dev);
    766 	struct cfdata *cf;
    767 	struct raid_softc *rs;
    768 	int     error = 0;
    769 	int     part;
    770 
    771 	if (unit >= numraid)
    772 		return (ENXIO);
    773 	rs = &raid_softc[unit];
    774 
    775 	if ((error = raidlock(rs)) != 0)
    776 		return (error);
    777 
    778 	part = DISKPART(dev);
    779 
    780 	/* ...that much closer to allowing unconfiguration... */
    781 	switch (fmt) {
    782 	case S_IFCHR:
    783 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    784 		break;
    785 
    786 	case S_IFBLK:
    787 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    788 		break;
    789 	}
    790 	rs->sc_dkdev.dk_openmask =
    791 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    792 
    793 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    794 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    795 		/* Last one... device is not unconfigured yet.
    796 		   Device shutdown has taken care of setting the
    797 		   clean bits if RAIDF_INITED is not set
    798 		   mark things as clean... */
    799 
    800 		rf_update_component_labels(raidPtrs[unit],
    801 						 RF_FINAL_COMPONENT_UPDATE);
    802 		if (doing_shutdown) {
    803 			/* last one, and we're going down, so
    804 			   lights out for this RAID set too. */
    805 			error = rf_Shutdown(raidPtrs[unit]);
    806 
    807 			/* It's no longer initialized... */
    808 			rs->sc_flags &= ~RAIDF_INITED;
    809 
    810 			/* detach the device */
    811 
    812 			cf = device_cfdata(rs->sc_dev);
    813 			error = config_detach(rs->sc_dev, DETACH_QUIET);
    814 			free(cf, M_RAIDFRAME);
    815 
    816 			/* Detach the disk. */
    817 			pseudo_disk_detach(&rs->sc_dkdev);
    818 		}
    819 	}
    820 
    821 	raidunlock(rs);
    822 	return (0);
    823 
    824 }
    825 
    826 void
    827 raidstrategy(struct buf *bp)
    828 {
    829 	int s;
    830 
    831 	unsigned int raidID = raidunit(bp->b_dev);
    832 	RF_Raid_t *raidPtr;
    833 	struct raid_softc *rs = &raid_softc[raidID];
    834 	int     wlabel;
    835 
    836 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    837 		bp->b_error = ENXIO;
    838 		bp->b_flags |= B_ERROR;
    839 		goto done;
    840 	}
    841 	if (raidID >= numraid || !raidPtrs[raidID]) {
    842 		bp->b_error = ENODEV;
    843 		bp->b_flags |= B_ERROR;
    844 		goto done;
    845 	}
    846 	raidPtr = raidPtrs[raidID];
    847 	if (!raidPtr->valid) {
    848 		bp->b_error = ENODEV;
    849 		bp->b_flags |= B_ERROR;
    850 		goto done;
    851 	}
    852 	if (bp->b_bcount == 0) {
    853 		db1_printf(("b_bcount is zero..\n"));
    854 		goto done;
    855 	}
    856 
    857 	/*
    858 	 * Do bounds checking and adjust transfer.  If there's an
    859 	 * error, the bounds check will flag that for us.
    860 	 */
    861 
    862 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    863 	if (DISKPART(bp->b_dev) == RAW_PART) {
    864 		uint64_t size; /* device size in DEV_BSIZE unit */
    865 
    866 		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
    867 			size = raidPtr->totalSectors <<
    868 			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
    869 		} else {
    870 			size = raidPtr->totalSectors >>
    871 			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
    872 		}
    873 		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
    874 			goto done;
    875 		}
    876 	} else {
    877 		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
    878 			db1_printf(("Bounds check failed!!:%d %d\n",
    879 				(int) bp->b_blkno, (int) wlabel));
    880 			goto done;
    881 		}
    882 	}
    883 	s = splbio();
    884 
    885 	bp->b_resid = 0;
    886 
    887 	/* stuff it onto our queue */
    888 	BUFQ_PUT(rs->buf_queue, bp);
    889 
    890 	/* scheduled the IO to happen at the next convenient time */
    891 	wakeup(&(raidPtrs[raidID]->iodone));
    892 
    893 	splx(s);
    894 	return;
    895 
    896 done:
    897 	bp->b_resid = bp->b_bcount;
    898 	biodone(bp);
    899 }
    900 /* ARGSUSED */
    901 int
    902 raidread(dev_t dev, struct uio *uio, int flags)
    903 {
    904 	int     unit = raidunit(dev);
    905 	struct raid_softc *rs;
    906 
    907 	if (unit >= numraid)
    908 		return (ENXIO);
    909 	rs = &raid_softc[unit];
    910 
    911 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    912 		return (ENXIO);
    913 
    914 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    915 
    916 }
    917 /* ARGSUSED */
    918 int
    919 raidwrite(dev_t dev, struct uio *uio, int flags)
    920 {
    921 	int     unit = raidunit(dev);
    922 	struct raid_softc *rs;
    923 
    924 	if (unit >= numraid)
    925 		return (ENXIO);
    926 	rs = &raid_softc[unit];
    927 
    928 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    929 		return (ENXIO);
    930 
    931 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    932 
    933 }
    934 
    935 int
    936 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
    937 {
    938 	int     unit = raidunit(dev);
    939 	int     error = 0;
    940 	int     part, pmask;
    941 	struct cfdata *cf;
    942 	struct raid_softc *rs;
    943 	RF_Config_t *k_cfg, *u_cfg;
    944 	RF_Raid_t *raidPtr;
    945 	RF_RaidDisk_t *diskPtr;
    946 	RF_AccTotals_t *totals;
    947 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    948 	u_char *specific_buf;
    949 	int retcode = 0;
    950 	int column;
    951 	int raidid;
    952 	struct rf_recon_req *rrcopy, *rr;
    953 	RF_ComponentLabel_t *clabel;
    954 	RF_ComponentLabel_t *ci_label;
    955 	RF_ComponentLabel_t **clabel_ptr;
    956 	RF_SingleComponent_t *sparePtr,*componentPtr;
    957 	RF_SingleComponent_t component;
    958 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    959 	int i, j, d;
    960 #ifdef __HAVE_OLD_DISKLABEL
    961 	struct disklabel newlabel;
    962 #endif
    963 	struct dkwedge_info *dkw;
    964 
    965 	if (unit >= numraid)
    966 		return (ENXIO);
    967 	rs = &raid_softc[unit];
    968 	raidPtr = raidPtrs[unit];
    969 
    970 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    971 		(int) DISKPART(dev), (int) unit, (int) cmd));
    972 
    973 	/* Must be open for writes for these commands... */
    974 	switch (cmd) {
    975 #ifdef DIOCGSECTORSIZE
    976 	case DIOCGSECTORSIZE:
    977 		*(u_int *)data = raidPtr->bytesPerSector;
    978 		return 0;
    979 	case DIOCGMEDIASIZE:
    980 		*(off_t *)data =
    981 		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
    982 		return 0;
    983 #endif
    984 	case DIOCSDINFO:
    985 	case DIOCWDINFO:
    986 #ifdef __HAVE_OLD_DISKLABEL
    987 	case ODIOCWDINFO:
    988 	case ODIOCSDINFO:
    989 #endif
    990 	case DIOCWLABEL:
    991 	case DIOCAWEDGE:
    992 	case DIOCDWEDGE:
    993 		if ((flag & FWRITE) == 0)
    994 			return (EBADF);
    995 	}
    996 
    997 	/* Must be initialized for these... */
    998 	switch (cmd) {
    999 	case DIOCGDINFO:
   1000 	case DIOCSDINFO:
   1001 	case DIOCWDINFO:
   1002 #ifdef __HAVE_OLD_DISKLABEL
   1003 	case ODIOCGDINFO:
   1004 	case ODIOCWDINFO:
   1005 	case ODIOCSDINFO:
   1006 	case ODIOCGDEFLABEL:
   1007 #endif
   1008 	case DIOCGPART:
   1009 	case DIOCWLABEL:
   1010 	case DIOCGDEFLABEL:
   1011 	case DIOCAWEDGE:
   1012 	case DIOCDWEDGE:
   1013 	case DIOCLWEDGES:
   1014 	case RAIDFRAME_SHUTDOWN:
   1015 	case RAIDFRAME_REWRITEPARITY:
   1016 	case RAIDFRAME_GET_INFO:
   1017 	case RAIDFRAME_RESET_ACCTOTALS:
   1018 	case RAIDFRAME_GET_ACCTOTALS:
   1019 	case RAIDFRAME_KEEP_ACCTOTALS:
   1020 	case RAIDFRAME_GET_SIZE:
   1021 	case RAIDFRAME_FAIL_DISK:
   1022 	case RAIDFRAME_COPYBACK:
   1023 	case RAIDFRAME_CHECK_RECON_STATUS:
   1024 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1025 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1026 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1027 	case RAIDFRAME_ADD_HOT_SPARE:
   1028 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1029 	case RAIDFRAME_INIT_LABELS:
   1030 	case RAIDFRAME_REBUILD_IN_PLACE:
   1031 	case RAIDFRAME_CHECK_PARITY:
   1032 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1033 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1034 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1035 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1036 	case RAIDFRAME_SET_AUTOCONFIG:
   1037 	case RAIDFRAME_SET_ROOT:
   1038 	case RAIDFRAME_DELETE_COMPONENT:
   1039 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1040 		if ((rs->sc_flags & RAIDF_INITED) == 0)
   1041 			return (ENXIO);
   1042 	}
   1043 
   1044 	switch (cmd) {
   1045 
   1046 		/* configure the system */
   1047 	case RAIDFRAME_CONFIGURE:
   1048 
   1049 		if (raidPtr->valid) {
   1050 			/* There is a valid RAID set running on this unit! */
   1051 			printf("raid%d: Device already configured!\n",unit);
   1052 			return(EINVAL);
   1053 		}
   1054 
   1055 		/* copy-in the configuration information */
   1056 		/* data points to a pointer to the configuration structure */
   1057 
   1058 		u_cfg = *((RF_Config_t **) data);
   1059 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
   1060 		if (k_cfg == NULL) {
   1061 			return (ENOMEM);
   1062 		}
   1063 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
   1064 		if (retcode) {
   1065 			RF_Free(k_cfg, sizeof(RF_Config_t));
   1066 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
   1067 				retcode));
   1068 			return (retcode);
   1069 		}
   1070 		/* allocate a buffer for the layout-specific data, and copy it
   1071 		 * in */
   1072 		if (k_cfg->layoutSpecificSize) {
   1073 			if (k_cfg->layoutSpecificSize > 10000) {
   1074 				/* sanity check */
   1075 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1076 				return (EINVAL);
   1077 			}
   1078 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
   1079 			    (u_char *));
   1080 			if (specific_buf == NULL) {
   1081 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1082 				return (ENOMEM);
   1083 			}
   1084 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1085 			    k_cfg->layoutSpecificSize);
   1086 			if (retcode) {
   1087 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1088 				RF_Free(specific_buf,
   1089 					k_cfg->layoutSpecificSize);
   1090 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
   1091 					retcode));
   1092 				return (retcode);
   1093 			}
   1094 		} else
   1095 			specific_buf = NULL;
   1096 		k_cfg->layoutSpecific = specific_buf;
   1097 
   1098 		/* should do some kind of sanity check on the configuration.
   1099 		 * Store the sum of all the bytes in the last byte? */
   1100 
   1101 		/* configure the system */
   1102 
   1103 		/*
   1104 		 * Clear the entire RAID descriptor, just to make sure
   1105 		 *  there is no stale data left in the case of a
   1106 		 *  reconfiguration
   1107 		 */
   1108 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
   1109 		raidPtr->raidid = unit;
   1110 
   1111 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
   1112 
   1113 		if (retcode == 0) {
   1114 
   1115 			/* allow this many simultaneous IO's to
   1116 			   this RAID device */
   1117 			raidPtr->openings = RAIDOUTSTANDING;
   1118 
   1119 			raidinit(raidPtr);
   1120 			rf_markalldirty(raidPtr);
   1121 		}
   1122 		/* free the buffers.  No return code here. */
   1123 		if (k_cfg->layoutSpecificSize) {
   1124 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1125 		}
   1126 		RF_Free(k_cfg, sizeof(RF_Config_t));
   1127 
   1128 		return (retcode);
   1129 
   1130 		/* shutdown the system */
   1131 	case RAIDFRAME_SHUTDOWN:
   1132 
   1133 		if ((error = raidlock(rs)) != 0)
   1134 			return (error);
   1135 
   1136 		/*
   1137 		 * If somebody has a partition mounted, we shouldn't
   1138 		 * shutdown.
   1139 		 */
   1140 
   1141 		part = DISKPART(dev);
   1142 		pmask = (1 << part);
   1143 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
   1144 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
   1145 			(rs->sc_dkdev.dk_copenmask & pmask))) {
   1146 			raidunlock(rs);
   1147 			return (EBUSY);
   1148 		}
   1149 
   1150 		retcode = rf_Shutdown(raidPtr);
   1151 
   1152 		/* It's no longer initialized... */
   1153 		rs->sc_flags &= ~RAIDF_INITED;
   1154 
   1155 		/* free the pseudo device attach bits */
   1156 
   1157 		cf = device_cfdata(rs->sc_dev);
   1158 		/* XXX this causes us to not return any errors
   1159 		   from the above call to rf_Shutdown() */
   1160 		retcode = config_detach(rs->sc_dev, DETACH_QUIET);
   1161 		free(cf, M_RAIDFRAME);
   1162 
   1163 		/* Detach the disk. */
   1164 		pseudo_disk_detach(&rs->sc_dkdev);
   1165 
   1166 		raidunlock(rs);
   1167 
   1168 		return (retcode);
   1169 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1170 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1171 		/* need to read the component label for the disk indicated
   1172 		   by row,column in clabel */
   1173 
   1174 		/* For practice, let's get it directly fromdisk, rather
   1175 		   than from the in-core copy */
   1176 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1177 			   (RF_ComponentLabel_t *));
   1178 		if (clabel == NULL)
   1179 			return (ENOMEM);
   1180 
   1181 		retcode = copyin( *clabel_ptr, clabel,
   1182 				  sizeof(RF_ComponentLabel_t));
   1183 
   1184 		if (retcode) {
   1185 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1186 			return(retcode);
   1187 		}
   1188 
   1189 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1190 
   1191 		column = clabel->column;
   1192 
   1193 		if ((column < 0) || (column >= raidPtr->numCol +
   1194 				     raidPtr->numSpare)) {
   1195 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1196 			return(EINVAL);
   1197 		}
   1198 
   1199 		retcode = raidread_component_label(raidPtr->Disks[column].dev,
   1200 				raidPtr->raid_cinfo[column].ci_vp,
   1201 				clabel );
   1202 
   1203 		if (retcode == 0) {
   1204 			retcode = copyout(clabel, *clabel_ptr,
   1205 					  sizeof(RF_ComponentLabel_t));
   1206 		}
   1207 		RF_Free(clabel, sizeof(RF_ComponentLabel_t));
   1208 		return (retcode);
   1209 
   1210 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1211 		clabel = (RF_ComponentLabel_t *) data;
   1212 
   1213 		/* XXX check the label for valid stuff... */
   1214 		/* Note that some things *should not* get modified --
   1215 		   the user should be re-initing the labels instead of
   1216 		   trying to patch things.
   1217 		   */
   1218 
   1219 		raidid = raidPtr->raidid;
   1220 #ifdef DEBUG
   1221 		printf("raid%d: Got component label:\n", raidid);
   1222 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1223 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1224 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1225 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1226 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1227 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1228 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1229 #endif
   1230 		clabel->row = 0;
   1231 		column = clabel->column;
   1232 
   1233 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1234 			return(EINVAL);
   1235 		}
   1236 
   1237 		/* XXX this isn't allowed to do anything for now :-) */
   1238 
   1239 		/* XXX and before it is, we need to fill in the rest
   1240 		   of the fields!?!?!?! */
   1241 #if 0
   1242 		raidwrite_component_label(
   1243 		     raidPtr->Disks[column].dev,
   1244 			    raidPtr->raid_cinfo[column].ci_vp,
   1245 			    clabel );
   1246 #endif
   1247 		return (0);
   1248 
   1249 	case RAIDFRAME_INIT_LABELS:
   1250 		clabel = (RF_ComponentLabel_t *) data;
   1251 		/*
   1252 		   we only want the serial number from
   1253 		   the above.  We get all the rest of the information
   1254 		   from the config that was used to create this RAID
   1255 		   set.
   1256 		   */
   1257 
   1258 		raidPtr->serial_number = clabel->serial_number;
   1259 
   1260 		RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
   1261 			  (RF_ComponentLabel_t *));
   1262 		if (ci_label == NULL)
   1263 			return (ENOMEM);
   1264 
   1265 		raid_init_component_label(raidPtr, ci_label);
   1266 		ci_label->serial_number = clabel->serial_number;
   1267 		ci_label->row = 0; /* we dont' pretend to support more */
   1268 
   1269 		for(column=0;column<raidPtr->numCol;column++) {
   1270 			diskPtr = &raidPtr->Disks[column];
   1271 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1272 				ci_label->partitionSize = diskPtr->partitionSize;
   1273 				ci_label->column = column;
   1274 				raidwrite_component_label(
   1275 							  raidPtr->Disks[column].dev,
   1276 							  raidPtr->raid_cinfo[column].ci_vp,
   1277 							  ci_label );
   1278 			}
   1279 		}
   1280 		RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
   1281 
   1282 		return (retcode);
   1283 	case RAIDFRAME_SET_AUTOCONFIG:
   1284 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1285 		printf("raid%d: New autoconfig value is: %d\n",
   1286 		       raidPtr->raidid, d);
   1287 		*(int *) data = d;
   1288 		return (retcode);
   1289 
   1290 	case RAIDFRAME_SET_ROOT:
   1291 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1292 		printf("raid%d: New rootpartition value is: %d\n",
   1293 		       raidPtr->raidid, d);
   1294 		*(int *) data = d;
   1295 		return (retcode);
   1296 
   1297 		/* initialize all parity */
   1298 	case RAIDFRAME_REWRITEPARITY:
   1299 
   1300 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1301 			/* Parity for RAID 0 is trivially correct */
   1302 			raidPtr->parity_good = RF_RAID_CLEAN;
   1303 			return(0);
   1304 		}
   1305 
   1306 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1307 			/* Re-write is already in progress! */
   1308 			return(EINVAL);
   1309 		}
   1310 
   1311 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1312 					   rf_RewriteParityThread,
   1313 					   raidPtr,"raid_parity");
   1314 		return (retcode);
   1315 
   1316 
   1317 	case RAIDFRAME_ADD_HOT_SPARE:
   1318 		sparePtr = (RF_SingleComponent_t *) data;
   1319 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
   1320 		retcode = rf_add_hot_spare(raidPtr, &component);
   1321 		return(retcode);
   1322 
   1323 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1324 		return(retcode);
   1325 
   1326 	case RAIDFRAME_DELETE_COMPONENT:
   1327 		componentPtr = (RF_SingleComponent_t *)data;
   1328 		memcpy( &component, componentPtr,
   1329 			sizeof(RF_SingleComponent_t));
   1330 		retcode = rf_delete_component(raidPtr, &component);
   1331 		return(retcode);
   1332 
   1333 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1334 		componentPtr = (RF_SingleComponent_t *)data;
   1335 		memcpy( &component, componentPtr,
   1336 			sizeof(RF_SingleComponent_t));
   1337 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1338 		return(retcode);
   1339 
   1340 	case RAIDFRAME_REBUILD_IN_PLACE:
   1341 
   1342 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1343 			/* Can't do this on a RAID 0!! */
   1344 			return(EINVAL);
   1345 		}
   1346 
   1347 		if (raidPtr->recon_in_progress == 1) {
   1348 			/* a reconstruct is already in progress! */
   1349 			return(EINVAL);
   1350 		}
   1351 
   1352 		componentPtr = (RF_SingleComponent_t *) data;
   1353 		memcpy( &component, componentPtr,
   1354 			sizeof(RF_SingleComponent_t));
   1355 		component.row = 0; /* we don't support any more */
   1356 		column = component.column;
   1357 
   1358 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1359 			return(EINVAL);
   1360 		}
   1361 
   1362 		RF_LOCK_MUTEX(raidPtr->mutex);
   1363 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1364 		    (raidPtr->numFailures > 0)) {
   1365 			/* XXX 0 above shouldn't be constant!!! */
   1366 			/* some component other than this has failed.
   1367 			   Let's not make things worse than they already
   1368 			   are... */
   1369 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1370 			       raidPtr->raidid);
   1371 			printf("raid%d:     Col: %d   Too many failures.\n",
   1372 			       raidPtr->raidid, column);
   1373 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1374 			return (EINVAL);
   1375 		}
   1376 		if (raidPtr->Disks[column].status ==
   1377 		    rf_ds_reconstructing) {
   1378 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1379 			       raidPtr->raidid);
   1380 			printf("raid%d:    Col: %d   Reconstruction already occuring!\n", raidPtr->raidid, column);
   1381 
   1382 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1383 			return (EINVAL);
   1384 		}
   1385 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1386 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1387 			return (EINVAL);
   1388 		}
   1389 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1390 
   1391 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1392 		if (rrcopy == NULL)
   1393 			return(ENOMEM);
   1394 
   1395 		rrcopy->raidPtr = (void *) raidPtr;
   1396 		rrcopy->col = column;
   1397 
   1398 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1399 					   rf_ReconstructInPlaceThread,
   1400 					   rrcopy,"raid_reconip");
   1401 		return(retcode);
   1402 
   1403 	case RAIDFRAME_GET_INFO:
   1404 		if (!raidPtr->valid)
   1405 			return (ENODEV);
   1406 		ucfgp = (RF_DeviceConfig_t **) data;
   1407 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1408 			  (RF_DeviceConfig_t *));
   1409 		if (d_cfg == NULL)
   1410 			return (ENOMEM);
   1411 		d_cfg->rows = 1; /* there is only 1 row now */
   1412 		d_cfg->cols = raidPtr->numCol;
   1413 		d_cfg->ndevs = raidPtr->numCol;
   1414 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1415 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1416 			return (ENOMEM);
   1417 		}
   1418 		d_cfg->nspares = raidPtr->numSpare;
   1419 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1420 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1421 			return (ENOMEM);
   1422 		}
   1423 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1424 		d = 0;
   1425 		for (j = 0; j < d_cfg->cols; j++) {
   1426 			d_cfg->devs[d] = raidPtr->Disks[j];
   1427 			d++;
   1428 		}
   1429 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1430 			d_cfg->spares[i] = raidPtr->Disks[j];
   1431 		}
   1432 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1433 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1434 
   1435 		return (retcode);
   1436 
   1437 	case RAIDFRAME_CHECK_PARITY:
   1438 		*(int *) data = raidPtr->parity_good;
   1439 		return (0);
   1440 
   1441 	case RAIDFRAME_RESET_ACCTOTALS:
   1442 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1443 		return (0);
   1444 
   1445 	case RAIDFRAME_GET_ACCTOTALS:
   1446 		totals = (RF_AccTotals_t *) data;
   1447 		*totals = raidPtr->acc_totals;
   1448 		return (0);
   1449 
   1450 	case RAIDFRAME_KEEP_ACCTOTALS:
   1451 		raidPtr->keep_acc_totals = *(int *)data;
   1452 		return (0);
   1453 
   1454 	case RAIDFRAME_GET_SIZE:
   1455 		*(int *) data = raidPtr->totalSectors;
   1456 		return (0);
   1457 
   1458 		/* fail a disk & optionally start reconstruction */
   1459 	case RAIDFRAME_FAIL_DISK:
   1460 
   1461 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1462 			/* Can't do this on a RAID 0!! */
   1463 			return(EINVAL);
   1464 		}
   1465 
   1466 		rr = (struct rf_recon_req *) data;
   1467 		rr->row = 0;
   1468 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1469 			return (EINVAL);
   1470 
   1471 
   1472 		RF_LOCK_MUTEX(raidPtr->mutex);
   1473 		if (raidPtr->status == rf_rs_reconstructing) {
   1474 			/* you can't fail a disk while we're reconstructing! */
   1475 			/* XXX wrong for RAID6 */
   1476 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1477 			return (EINVAL);
   1478 		}
   1479 		if ((raidPtr->Disks[rr->col].status ==
   1480 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1481 			/* some other component has failed.  Let's not make
   1482 			   things worse. XXX wrong for RAID6 */
   1483 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1484 			return (EINVAL);
   1485 		}
   1486 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1487 			/* Can't fail a spared disk! */
   1488 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1489 			return (EINVAL);
   1490 		}
   1491 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1492 
   1493 		/* make a copy of the recon request so that we don't rely on
   1494 		 * the user's buffer */
   1495 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1496 		if (rrcopy == NULL)
   1497 			return(ENOMEM);
   1498 		memcpy(rrcopy, rr, sizeof(*rr));
   1499 		rrcopy->raidPtr = (void *) raidPtr;
   1500 
   1501 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1502 					   rf_ReconThread,
   1503 					   rrcopy,"raid_recon");
   1504 		return (0);
   1505 
   1506 		/* invoke a copyback operation after recon on whatever disk
   1507 		 * needs it, if any */
   1508 	case RAIDFRAME_COPYBACK:
   1509 
   1510 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1511 			/* This makes no sense on a RAID 0!! */
   1512 			return(EINVAL);
   1513 		}
   1514 
   1515 		if (raidPtr->copyback_in_progress == 1) {
   1516 			/* Copyback is already in progress! */
   1517 			return(EINVAL);
   1518 		}
   1519 
   1520 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1521 					   rf_CopybackThread,
   1522 					   raidPtr,"raid_copyback");
   1523 		return (retcode);
   1524 
   1525 		/* return the percentage completion of reconstruction */
   1526 	case RAIDFRAME_CHECK_RECON_STATUS:
   1527 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1528 			/* This makes no sense on a RAID 0, so tell the
   1529 			   user it's done. */
   1530 			*(int *) data = 100;
   1531 			return(0);
   1532 		}
   1533 		if (raidPtr->status != rf_rs_reconstructing)
   1534 			*(int *) data = 100;
   1535 		else {
   1536 			if (raidPtr->reconControl->numRUsTotal > 0) {
   1537 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
   1538 			} else {
   1539 				*(int *) data = 0;
   1540 			}
   1541 		}
   1542 		return (0);
   1543 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1544 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1545 		if (raidPtr->status != rf_rs_reconstructing) {
   1546 			progressInfo.remaining = 0;
   1547 			progressInfo.completed = 100;
   1548 			progressInfo.total = 100;
   1549 		} else {
   1550 			progressInfo.total =
   1551 				raidPtr->reconControl->numRUsTotal;
   1552 			progressInfo.completed =
   1553 				raidPtr->reconControl->numRUsComplete;
   1554 			progressInfo.remaining = progressInfo.total -
   1555 				progressInfo.completed;
   1556 		}
   1557 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1558 				  sizeof(RF_ProgressInfo_t));
   1559 		return (retcode);
   1560 
   1561 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1562 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1563 			/* This makes no sense on a RAID 0, so tell the
   1564 			   user it's done. */
   1565 			*(int *) data = 100;
   1566 			return(0);
   1567 		}
   1568 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1569 			*(int *) data = 100 *
   1570 				raidPtr->parity_rewrite_stripes_done /
   1571 				raidPtr->Layout.numStripe;
   1572 		} else {
   1573 			*(int *) data = 100;
   1574 		}
   1575 		return (0);
   1576 
   1577 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1578 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1579 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1580 			progressInfo.total = raidPtr->Layout.numStripe;
   1581 			progressInfo.completed =
   1582 				raidPtr->parity_rewrite_stripes_done;
   1583 			progressInfo.remaining = progressInfo.total -
   1584 				progressInfo.completed;
   1585 		} else {
   1586 			progressInfo.remaining = 0;
   1587 			progressInfo.completed = 100;
   1588 			progressInfo.total = 100;
   1589 		}
   1590 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1591 				  sizeof(RF_ProgressInfo_t));
   1592 		return (retcode);
   1593 
   1594 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1595 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1596 			/* This makes no sense on a RAID 0 */
   1597 			*(int *) data = 100;
   1598 			return(0);
   1599 		}
   1600 		if (raidPtr->copyback_in_progress == 1) {
   1601 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1602 				raidPtr->Layout.numStripe;
   1603 		} else {
   1604 			*(int *) data = 100;
   1605 		}
   1606 		return (0);
   1607 
   1608 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1609 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1610 		if (raidPtr->copyback_in_progress == 1) {
   1611 			progressInfo.total = raidPtr->Layout.numStripe;
   1612 			progressInfo.completed =
   1613 				raidPtr->copyback_stripes_done;
   1614 			progressInfo.remaining = progressInfo.total -
   1615 				progressInfo.completed;
   1616 		} else {
   1617 			progressInfo.remaining = 0;
   1618 			progressInfo.completed = 100;
   1619 			progressInfo.total = 100;
   1620 		}
   1621 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1622 				  sizeof(RF_ProgressInfo_t));
   1623 		return (retcode);
   1624 
   1625 		/* the sparetable daemon calls this to wait for the kernel to
   1626 		 * need a spare table. this ioctl does not return until a
   1627 		 * spare table is needed. XXX -- calling mpsleep here in the
   1628 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1629 		 * -- I should either compute the spare table in the kernel,
   1630 		 * or have a different -- XXX XXX -- interface (a different
   1631 		 * character device) for delivering the table     -- XXX */
   1632 #if 0
   1633 	case RAIDFRAME_SPARET_WAIT:
   1634 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1635 		while (!rf_sparet_wait_queue)
   1636 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1637 		waitreq = rf_sparet_wait_queue;
   1638 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1639 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1640 
   1641 		/* structure assignment */
   1642 		*((RF_SparetWait_t *) data) = *waitreq;
   1643 
   1644 		RF_Free(waitreq, sizeof(*waitreq));
   1645 		return (0);
   1646 
   1647 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1648 		 * code in it that will cause the dameon to exit */
   1649 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1650 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1651 		waitreq->fcol = -1;
   1652 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1653 		waitreq->next = rf_sparet_wait_queue;
   1654 		rf_sparet_wait_queue = waitreq;
   1655 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1656 		wakeup(&rf_sparet_wait_queue);
   1657 		return (0);
   1658 
   1659 		/* used by the spare table daemon to deliver a spare table
   1660 		 * into the kernel */
   1661 	case RAIDFRAME_SEND_SPARET:
   1662 
   1663 		/* install the spare table */
   1664 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1665 
   1666 		/* respond to the requestor.  the return status of the spare
   1667 		 * table installation is passed in the "fcol" field */
   1668 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1669 		waitreq->fcol = retcode;
   1670 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1671 		waitreq->next = rf_sparet_resp_queue;
   1672 		rf_sparet_resp_queue = waitreq;
   1673 		wakeup(&rf_sparet_resp_queue);
   1674 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1675 
   1676 		return (retcode);
   1677 #endif
   1678 
   1679 	default:
   1680 		break; /* fall through to the os-specific code below */
   1681 
   1682 	}
   1683 
   1684 	if (!raidPtr->valid)
   1685 		return (EINVAL);
   1686 
   1687 	/*
   1688 	 * Add support for "regular" device ioctls here.
   1689 	 */
   1690 
   1691 	switch (cmd) {
   1692 	case DIOCGDINFO:
   1693 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1694 		break;
   1695 #ifdef __HAVE_OLD_DISKLABEL
   1696 	case ODIOCGDINFO:
   1697 		newlabel = *(rs->sc_dkdev.dk_label);
   1698 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1699 			return ENOTTY;
   1700 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1701 		break;
   1702 #endif
   1703 
   1704 	case DIOCGPART:
   1705 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1706 		((struct partinfo *) data)->part =
   1707 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1708 		break;
   1709 
   1710 	case DIOCWDINFO:
   1711 	case DIOCSDINFO:
   1712 #ifdef __HAVE_OLD_DISKLABEL
   1713 	case ODIOCWDINFO:
   1714 	case ODIOCSDINFO:
   1715 #endif
   1716 	{
   1717 		struct disklabel *lp;
   1718 #ifdef __HAVE_OLD_DISKLABEL
   1719 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1720 			memset(&newlabel, 0, sizeof newlabel);
   1721 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1722 			lp = &newlabel;
   1723 		} else
   1724 #endif
   1725 		lp = (struct disklabel *)data;
   1726 
   1727 		if ((error = raidlock(rs)) != 0)
   1728 			return (error);
   1729 
   1730 		rs->sc_flags |= RAIDF_LABELLING;
   1731 
   1732 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1733 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1734 		if (error == 0) {
   1735 			if (cmd == DIOCWDINFO
   1736 #ifdef __HAVE_OLD_DISKLABEL
   1737 			    || cmd == ODIOCWDINFO
   1738 #endif
   1739 			   )
   1740 				error = writedisklabel(RAIDLABELDEV(dev),
   1741 				    raidstrategy, rs->sc_dkdev.dk_label,
   1742 				    rs->sc_dkdev.dk_cpulabel);
   1743 		}
   1744 		rs->sc_flags &= ~RAIDF_LABELLING;
   1745 
   1746 		raidunlock(rs);
   1747 
   1748 		if (error)
   1749 			return (error);
   1750 		break;
   1751 	}
   1752 
   1753 	case DIOCWLABEL:
   1754 		if (*(int *) data != 0)
   1755 			rs->sc_flags |= RAIDF_WLABEL;
   1756 		else
   1757 			rs->sc_flags &= ~RAIDF_WLABEL;
   1758 		break;
   1759 
   1760 	case DIOCGDEFLABEL:
   1761 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1762 		break;
   1763 
   1764 #ifdef __HAVE_OLD_DISKLABEL
   1765 	case ODIOCGDEFLABEL:
   1766 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1767 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1768 			return ENOTTY;
   1769 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1770 		break;
   1771 #endif
   1772 
   1773 	case DIOCAWEDGE:
   1774 	case DIOCDWEDGE:
   1775 	    	dkw = (void *)data;
   1776 
   1777 		/* If the ioctl happens here, the parent is us. */
   1778 		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
   1779 		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
   1780 
   1781 	case DIOCLWEDGES:
   1782 		return dkwedge_list(&rs->sc_dkdev,
   1783 		    (struct dkwedge_list *)data, l);
   1784 
   1785 	default:
   1786 		retcode = ENOTTY;
   1787 	}
   1788 	return (retcode);
   1789 
   1790 }
   1791 
   1792 
   1793 /* raidinit -- complete the rest of the initialization for the
   1794    RAIDframe device.  */
   1795 
   1796 
   1797 static void
   1798 raidinit(RF_Raid_t *raidPtr)
   1799 {
   1800 	struct cfdata *cf;
   1801 	struct raid_softc *rs;
   1802 	int     unit;
   1803 
   1804 	unit = raidPtr->raidid;
   1805 
   1806 	rs = &raid_softc[unit];
   1807 
   1808 	/* XXX should check return code first... */
   1809 	rs->sc_flags |= RAIDF_INITED;
   1810 
   1811 	/* XXX doesn't check bounds. */
   1812 	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
   1813 
   1814 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1815 
   1816 	/* attach the pseudo device */
   1817 	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
   1818 	cf->cf_name = raid_cd.cd_name;
   1819 	cf->cf_atname = raid_cd.cd_name;
   1820 	cf->cf_unit = unit;
   1821 	cf->cf_fstate = FSTATE_STAR;
   1822 
   1823 	rs->sc_dev = config_attach_pseudo(cf);
   1824 
   1825 	if (rs->sc_dev==NULL) {
   1826 		printf("raid%d: config_attach_pseudo failed\n",
   1827 		       raidPtr->raidid);
   1828 	}
   1829 
   1830 	/* disk_attach actually creates space for the CPU disklabel, among
   1831 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1832 	 * with disklabels. */
   1833 
   1834 	disk_attach(&rs->sc_dkdev);
   1835 
   1836 	/* XXX There may be a weird interaction here between this, and
   1837 	 * protectedSectors, as used in RAIDframe.  */
   1838 
   1839 	rs->sc_size = raidPtr->totalSectors;
   1840 }
   1841 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   1842 /* wake up the daemon & tell it to get us a spare table
   1843  * XXX
   1844  * the entries in the queues should be tagged with the raidPtr
   1845  * so that in the extremely rare case that two recons happen at once,
   1846  * we know for which device were requesting a spare table
   1847  * XXX
   1848  *
   1849  * XXX This code is not currently used. GO
   1850  */
   1851 int
   1852 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
   1853 {
   1854 	int     retcode;
   1855 
   1856 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1857 	req->next = rf_sparet_wait_queue;
   1858 	rf_sparet_wait_queue = req;
   1859 	wakeup(&rf_sparet_wait_queue);
   1860 
   1861 	/* mpsleep unlocks the mutex */
   1862 	while (!rf_sparet_resp_queue) {
   1863 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1864 		    "raidframe getsparetable", 0);
   1865 	}
   1866 	req = rf_sparet_resp_queue;
   1867 	rf_sparet_resp_queue = req->next;
   1868 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1869 
   1870 	retcode = req->fcol;
   1871 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1872 					 * alloc'd */
   1873 	return (retcode);
   1874 }
   1875 #endif
   1876 
   1877 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1878  * bp & passes it down.
   1879  * any calls originating in the kernel must use non-blocking I/O
   1880  * do some extra sanity checking to return "appropriate" error values for
   1881  * certain conditions (to make some standard utilities work)
   1882  *
   1883  * Formerly known as: rf_DoAccessKernel
   1884  */
   1885 void
   1886 raidstart(RF_Raid_t *raidPtr)
   1887 {
   1888 	RF_SectorCount_t num_blocks, pb, sum;
   1889 	RF_RaidAddr_t raid_addr;
   1890 	struct partition *pp;
   1891 	daddr_t blocknum;
   1892 	int     unit;
   1893 	struct raid_softc *rs;
   1894 	int     do_async;
   1895 	struct buf *bp;
   1896 	int rc;
   1897 
   1898 	unit = raidPtr->raidid;
   1899 	rs = &raid_softc[unit];
   1900 
   1901 	/* quick check to see if anything has died recently */
   1902 	RF_LOCK_MUTEX(raidPtr->mutex);
   1903 	if (raidPtr->numNewFailures > 0) {
   1904 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1905 		rf_update_component_labels(raidPtr,
   1906 					   RF_NORMAL_COMPONENT_UPDATE);
   1907 		RF_LOCK_MUTEX(raidPtr->mutex);
   1908 		raidPtr->numNewFailures--;
   1909 	}
   1910 
   1911 	/* Check to see if we're at the limit... */
   1912 	while (raidPtr->openings > 0) {
   1913 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1914 
   1915 		/* get the next item, if any, from the queue */
   1916 		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
   1917 			/* nothing more to do */
   1918 			return;
   1919 		}
   1920 
   1921 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1922 		 * partition.. Need to make it absolute to the underlying
   1923 		 * device.. */
   1924 
   1925 		blocknum = bp->b_blkno;
   1926 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1927 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1928 			blocknum += pp->p_offset;
   1929 		}
   1930 
   1931 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1932 			    (int) blocknum));
   1933 
   1934 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1935 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1936 
   1937 		/* *THIS* is where we adjust what block we're going to...
   1938 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1939 		raid_addr = blocknum;
   1940 
   1941 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1942 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1943 		sum = raid_addr + num_blocks + pb;
   1944 		if (1 || rf_debugKernelAccess) {
   1945 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1946 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1947 				    (int) pb, (int) bp->b_resid));
   1948 		}
   1949 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1950 		    || (sum < num_blocks) || (sum < pb)) {
   1951 			bp->b_error = ENOSPC;
   1952 			bp->b_flags |= B_ERROR;
   1953 			bp->b_resid = bp->b_bcount;
   1954 			biodone(bp);
   1955 			RF_LOCK_MUTEX(raidPtr->mutex);
   1956 			continue;
   1957 		}
   1958 		/*
   1959 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1960 		 */
   1961 
   1962 		if (bp->b_bcount & raidPtr->sectorMask) {
   1963 			bp->b_error = EINVAL;
   1964 			bp->b_flags |= B_ERROR;
   1965 			bp->b_resid = bp->b_bcount;
   1966 			biodone(bp);
   1967 			RF_LOCK_MUTEX(raidPtr->mutex);
   1968 			continue;
   1969 
   1970 		}
   1971 		db1_printf(("Calling DoAccess..\n"));
   1972 
   1973 
   1974 		RF_LOCK_MUTEX(raidPtr->mutex);
   1975 		raidPtr->openings--;
   1976 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1977 
   1978 		/*
   1979 		 * Everything is async.
   1980 		 */
   1981 		do_async = 1;
   1982 
   1983 		disk_busy(&rs->sc_dkdev);
   1984 
   1985 		/* XXX we're still at splbio() here... do we *really*
   1986 		   need to be? */
   1987 
   1988 		/* don't ever condition on bp->b_flags & B_WRITE.
   1989 		 * always condition on B_READ instead */
   1990 
   1991 		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1992 				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1993 				 do_async, raid_addr, num_blocks,
   1994 				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1995 
   1996 		if (rc) {
   1997 			bp->b_error = rc;
   1998 			bp->b_flags |= B_ERROR;
   1999 			bp->b_resid = bp->b_bcount;
   2000 			biodone(bp);
   2001 			/* continue loop */
   2002 		}
   2003 
   2004 		RF_LOCK_MUTEX(raidPtr->mutex);
   2005 	}
   2006 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   2007 }
   2008 
   2009 
   2010 
   2011 
   2012 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   2013 
   2014 int
   2015 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
   2016 {
   2017 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   2018 	struct buf *bp;
   2019 
   2020 	req->queue = queue;
   2021 
   2022 #if DIAGNOSTIC
   2023 	if (queue->raidPtr->raidid >= numraid) {
   2024 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
   2025 		    numraid);
   2026 		panic("Invalid Unit number in rf_DispatchKernelIO");
   2027 	}
   2028 #endif
   2029 
   2030 	bp = req->bp;
   2031 
   2032 	switch (req->type) {
   2033 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   2034 		/* XXX need to do something extra here.. */
   2035 		/* I'm leaving this in, as I've never actually seen it used,
   2036 		 * and I'd like folks to report it... GO */
   2037 		printf(("WAKEUP CALLED\n"));
   2038 		queue->numOutstanding++;
   2039 
   2040 		bp->b_flags = 0;
   2041 		bp->b_private = req;
   2042 
   2043 		KernelWakeupFunc(bp);
   2044 		break;
   2045 
   2046 	case RF_IO_TYPE_READ:
   2047 	case RF_IO_TYPE_WRITE:
   2048 #if RF_ACC_TRACE > 0
   2049 		if (req->tracerec) {
   2050 			RF_ETIMER_START(req->tracerec->timer);
   2051 		}
   2052 #endif
   2053 		InitBP(bp, queue->rf_cinfo->ci_vp,
   2054 		    op, queue->rf_cinfo->ci_dev,
   2055 		    req->sectorOffset, req->numSector,
   2056 		    req->buf, KernelWakeupFunc, (void *) req,
   2057 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   2058 
   2059 		if (rf_debugKernelAccess) {
   2060 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   2061 				(long) bp->b_blkno));
   2062 		}
   2063 		queue->numOutstanding++;
   2064 		queue->last_deq_sector = req->sectorOffset;
   2065 		/* acc wouldn't have been let in if there were any pending
   2066 		 * reqs at any other priority */
   2067 		queue->curPriority = req->priority;
   2068 
   2069 		db1_printf(("Going for %c to unit %d col %d\n",
   2070 			    req->type, queue->raidPtr->raidid,
   2071 			    queue->col));
   2072 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   2073 			(int) req->sectorOffset, (int) req->numSector,
   2074 			(int) (req->numSector <<
   2075 			    queue->raidPtr->logBytesPerSector),
   2076 			(int) queue->raidPtr->logBytesPerSector));
   2077 		VOP_STRATEGY(bp->b_vp, bp);
   2078 
   2079 		break;
   2080 
   2081 	default:
   2082 		panic("bad req->type in rf_DispatchKernelIO");
   2083 	}
   2084 	db1_printf(("Exiting from DispatchKernelIO\n"));
   2085 
   2086 	return (0);
   2087 }
   2088 /* this is the callback function associated with a I/O invoked from
   2089    kernel code.
   2090  */
   2091 static void
   2092 KernelWakeupFunc(struct buf *bp)
   2093 {
   2094 	RF_DiskQueueData_t *req = NULL;
   2095 	RF_DiskQueue_t *queue;
   2096 	int s;
   2097 
   2098 	s = splbio();
   2099 	db1_printf(("recovering the request queue:\n"));
   2100 	req = bp->b_private;
   2101 
   2102 	queue = (RF_DiskQueue_t *) req->queue;
   2103 
   2104 #if RF_ACC_TRACE > 0
   2105 	if (req->tracerec) {
   2106 		RF_ETIMER_STOP(req->tracerec->timer);
   2107 		RF_ETIMER_EVAL(req->tracerec->timer);
   2108 		RF_LOCK_MUTEX(rf_tracing_mutex);
   2109 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2110 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2111 		req->tracerec->num_phys_ios++;
   2112 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   2113 	}
   2114 #endif
   2115 
   2116 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   2117 	 * ballistic, and mark the component as hosed... */
   2118 
   2119 	if (bp->b_flags & B_ERROR) {
   2120 		/* Mark the disk as dead */
   2121 		/* but only mark it once... */
   2122 		/* and only if it wouldn't leave this RAID set
   2123 		   completely broken */
   2124 		if (((queue->raidPtr->Disks[queue->col].status ==
   2125 		      rf_ds_optimal) ||
   2126 		     (queue->raidPtr->Disks[queue->col].status ==
   2127 		      rf_ds_used_spare)) &&
   2128 		     (queue->raidPtr->numFailures <
   2129 		      queue->raidPtr->Layout.map->faultsTolerated)) {
   2130 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   2131 			       queue->raidPtr->raidid,
   2132 			       queue->raidPtr->Disks[queue->col].devname);
   2133 			queue->raidPtr->Disks[queue->col].status =
   2134 			    rf_ds_failed;
   2135 			queue->raidPtr->status = rf_rs_degraded;
   2136 			queue->raidPtr->numFailures++;
   2137 			queue->raidPtr->numNewFailures++;
   2138 		} else {	/* Disk is already dead... */
   2139 			/* printf("Disk already marked as dead!\n"); */
   2140 		}
   2141 
   2142 	}
   2143 
   2144 	/* Fill in the error value */
   2145 
   2146 	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
   2147 
   2148 	simple_lock(&queue->raidPtr->iodone_lock);
   2149 
   2150 	/* Drop this one on the "finished" queue... */
   2151 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   2152 
   2153 	/* Let the raidio thread know there is work to be done. */
   2154 	wakeup(&(queue->raidPtr->iodone));
   2155 
   2156 	simple_unlock(&queue->raidPtr->iodone_lock);
   2157 
   2158 	splx(s);
   2159 }
   2160 
   2161 
   2162 
   2163 /*
   2164  * initialize a buf structure for doing an I/O in the kernel.
   2165  */
   2166 static void
   2167 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2168        RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
   2169        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
   2170        struct proc *b_proc)
   2171 {
   2172 	/* bp->b_flags       = B_PHYS | rw_flag; */
   2173 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   2174 	bp->b_bcount = numSect << logBytesPerSector;
   2175 	bp->b_bufsize = bp->b_bcount;
   2176 	bp->b_error = 0;
   2177 	bp->b_dev = dev;
   2178 	bp->b_data = bf;
   2179 	bp->b_blkno = startSect;
   2180 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2181 	if (bp->b_bcount == 0) {
   2182 		panic("bp->b_bcount is zero in InitBP!!");
   2183 	}
   2184 	bp->b_proc = b_proc;
   2185 	bp->b_iodone = cbFunc;
   2186 	bp->b_private = cbArg;
   2187 	bp->b_vp = b_vp;
   2188 	if ((bp->b_flags & B_READ) == 0) {
   2189 		bp->b_vp->v_numoutput++;
   2190 	}
   2191 
   2192 }
   2193 
   2194 static void
   2195 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
   2196 		    struct disklabel *lp)
   2197 {
   2198 	memset(lp, 0, sizeof(*lp));
   2199 
   2200 	/* fabricate a label... */
   2201 	lp->d_secperunit = raidPtr->totalSectors;
   2202 	lp->d_secsize = raidPtr->bytesPerSector;
   2203 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2204 	lp->d_ntracks = 4 * raidPtr->numCol;
   2205 	lp->d_ncylinders = raidPtr->totalSectors /
   2206 		(lp->d_nsectors * lp->d_ntracks);
   2207 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2208 
   2209 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2210 	lp->d_type = DTYPE_RAID;
   2211 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2212 	lp->d_rpm = 3600;
   2213 	lp->d_interleave = 1;
   2214 	lp->d_flags = 0;
   2215 
   2216 	lp->d_partitions[RAW_PART].p_offset = 0;
   2217 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2218 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2219 	lp->d_npartitions = RAW_PART + 1;
   2220 
   2221 	lp->d_magic = DISKMAGIC;
   2222 	lp->d_magic2 = DISKMAGIC;
   2223 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2224 
   2225 }
   2226 /*
   2227  * Read the disklabel from the raid device.  If one is not present, fake one
   2228  * up.
   2229  */
   2230 static void
   2231 raidgetdisklabel(dev_t dev)
   2232 {
   2233 	int     unit = raidunit(dev);
   2234 	struct raid_softc *rs = &raid_softc[unit];
   2235 	const char   *errstring;
   2236 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2237 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2238 	RF_Raid_t *raidPtr;
   2239 
   2240 	db1_printf(("Getting the disklabel...\n"));
   2241 
   2242 	memset(clp, 0, sizeof(*clp));
   2243 
   2244 	raidPtr = raidPtrs[unit];
   2245 
   2246 	raidgetdefaultlabel(raidPtr, rs, lp);
   2247 
   2248 	/*
   2249 	 * Call the generic disklabel extraction routine.
   2250 	 */
   2251 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2252 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2253 	if (errstring)
   2254 		raidmakedisklabel(rs);
   2255 	else {
   2256 		int     i;
   2257 		struct partition *pp;
   2258 
   2259 		/*
   2260 		 * Sanity check whether the found disklabel is valid.
   2261 		 *
   2262 		 * This is necessary since total size of the raid device
   2263 		 * may vary when an interleave is changed even though exactly
   2264 		 * same components are used, and old disklabel may used
   2265 		 * if that is found.
   2266 		 */
   2267 		if (lp->d_secperunit != rs->sc_size)
   2268 			printf("raid%d: WARNING: %s: "
   2269 			    "total sector size in disklabel (%d) != "
   2270 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2271 			    lp->d_secperunit, (long) rs->sc_size);
   2272 		for (i = 0; i < lp->d_npartitions; i++) {
   2273 			pp = &lp->d_partitions[i];
   2274 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2275 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2276 				       "exceeds the size of raid (%ld)\n",
   2277 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2278 		}
   2279 	}
   2280 
   2281 }
   2282 /*
   2283  * Take care of things one might want to take care of in the event
   2284  * that a disklabel isn't present.
   2285  */
   2286 static void
   2287 raidmakedisklabel(struct raid_softc *rs)
   2288 {
   2289 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2290 	db1_printf(("Making a label..\n"));
   2291 
   2292 	/*
   2293 	 * For historical reasons, if there's no disklabel present
   2294 	 * the raw partition must be marked FS_BSDFFS.
   2295 	 */
   2296 
   2297 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2298 
   2299 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2300 
   2301 	lp->d_checksum = dkcksum(lp);
   2302 }
   2303 /*
   2304  * Wait interruptibly for an exclusive lock.
   2305  *
   2306  * XXX
   2307  * Several drivers do this; it should be abstracted and made MP-safe.
   2308  * (Hmm... where have we seen this warning before :->  GO )
   2309  */
   2310 static int
   2311 raidlock(struct raid_softc *rs)
   2312 {
   2313 	int     error;
   2314 
   2315 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2316 		rs->sc_flags |= RAIDF_WANTED;
   2317 		if ((error =
   2318 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2319 			return (error);
   2320 	}
   2321 	rs->sc_flags |= RAIDF_LOCKED;
   2322 	return (0);
   2323 }
   2324 /*
   2325  * Unlock and wake up any waiters.
   2326  */
   2327 static void
   2328 raidunlock(struct raid_softc *rs)
   2329 {
   2330 
   2331 	rs->sc_flags &= ~RAIDF_LOCKED;
   2332 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2333 		rs->sc_flags &= ~RAIDF_WANTED;
   2334 		wakeup(rs);
   2335 	}
   2336 }
   2337 
   2338 
   2339 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2340 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2341 
   2342 int
   2343 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2344 {
   2345 	RF_ComponentLabel_t clabel;
   2346 	raidread_component_label(dev, b_vp, &clabel);
   2347 	clabel.mod_counter = mod_counter;
   2348 	clabel.clean = RF_RAID_CLEAN;
   2349 	raidwrite_component_label(dev, b_vp, &clabel);
   2350 	return(0);
   2351 }
   2352 
   2353 
   2354 int
   2355 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2356 {
   2357 	RF_ComponentLabel_t clabel;
   2358 	raidread_component_label(dev, b_vp, &clabel);
   2359 	clabel.mod_counter = mod_counter;
   2360 	clabel.clean = RF_RAID_DIRTY;
   2361 	raidwrite_component_label(dev, b_vp, &clabel);
   2362 	return(0);
   2363 }
   2364 
   2365 /* ARGSUSED */
   2366 int
   2367 raidread_component_label(dev_t dev, struct vnode *b_vp,
   2368 			 RF_ComponentLabel_t *clabel)
   2369 {
   2370 	struct buf *bp;
   2371 	const struct bdevsw *bdev;
   2372 	int error;
   2373 
   2374 	/* XXX should probably ensure that we don't try to do this if
   2375 	   someone has changed rf_protected_sectors. */
   2376 
   2377 	if (b_vp == NULL) {
   2378 		/* For whatever reason, this component is not valid.
   2379 		   Don't try to read a component label from it. */
   2380 		return(EINVAL);
   2381 	}
   2382 
   2383 	/* get a block of the appropriate size... */
   2384 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2385 	bp->b_dev = dev;
   2386 
   2387 	/* get our ducks in a row for the read */
   2388 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2389 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2390 	bp->b_flags |= B_READ;
   2391  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2392 
   2393 	bdev = bdevsw_lookup(bp->b_dev);
   2394 	if (bdev == NULL)
   2395 		return (ENXIO);
   2396 	(*bdev->d_strategy)(bp);
   2397 
   2398 	error = biowait(bp);
   2399 
   2400 	if (!error) {
   2401 		memcpy(clabel, bp->b_data,
   2402 		       sizeof(RF_ComponentLabel_t));
   2403 	}
   2404 
   2405 	brelse(bp);
   2406 	return(error);
   2407 }
   2408 /* ARGSUSED */
   2409 int
   2410 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
   2411 			  RF_ComponentLabel_t *clabel)
   2412 {
   2413 	struct buf *bp;
   2414 	const struct bdevsw *bdev;
   2415 	int error;
   2416 
   2417 	/* get a block of the appropriate size... */
   2418 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2419 	bp->b_dev = dev;
   2420 
   2421 	/* get our ducks in a row for the write */
   2422 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2423 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2424 	bp->b_flags |= B_WRITE;
   2425  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2426 
   2427 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2428 
   2429 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2430 
   2431 	bdev = bdevsw_lookup(bp->b_dev);
   2432 	if (bdev == NULL)
   2433 		return (ENXIO);
   2434 	(*bdev->d_strategy)(bp);
   2435 	error = biowait(bp);
   2436 	brelse(bp);
   2437 	if (error) {
   2438 #if 1
   2439 		printf("Failed to write RAID component info!\n");
   2440 #endif
   2441 	}
   2442 
   2443 	return(error);
   2444 }
   2445 
   2446 void
   2447 rf_markalldirty(RF_Raid_t *raidPtr)
   2448 {
   2449 	RF_ComponentLabel_t clabel;
   2450 	int sparecol;
   2451 	int c;
   2452 	int j;
   2453 	int scol = -1;
   2454 
   2455 	raidPtr->mod_counter++;
   2456 	for (c = 0; c < raidPtr->numCol; c++) {
   2457 		/* we don't want to touch (at all) a disk that has
   2458 		   failed */
   2459 		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
   2460 			raidread_component_label(
   2461 						 raidPtr->Disks[c].dev,
   2462 						 raidPtr->raid_cinfo[c].ci_vp,
   2463 						 &clabel);
   2464 			if (clabel.status == rf_ds_spared) {
   2465 				/* XXX do something special...
   2466 				   but whatever you do, don't
   2467 				   try to access it!! */
   2468 			} else {
   2469 				raidmarkdirty(
   2470 					      raidPtr->Disks[c].dev,
   2471 					      raidPtr->raid_cinfo[c].ci_vp,
   2472 					      raidPtr->mod_counter);
   2473 			}
   2474 		}
   2475 	}
   2476 
   2477 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2478 		sparecol = raidPtr->numCol + c;
   2479 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2480 			/*
   2481 
   2482 			   we claim this disk is "optimal" if it's
   2483 			   rf_ds_used_spare, as that means it should be
   2484 			   directly substitutable for the disk it replaced.
   2485 			   We note that too...
   2486 
   2487 			 */
   2488 
   2489 			for(j=0;j<raidPtr->numCol;j++) {
   2490 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2491 					scol = j;
   2492 					break;
   2493 				}
   2494 			}
   2495 
   2496 			raidread_component_label(
   2497 				 raidPtr->Disks[sparecol].dev,
   2498 				 raidPtr->raid_cinfo[sparecol].ci_vp,
   2499 				 &clabel);
   2500 			/* make sure status is noted */
   2501 
   2502 			raid_init_component_label(raidPtr, &clabel);
   2503 
   2504 			clabel.row = 0;
   2505 			clabel.column = scol;
   2506 			/* Note: we *don't* change status from rf_ds_used_spare
   2507 			   to rf_ds_optimal */
   2508 			/* clabel.status = rf_ds_optimal; */
   2509 
   2510 			raidmarkdirty(raidPtr->Disks[sparecol].dev,
   2511 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2512 				      raidPtr->mod_counter);
   2513 		}
   2514 	}
   2515 }
   2516 
   2517 
   2518 void
   2519 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
   2520 {
   2521 	RF_ComponentLabel_t clabel;
   2522 	int sparecol;
   2523 	int c;
   2524 	int j;
   2525 	int scol;
   2526 
   2527 	scol = -1;
   2528 
   2529 	/* XXX should do extra checks to make sure things really are clean,
   2530 	   rather than blindly setting the clean bit... */
   2531 
   2532 	raidPtr->mod_counter++;
   2533 
   2534 	for (c = 0; c < raidPtr->numCol; c++) {
   2535 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   2536 			raidread_component_label(
   2537 						 raidPtr->Disks[c].dev,
   2538 						 raidPtr->raid_cinfo[c].ci_vp,
   2539 						 &clabel);
   2540 			/* make sure status is noted */
   2541 			clabel.status = rf_ds_optimal;
   2542 
   2543 			/* bump the counter */
   2544 			clabel.mod_counter = raidPtr->mod_counter;
   2545 
   2546 			/* note what unit we are configured as */
   2547 			clabel.last_unit = raidPtr->raidid;
   2548 
   2549 			raidwrite_component_label(
   2550 						  raidPtr->Disks[c].dev,
   2551 						  raidPtr->raid_cinfo[c].ci_vp,
   2552 						  &clabel);
   2553 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2554 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2555 					raidmarkclean(
   2556 						      raidPtr->Disks[c].dev,
   2557 						      raidPtr->raid_cinfo[c].ci_vp,
   2558 						      raidPtr->mod_counter);
   2559 				}
   2560 			}
   2561 		}
   2562 		/* else we don't touch it.. */
   2563 	}
   2564 
   2565 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2566 		sparecol = raidPtr->numCol + c;
   2567 		/* Need to ensure that the reconstruct actually completed! */
   2568 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2569 			/*
   2570 
   2571 			   we claim this disk is "optimal" if it's
   2572 			   rf_ds_used_spare, as that means it should be
   2573 			   directly substitutable for the disk it replaced.
   2574 			   We note that too...
   2575 
   2576 			 */
   2577 
   2578 			for(j=0;j<raidPtr->numCol;j++) {
   2579 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2580 					scol = j;
   2581 					break;
   2582 				}
   2583 			}
   2584 
   2585 			/* XXX shouldn't *really* need this... */
   2586 			raidread_component_label(
   2587 				      raidPtr->Disks[sparecol].dev,
   2588 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2589 				      &clabel);
   2590 			/* make sure status is noted */
   2591 
   2592 			raid_init_component_label(raidPtr, &clabel);
   2593 
   2594 			clabel.mod_counter = raidPtr->mod_counter;
   2595 			clabel.column = scol;
   2596 			clabel.status = rf_ds_optimal;
   2597 			clabel.last_unit = raidPtr->raidid;
   2598 
   2599 			raidwrite_component_label(
   2600 				      raidPtr->Disks[sparecol].dev,
   2601 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2602 				      &clabel);
   2603 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2604 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2605 					raidmarkclean( raidPtr->Disks[sparecol].dev,
   2606 						       raidPtr->raid_cinfo[sparecol].ci_vp,
   2607 						       raidPtr->mod_counter);
   2608 				}
   2609 			}
   2610 		}
   2611 	}
   2612 }
   2613 
   2614 void
   2615 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2616 {
   2617 	struct proc *p;
   2618 	struct lwp *l;
   2619 
   2620 	p = raidPtr->engine_thread;
   2621 	l = LIST_FIRST(&p->p_lwps);
   2622 
   2623 	if (vp != NULL) {
   2624 		if (auto_configured == 1) {
   2625 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2626 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2627 			vput(vp);
   2628 
   2629 		} else {
   2630 			(void) vn_close(vp, FREAD | FWRITE, p->p_cred, l);
   2631 		}
   2632 	}
   2633 }
   2634 
   2635 
   2636 void
   2637 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2638 {
   2639 	int r,c;
   2640 	struct vnode *vp;
   2641 	int acd;
   2642 
   2643 
   2644 	/* We take this opportunity to close the vnodes like we should.. */
   2645 
   2646 	for (c = 0; c < raidPtr->numCol; c++) {
   2647 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2648 		acd = raidPtr->Disks[c].auto_configured;
   2649 		rf_close_component(raidPtr, vp, acd);
   2650 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2651 		raidPtr->Disks[c].auto_configured = 0;
   2652 	}
   2653 
   2654 	for (r = 0; r < raidPtr->numSpare; r++) {
   2655 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2656 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2657 		rf_close_component(raidPtr, vp, acd);
   2658 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2659 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2660 	}
   2661 }
   2662 
   2663 
   2664 void
   2665 rf_ReconThread(struct rf_recon_req *req)
   2666 {
   2667 	int     s;
   2668 	RF_Raid_t *raidPtr;
   2669 
   2670 	s = splbio();
   2671 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2672 	raidPtr->recon_in_progress = 1;
   2673 
   2674 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2675 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2676 
   2677 	RF_Free(req, sizeof(*req));
   2678 
   2679 	raidPtr->recon_in_progress = 0;
   2680 	splx(s);
   2681 
   2682 	/* That's all... */
   2683 	kthread_exit(0);	/* does not return */
   2684 }
   2685 
   2686 void
   2687 rf_RewriteParityThread(RF_Raid_t *raidPtr)
   2688 {
   2689 	int retcode;
   2690 	int s;
   2691 
   2692 	raidPtr->parity_rewrite_stripes_done = 0;
   2693 	raidPtr->parity_rewrite_in_progress = 1;
   2694 	s = splbio();
   2695 	retcode = rf_RewriteParity(raidPtr);
   2696 	splx(s);
   2697 	if (retcode) {
   2698 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2699 	} else {
   2700 		/* set the clean bit!  If we shutdown correctly,
   2701 		   the clean bit on each component label will get
   2702 		   set */
   2703 		raidPtr->parity_good = RF_RAID_CLEAN;
   2704 	}
   2705 	raidPtr->parity_rewrite_in_progress = 0;
   2706 
   2707 	/* Anyone waiting for us to stop?  If so, inform them... */
   2708 	if (raidPtr->waitShutdown) {
   2709 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2710 	}
   2711 
   2712 	/* That's all... */
   2713 	kthread_exit(0);	/* does not return */
   2714 }
   2715 
   2716 
   2717 void
   2718 rf_CopybackThread(RF_Raid_t *raidPtr)
   2719 {
   2720 	int s;
   2721 
   2722 	raidPtr->copyback_in_progress = 1;
   2723 	s = splbio();
   2724 	rf_CopybackReconstructedData(raidPtr);
   2725 	splx(s);
   2726 	raidPtr->copyback_in_progress = 0;
   2727 
   2728 	/* That's all... */
   2729 	kthread_exit(0);	/* does not return */
   2730 }
   2731 
   2732 
   2733 void
   2734 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
   2735 {
   2736 	int s;
   2737 	RF_Raid_t *raidPtr;
   2738 
   2739 	s = splbio();
   2740 	raidPtr = req->raidPtr;
   2741 	raidPtr->recon_in_progress = 1;
   2742 	rf_ReconstructInPlace(raidPtr, req->col);
   2743 	RF_Free(req, sizeof(*req));
   2744 	raidPtr->recon_in_progress = 0;
   2745 	splx(s);
   2746 
   2747 	/* That's all... */
   2748 	kthread_exit(0);	/* does not return */
   2749 }
   2750 
   2751 static RF_AutoConfig_t *
   2752 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
   2753     const char *cname, RF_SectorCount_t size)
   2754 {
   2755 	int good_one = 0;
   2756 	RF_ComponentLabel_t *clabel;
   2757 	RF_AutoConfig_t *ac;
   2758 
   2759 	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
   2760 	if (clabel == NULL) {
   2761 oomem:
   2762 		    while(ac_list) {
   2763 			    ac = ac_list;
   2764 			    if (ac->clabel)
   2765 				    free(ac->clabel, M_RAIDFRAME);
   2766 			    ac_list = ac_list->next;
   2767 			    free(ac, M_RAIDFRAME);
   2768 		    }
   2769 		    printf("RAID auto config: out of memory!\n");
   2770 		    return NULL; /* XXX probably should panic? */
   2771 	}
   2772 
   2773 	if (!raidread_component_label(dev, vp, clabel)) {
   2774 		    /* Got the label.  Does it look reasonable? */
   2775 		    if (rf_reasonable_label(clabel) &&
   2776 			(clabel->partitionSize <= size)) {
   2777 #ifdef DEBUG
   2778 			    printf("Component on: %s: %llu\n",
   2779 				cname, (unsigned long long)size);
   2780 			    rf_print_component_label(clabel);
   2781 #endif
   2782 			    /* if it's reasonable, add it, else ignore it. */
   2783 			    ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
   2784 				M_NOWAIT);
   2785 			    if (ac == NULL) {
   2786 				    free(clabel, M_RAIDFRAME);
   2787 				    goto oomem;
   2788 			    }
   2789 			    strlcpy(ac->devname, cname, sizeof(ac->devname));
   2790 			    ac->dev = dev;
   2791 			    ac->vp = vp;
   2792 			    ac->clabel = clabel;
   2793 			    ac->next = ac_list;
   2794 			    ac_list = ac;
   2795 			    good_one = 1;
   2796 		    }
   2797 	}
   2798 	if (!good_one) {
   2799 		/* cleanup */
   2800 		free(clabel, M_RAIDFRAME);
   2801 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2802 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2803 		vput(vp);
   2804 	}
   2805 	return ac_list;
   2806 }
   2807 
   2808 RF_AutoConfig_t *
   2809 rf_find_raid_components()
   2810 {
   2811 	struct vnode *vp;
   2812 	struct disklabel label;
   2813 	struct device *dv;
   2814 	dev_t dev;
   2815 	int bmajor, bminor, wedge;
   2816 	int error;
   2817 	int i;
   2818 	RF_AutoConfig_t *ac_list;
   2819 
   2820 
   2821 	/* initialize the AutoConfig list */
   2822 	ac_list = NULL;
   2823 
   2824 	/* we begin by trolling through *all* the devices on the system */
   2825 
   2826 	for (dv = alldevs.tqh_first; dv != NULL;
   2827 	     dv = dv->dv_list.tqe_next) {
   2828 
   2829 		/* we are only interested in disks... */
   2830 		if (device_class(dv) != DV_DISK)
   2831 			continue;
   2832 
   2833 		/* we don't care about floppies... */
   2834 		if (device_is_a(dv, "fd")) {
   2835 			continue;
   2836 		}
   2837 
   2838 		/* we don't care about CD's... */
   2839 		if (device_is_a(dv, "cd")) {
   2840 			continue;
   2841 		}
   2842 
   2843 		/* hdfd is the Atari/Hades floppy driver */
   2844 		if (device_is_a(dv, "hdfd")) {
   2845 			continue;
   2846 		}
   2847 
   2848 		/* fdisa is the Atari/Milan floppy driver */
   2849 		if (device_is_a(dv, "fdisa")) {
   2850 			continue;
   2851 		}
   2852 
   2853 		/* need to find the device_name_to_block_device_major stuff */
   2854 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2855 
   2856 		/* get a vnode for the raw partition of this disk */
   2857 
   2858 		wedge = device_is_a(dv, "dk");
   2859 		bminor = minor(device_unit(dv));
   2860 		dev = wedge ? makedev(bmajor, bminor) :
   2861 		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
   2862 		if (bdevvp(dev, &vp))
   2863 			panic("RAID can't alloc vnode");
   2864 
   2865 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2866 
   2867 		if (error) {
   2868 			/* "Who cares."  Continue looking
   2869 			   for something that exists*/
   2870 			vput(vp);
   2871 			continue;
   2872 		}
   2873 
   2874 		if (wedge) {
   2875 			struct dkwedge_info dkw;
   2876 			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
   2877 			    NOCRED, 0);
   2878 			if (error) {
   2879 				printf("RAIDframe: can't get wedge info for "
   2880 				    "dev %s (%d)\n", dv->dv_xname, error);
   2881 out:
   2882 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2883 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2884 				vput(vp);
   2885 				continue;
   2886 			}
   2887 
   2888 			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0)
   2889 				goto out;
   2890 
   2891 			ac_list = rf_get_component(ac_list, dev, vp,
   2892 			    dv->dv_xname, dkw.dkw_size);
   2893 			continue;
   2894 		}
   2895 
   2896 		/* Ok, the disk exists.  Go get the disklabel. */
   2897 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
   2898 		if (error) {
   2899 			/*
   2900 			 * XXX can't happen - open() would
   2901 			 * have errored out (or faked up one)
   2902 			 */
   2903 			if (error != ENOTTY)
   2904 				printf("RAIDframe: can't get label for dev "
   2905 				    "%s (%d)\n", dv->dv_xname, error);
   2906 		}
   2907 
   2908 		/* don't need this any more.  We'll allocate it again
   2909 		   a little later if we really do... */
   2910 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2911 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2912 		vput(vp);
   2913 
   2914 		if (error)
   2915 			continue;
   2916 
   2917 		for (i = 0; i < label.d_npartitions; i++) {
   2918 			char cname[sizeof(ac_list->devname)];
   2919 
   2920 			/* We only support partitions marked as RAID */
   2921 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2922 				continue;
   2923 
   2924 			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
   2925 			if (bdevvp(dev, &vp))
   2926 				panic("RAID can't alloc vnode");
   2927 
   2928 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2929 			if (error) {
   2930 				/* Whatever... */
   2931 				vput(vp);
   2932 				continue;
   2933 			}
   2934 			snprintf(cname, sizeof(cname), "%s%c",
   2935 			    dv->dv_xname, 'a' + i);
   2936 			ac_list = rf_get_component(ac_list, dev, vp, cname,
   2937 				label.d_partitions[i].p_size);
   2938 		}
   2939 	}
   2940 	return ac_list;
   2941 }
   2942 
   2943 
   2944 static int
   2945 rf_reasonable_label(RF_ComponentLabel_t *clabel)
   2946 {
   2947 
   2948 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2949 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2950 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2951 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2952 	    clabel->row >=0 &&
   2953 	    clabel->column >= 0 &&
   2954 	    clabel->num_rows > 0 &&
   2955 	    clabel->num_columns > 0 &&
   2956 	    clabel->row < clabel->num_rows &&
   2957 	    clabel->column < clabel->num_columns &&
   2958 	    clabel->blockSize > 0 &&
   2959 	    clabel->numBlocks > 0) {
   2960 		/* label looks reasonable enough... */
   2961 		return(1);
   2962 	}
   2963 	return(0);
   2964 }
   2965 
   2966 
#ifdef DEBUG
/*
 * rf_print_component_label -- dump the fields of a component label to
 * the console.  Only compiled when DEBUG is defined.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	/* position of the component and geometry of the set */
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row,
	    clabel->column,
	    clabel->num_rows,
	    clabel->num_columns);

	/* identification */
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version,
	    clabel->serial_number,
	    clabel->mod_counter);

	/* state */
	printf("   Clean: %s Status: %d\n",
	    clabel->clean ? "Yes" : "No",
	    clabel->status );

	/* layout */
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU,
	    clabel->SUsPerPU,
	    clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig,
	    clabel->blockSize,
	    clabel->numBlocks);

	/* autoconfiguration related settings */
	printf("   Autoconfig: %s\n",
	    clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	    clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n",
	    clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
   2994 
   2995 RF_ConfigSet_t *
   2996 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   2997 {
   2998 	RF_AutoConfig_t *ac;
   2999 	RF_ConfigSet_t *config_sets;
   3000 	RF_ConfigSet_t *cset;
   3001 	RF_AutoConfig_t *ac_next;
   3002 
   3003 
   3004 	config_sets = NULL;
   3005 
   3006 	/* Go through the AutoConfig list, and figure out which components
   3007 	   belong to what sets.  */
   3008 	ac = ac_list;
   3009 	while(ac!=NULL) {
   3010 		/* we're going to putz with ac->next, so save it here
   3011 		   for use at the end of the loop */
   3012 		ac_next = ac->next;
   3013 
   3014 		if (config_sets == NULL) {
   3015 			/* will need at least this one... */
   3016 			config_sets = (RF_ConfigSet_t *)
   3017 				malloc(sizeof(RF_ConfigSet_t),
   3018 				       M_RAIDFRAME, M_NOWAIT);
   3019 			if (config_sets == NULL) {
   3020 				panic("rf_create_auto_sets: No memory!");
   3021 			}
   3022 			/* this one is easy :) */
   3023 			config_sets->ac = ac;
   3024 			config_sets->next = NULL;
   3025 			config_sets->rootable = 0;
   3026 			ac->next = NULL;
   3027 		} else {
   3028 			/* which set does this component fit into? */
   3029 			cset = config_sets;
   3030 			while(cset!=NULL) {
   3031 				if (rf_does_it_fit(cset, ac)) {
   3032 					/* looks like it matches... */
   3033 					ac->next = cset->ac;
   3034 					cset->ac = ac;
   3035 					break;
   3036 				}
   3037 				cset = cset->next;
   3038 			}
   3039 			if (cset==NULL) {
   3040 				/* didn't find a match above... new set..*/
   3041 				cset = (RF_ConfigSet_t *)
   3042 					malloc(sizeof(RF_ConfigSet_t),
   3043 					       M_RAIDFRAME, M_NOWAIT);
   3044 				if (cset == NULL) {
   3045 					panic("rf_create_auto_sets: No memory!");
   3046 				}
   3047 				cset->ac = ac;
   3048 				ac->next = NULL;
   3049 				cset->next = config_sets;
   3050 				cset->rootable = 0;
   3051 				config_sets = cset;
   3052 			}
   3053 		}
   3054 		ac = ac_next;
   3055 	}
   3056 
   3057 
   3058 	return(config_sets);
   3059 }
   3060 
   3061 static int
   3062 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3063 {
   3064 	RF_ComponentLabel_t *clabel1, *clabel2;
   3065 
   3066 	/* If this one matches the *first* one in the set, that's good
   3067 	   enough, since the other members of the set would have been
   3068 	   through here too... */
   3069 	/* note that we are not checking partitionSize here..
   3070 
   3071 	   Note that we are also not checking the mod_counters here.
   3072 	   If everything else matches execpt the mod_counter, that's
   3073 	   good enough for this test.  We will deal with the mod_counters
   3074 	   a little later in the autoconfiguration process.
   3075 
   3076 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3077 
   3078 	   The reason we don't check for this is that failed disks
   3079 	   will have lower modification counts.  If those disks are
   3080 	   not added to the set they used to belong to, then they will
   3081 	   form their own set, which may result in 2 different sets,
   3082 	   for example, competing to be configured at raid0, and
   3083 	   perhaps competing to be the root filesystem set.  If the
   3084 	   wrong ones get configured, or both attempt to become /,
   3085 	   weird behaviour and or serious lossage will occur.  Thus we
   3086 	   need to bring them into the fold here, and kick them out at
   3087 	   a later point.
   3088 
   3089 	*/
   3090 
   3091 	clabel1 = cset->ac->clabel;
   3092 	clabel2 = ac->clabel;
   3093 	if ((clabel1->version == clabel2->version) &&
   3094 	    (clabel1->serial_number == clabel2->serial_number) &&
   3095 	    (clabel1->num_rows == clabel2->num_rows) &&
   3096 	    (clabel1->num_columns == clabel2->num_columns) &&
   3097 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3098 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3099 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3100 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3101 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3102 	    (clabel1->blockSize == clabel2->blockSize) &&
   3103 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   3104 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3105 	    (clabel1->root_partition == clabel2->root_partition) &&
   3106 	    (clabel1->last_unit == clabel2->last_unit) &&
   3107 	    (clabel1->config_order == clabel2->config_order)) {
   3108 		/* if it get's here, it almost *has* to be a match */
   3109 	} else {
   3110 		/* it's not consistent with somebody in the set..
   3111 		   punt */
   3112 		return(0);
   3113 	}
   3114 	/* all was fine.. it must fit... */
   3115 	return(1);
   3116 }
   3117 
   3118 int
   3119 rf_have_enough_components(RF_ConfigSet_t *cset)
   3120 {
   3121 	RF_AutoConfig_t *ac;
   3122 	RF_AutoConfig_t *auto_config;
   3123 	RF_ComponentLabel_t *clabel;
   3124 	int c;
   3125 	int num_cols;
   3126 	int num_missing;
   3127 	int mod_counter;
   3128 	int mod_counter_found;
   3129 	int even_pair_failed;
   3130 	char parity_type;
   3131 
   3132 
   3133 	/* check to see that we have enough 'live' components
   3134 	   of this set.  If so, we can configure it if necessary */
   3135 
   3136 	num_cols = cset->ac->clabel->num_columns;
   3137 	parity_type = cset->ac->clabel->parityConfig;
   3138 
   3139 	/* XXX Check for duplicate components!?!?!? */
   3140 
   3141 	/* Determine what the mod_counter is supposed to be for this set. */
   3142 
   3143 	mod_counter_found = 0;
   3144 	mod_counter = 0;
   3145 	ac = cset->ac;
   3146 	while(ac!=NULL) {
   3147 		if (mod_counter_found==0) {
   3148 			mod_counter = ac->clabel->mod_counter;
   3149 			mod_counter_found = 1;
   3150 		} else {
   3151 			if (ac->clabel->mod_counter > mod_counter) {
   3152 				mod_counter = ac->clabel->mod_counter;
   3153 			}
   3154 		}
   3155 		ac = ac->next;
   3156 	}
   3157 
   3158 	num_missing = 0;
   3159 	auto_config = cset->ac;
   3160 
   3161 	even_pair_failed = 0;
   3162 	for(c=0; c<num_cols; c++) {
   3163 		ac = auto_config;
   3164 		while(ac!=NULL) {
   3165 			if ((ac->clabel->column == c) &&
   3166 			    (ac->clabel->mod_counter == mod_counter)) {
   3167 				/* it's this one... */
   3168 #ifdef DEBUG
   3169 				printf("Found: %s at %d\n",
   3170 				       ac->devname,c);
   3171 #endif
   3172 				break;
   3173 			}
   3174 			ac=ac->next;
   3175 		}
   3176 		if (ac==NULL) {
   3177 				/* Didn't find one here! */
   3178 				/* special case for RAID 1, especially
   3179 				   where there are more than 2
   3180 				   components (where RAIDframe treats
   3181 				   things a little differently :( ) */
   3182 			if (parity_type == '1') {
   3183 				if (c%2 == 0) { /* even component */
   3184 					even_pair_failed = 1;
   3185 				} else { /* odd component.  If
   3186 					    we're failed, and
   3187 					    so is the even
   3188 					    component, it's
   3189 					    "Good Night, Charlie" */
   3190 					if (even_pair_failed == 1) {
   3191 						return(0);
   3192 					}
   3193 				}
   3194 			} else {
   3195 				/* normal accounting */
   3196 				num_missing++;
   3197 			}
   3198 		}
   3199 		if ((parity_type == '1') && (c%2 == 1)) {
   3200 				/* Just did an even component, and we didn't
   3201 				   bail.. reset the even_pair_failed flag,
   3202 				   and go on to the next component.... */
   3203 			even_pair_failed = 0;
   3204 		}
   3205 	}
   3206 
   3207 	clabel = cset->ac->clabel;
   3208 
   3209 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3210 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3211 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3212 		/* XXX this needs to be made *much* more general */
   3213 		/* Too many failures */
   3214 		return(0);
   3215 	}
   3216 	/* otherwise, all is well, and we've got enough to take a kick
   3217 	   at autoconfiguring this set */
   3218 	return(1);
   3219 }
   3220 
   3221 void
   3222 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
   3223 			RF_Raid_t *raidPtr)
   3224 {
   3225 	RF_ComponentLabel_t *clabel;
   3226 	int i;
   3227 
   3228 	clabel = ac->clabel;
   3229 
   3230 	/* 1. Fill in the common stuff */
   3231 	config->numRow = clabel->num_rows = 1;
   3232 	config->numCol = clabel->num_columns;
   3233 	config->numSpare = 0; /* XXX should this be set here? */
   3234 	config->sectPerSU = clabel->sectPerSU;
   3235 	config->SUsPerPU = clabel->SUsPerPU;
   3236 	config->SUsPerRU = clabel->SUsPerRU;
   3237 	config->parityConfig = clabel->parityConfig;
   3238 	/* XXX... */
   3239 	strcpy(config->diskQueueType,"fifo");
   3240 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3241 	config->layoutSpecificSize = 0; /* XXX ?? */
   3242 
   3243 	while(ac!=NULL) {
   3244 		/* row/col values will be in range due to the checks
   3245 		   in reasonable_label() */
   3246 		strcpy(config->devnames[0][ac->clabel->column],
   3247 		       ac->devname);
   3248 		ac = ac->next;
   3249 	}
   3250 
   3251 	for(i=0;i<RF_MAXDBGV;i++) {
   3252 		config->debugVars[i][0] = 0;
   3253 	}
   3254 }
   3255 
   3256 int
   3257 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3258 {
   3259 	RF_ComponentLabel_t clabel;
   3260 	struct vnode *vp;
   3261 	dev_t dev;
   3262 	int column;
   3263 	int sparecol;
   3264 
   3265 	raidPtr->autoconfigure = new_value;
   3266 
   3267 	for(column=0; column<raidPtr->numCol; column++) {
   3268 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3269 			dev = raidPtr->Disks[column].dev;
   3270 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3271 			raidread_component_label(dev, vp, &clabel);
   3272 			clabel.autoconfigure = new_value;
   3273 			raidwrite_component_label(dev, vp, &clabel);
   3274 		}
   3275 	}
   3276 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3277 		sparecol = raidPtr->numCol + column;
   3278 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3279 			dev = raidPtr->Disks[sparecol].dev;
   3280 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3281 			raidread_component_label(dev, vp, &clabel);
   3282 			clabel.autoconfigure = new_value;
   3283 			raidwrite_component_label(dev, vp, &clabel);
   3284 		}
   3285 	}
   3286 	return(new_value);
   3287 }
   3288 
   3289 int
   3290 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3291 {
   3292 	RF_ComponentLabel_t clabel;
   3293 	struct vnode *vp;
   3294 	dev_t dev;
   3295 	int column;
   3296 	int sparecol;
   3297 
   3298 	raidPtr->root_partition = new_value;
   3299 	for(column=0; column<raidPtr->numCol; column++) {
   3300 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3301 			dev = raidPtr->Disks[column].dev;
   3302 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3303 			raidread_component_label(dev, vp, &clabel);
   3304 			clabel.root_partition = new_value;
   3305 			raidwrite_component_label(dev, vp, &clabel);
   3306 		}
   3307 	}
   3308 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3309 		sparecol = raidPtr->numCol + column;
   3310 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3311 			dev = raidPtr->Disks[sparecol].dev;
   3312 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3313 			raidread_component_label(dev, vp, &clabel);
   3314 			clabel.root_partition = new_value;
   3315 			raidwrite_component_label(dev, vp, &clabel);
   3316 		}
   3317 	}
   3318 	return(new_value);
   3319 }
   3320 
   3321 void
   3322 rf_release_all_vps(RF_ConfigSet_t *cset)
   3323 {
   3324 	RF_AutoConfig_t *ac;
   3325 
   3326 	ac = cset->ac;
   3327 	while(ac!=NULL) {
   3328 		/* Close the vp, and give it back */
   3329 		if (ac->vp) {
   3330 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3331 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3332 			vput(ac->vp);
   3333 			ac->vp = NULL;
   3334 		}
   3335 		ac = ac->next;
   3336 	}
   3337 }
   3338 
   3339 
   3340 void
   3341 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3342 {
   3343 	RF_AutoConfig_t *ac;
   3344 	RF_AutoConfig_t *next_ac;
   3345 
   3346 	ac = cset->ac;
   3347 	while(ac!=NULL) {
   3348 		next_ac = ac->next;
   3349 		/* nuke the label */
   3350 		free(ac->clabel, M_RAIDFRAME);
   3351 		/* cleanup the config structure */
   3352 		free(ac, M_RAIDFRAME);
   3353 		/* "next.." */
   3354 		ac = next_ac;
   3355 	}
   3356 	/* and, finally, nuke the config set */
   3357 	free(cset, M_RAIDFRAME);
   3358 }
   3359 
   3360 
   3361 void
   3362 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
   3363 {
   3364 	/* current version number */
   3365 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3366 	clabel->serial_number = raidPtr->serial_number;
   3367 	clabel->mod_counter = raidPtr->mod_counter;
   3368 	clabel->num_rows = 1;
   3369 	clabel->num_columns = raidPtr->numCol;
   3370 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3371 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3372 
   3373 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3374 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3375 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3376 
   3377 	clabel->blockSize = raidPtr->bytesPerSector;
   3378 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3379 
   3380 	/* XXX not portable */
   3381 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3382 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3383 	clabel->autoconfigure = raidPtr->autoconfigure;
   3384 	clabel->root_partition = raidPtr->root_partition;
   3385 	clabel->last_unit = raidPtr->raidid;
   3386 	clabel->config_order = raidPtr->config_order;
   3387 }
   3388 
   3389 int
   3390 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
   3391 {
   3392 	RF_Raid_t *raidPtr;
   3393 	RF_Config_t *config;
   3394 	int raidID;
   3395 	int retcode;
   3396 
   3397 #ifdef DEBUG
   3398 	printf("RAID autoconfigure\n");
   3399 #endif
   3400 
   3401 	retcode = 0;
   3402 	*unit = -1;
   3403 
   3404 	/* 1. Create a config structure */
   3405 
   3406 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3407 				       M_RAIDFRAME,
   3408 				       M_NOWAIT);
   3409 	if (config==NULL) {
   3410 		printf("Out of mem!?!?\n");
   3411 				/* XXX do something more intelligent here. */
   3412 		return(1);
   3413 	}
   3414 
   3415 	memset(config, 0, sizeof(RF_Config_t));
   3416 
   3417 	/*
   3418 	   2. Figure out what RAID ID this one is supposed to live at
   3419 	   See if we can get the same RAID dev that it was configured
   3420 	   on last time..
   3421 	*/
   3422 
   3423 	raidID = cset->ac->clabel->last_unit;
   3424 	if ((raidID < 0) || (raidID >= numraid)) {
   3425 		/* let's not wander off into lala land. */
   3426 		raidID = numraid - 1;
   3427 	}
   3428 	if (raidPtrs[raidID]->valid != 0) {
   3429 
   3430 		/*
   3431 		   Nope... Go looking for an alternative...
   3432 		   Start high so we don't immediately use raid0 if that's
   3433 		   not taken.
   3434 		*/
   3435 
   3436 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3437 			if (raidPtrs[raidID]->valid == 0) {
   3438 				/* can use this one! */
   3439 				break;
   3440 			}
   3441 		}
   3442 	}
   3443 
   3444 	if (raidID < 0) {
   3445 		/* punt... */
   3446 		printf("Unable to auto configure this set!\n");
   3447 		printf("(Out of RAID devs!)\n");
   3448 		free(config, M_RAIDFRAME);
   3449 		return(1);
   3450 	}
   3451 
   3452 #ifdef DEBUG
   3453 	printf("Configuring raid%d:\n",raidID);
   3454 #endif
   3455 
   3456 	raidPtr = raidPtrs[raidID];
   3457 
   3458 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3459 	raidPtr->raidid = raidID;
   3460 	raidPtr->openings = RAIDOUTSTANDING;
   3461 
   3462 	/* 3. Build the configuration structure */
   3463 	rf_create_configuration(cset->ac, config, raidPtr);
   3464 
   3465 	/* 4. Do the configuration */
   3466 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3467 
   3468 	if (retcode == 0) {
   3469 
   3470 		raidinit(raidPtrs[raidID]);
   3471 
   3472 		rf_markalldirty(raidPtrs[raidID]);
   3473 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3474 		if (cset->ac->clabel->root_partition==1) {
   3475 			/* everything configured just fine.  Make a note
   3476 			   that this set is eligible to be root. */
   3477 			cset->rootable = 1;
   3478 			/* XXX do this here? */
   3479 			raidPtrs[raidID]->root_partition = 1;
   3480 		}
   3481 	}
   3482 
   3483 	/* 5. Cleanup */
   3484 	free(config, M_RAIDFRAME);
   3485 
   3486 	*unit = raidID;
   3487 	return(retcode);
   3488 }
   3489 
   3490 void
   3491 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
   3492 {
   3493 	struct buf *bp;
   3494 
   3495 	bp = (struct buf *)desc->bp;
   3496 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3497 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
   3498 }
   3499 
   3500 void
   3501 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
   3502 	     size_t xmin, size_t xmax)
   3503 {
   3504 	pool_init(p, size, 0, 0, 0, w_chan, NULL);
   3505 	pool_sethiwat(p, xmax);
   3506 	pool_prime(p, xmin);
   3507 	pool_setlowat(p, xmin);
   3508 }
   3509 
   3510 /*
   3511  * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
   3512  * if there is IO pending and if that IO could possibly be done for a
   3513  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
   3514  * otherwise.
   3515  *
   3516  */
   3517 
   3518 int
   3519 rf_buf_queue_check(int raidid)
   3520 {
   3521 	if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
   3522 	    raidPtrs[raidid]->openings > 0) {
   3523 		/* there is work to do */
   3524 		return 0;
   3525 	}
   3526 	/* default is nothing to do */
   3527 	return 1;
   3528 }
   3529 
   3530 int
   3531 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
   3532 {
   3533 	struct partinfo dpart;
   3534 	struct dkwedge_info dkw;
   3535 	int error;
   3536 
   3537 	error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred, l);
   3538 	if (error == 0) {
   3539 		diskPtr->blockSize = dpart.disklab->d_secsize;
   3540 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
   3541 		diskPtr->partitionSize = dpart.part->p_size;
   3542 		return 0;
   3543 	}
   3544 
   3545 	error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred, l);
   3546 	if (error == 0) {
   3547 		diskPtr->blockSize = 512;	/* XXX */
   3548 		diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
   3549 		diskPtr->partitionSize = dkw.dkw_size;
   3550 		return 0;
   3551 	}
   3552 	return error;
   3553 }
   3554 
/*
 * raid_match -- match routine; ignores its arguments and always
 * returns 1.
 */
static int
raid_match(struct device *self, struct cfdata *cfdata, void *aux)
{

	return 1;
}
   3561 
/*
 * raid_attach -- attach routine; intentionally does nothing.
 */
static void
raid_attach(struct device *parent, struct device *self, void *aux)
{

	/* nothing to do */
}
   3568 
   3569 
   3570 static int
   3571 raid_detach(struct device *self, int flags)
   3572 {
   3573 	struct raid_softc *rs = (struct raid_softc *)self;
   3574 
   3575 	if (rs->sc_flags & RAIDF_INITED)
   3576 		return EBUSY;
   3577 
   3578 	return 0;
   3579 }
   3580 
   3581 
   3582