Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.230.4.5
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.230.4.5 2007/11/14 19:04:35 joerg Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1990, 1993
     40  *      The Regents of the University of California.  All rights reserved.
     41  *
     42  * This code is derived from software contributed to Berkeley by
     43  * the Systems Programming Group of the University of Utah Computer
     44  * Science Department.
     45  *
     46  * Redistribution and use in source and binary forms, with or without
     47  * modification, are permitted provided that the following conditions
     48  * are met:
     49  * 1. Redistributions of source code must retain the above copyright
     50  *    notice, this list of conditions and the following disclaimer.
     51  * 2. Redistributions in binary form must reproduce the above copyright
     52  *    notice, this list of conditions and the following disclaimer in the
     53  *    documentation and/or other materials provided with the distribution.
     54  * 3. Neither the name of the University nor the names of its contributors
     55  *    may be used to endorse or promote products derived from this software
     56  *    without specific prior written permission.
     57  *
     58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     68  * SUCH DAMAGE.
     69  *
     70  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     71  *
     72  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     73  */
     74 
     75 /*
     76  * Copyright (c) 1988 University of Utah.
     77  *
     78  * This code is derived from software contributed to Berkeley by
     79  * the Systems Programming Group of the University of Utah Computer
     80  * Science Department.
     81  *
     82  * Redistribution and use in source and binary forms, with or without
     83  * modification, are permitted provided that the following conditions
     84  * are met:
     85  * 1. Redistributions of source code must retain the above copyright
     86  *    notice, this list of conditions and the following disclaimer.
     87  * 2. Redistributions in binary form must reproduce the above copyright
     88  *    notice, this list of conditions and the following disclaimer in the
     89  *    documentation and/or other materials provided with the distribution.
     90  * 3. All advertising materials mentioning features or use of this software
     91  *    must display the following acknowledgement:
     92  *      This product includes software developed by the University of
     93  *      California, Berkeley and its contributors.
     94  * 4. Neither the name of the University nor the names of its contributors
     95  *    may be used to endorse or promote products derived from this software
     96  *    without specific prior written permission.
     97  *
     98  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     99  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    100  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    101  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    102  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    103  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    104  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    105  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    106  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    107  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    108  * SUCH DAMAGE.
    109  *
    110  * from: Utah $Hdr: cd.c 1.6 90/11/28$
    111  *
    112  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
    113  */
    114 
    115 /*
    116  * Copyright (c) 1995 Carnegie-Mellon University.
    117  * All rights reserved.
    118  *
    119  * Authors: Mark Holland, Jim Zelenka
    120  *
    121  * Permission to use, copy, modify and distribute this software and
    122  * its documentation is hereby granted, provided that both the copyright
    123  * notice and this permission notice appear in all copies of the
    124  * software, derivative works or modified versions, and any portions
    125  * thereof, and that both notices appear in supporting documentation.
    126  *
    127  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
    128  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
    129  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
    130  *
    131  * Carnegie Mellon requests users of this software to return to
    132  *
    133  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    134  *  School of Computer Science
    135  *  Carnegie Mellon University
    136  *  Pittsburgh PA 15213-3890
    137  *
    138  * any improvements or extensions that they make and grant Carnegie the
    139  * rights to redistribute these changes.
    140  */
    141 
    142 /***********************************************************
    143  *
    144  * rf_kintf.c -- the kernel interface routines for RAIDframe
    145  *
    146  ***********************************************************/
    147 
    148 #include <sys/cdefs.h>
    149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.230.4.5 2007/11/14 19:04:35 joerg Exp $");
    150 
    151 #include <sys/param.h>
    152 #include <sys/errno.h>
    153 #include <sys/pool.h>
    154 #include <sys/proc.h>
    155 #include <sys/queue.h>
    156 #include <sys/disk.h>
    157 #include <sys/device.h>
    158 #include <sys/stat.h>
    159 #include <sys/ioctl.h>
    160 #include <sys/fcntl.h>
    161 #include <sys/systm.h>
    162 #include <sys/vnode.h>
    163 #include <sys/disklabel.h>
    164 #include <sys/conf.h>
    165 #include <sys/lock.h>
    166 #include <sys/buf.h>
    167 #include <sys/bufq.h>
    168 #include <sys/user.h>
    169 #include <sys/reboot.h>
    170 #include <sys/kauth.h>
    171 
    172 #include <prop/proplib.h>
    173 
    174 #include <dev/raidframe/raidframevar.h>
    175 #include <dev/raidframe/raidframeio.h>
    176 #include "raid.h"
    177 #include "opt_raid_autoconfig.h"
    178 #include "rf_raid.h"
    179 #include "rf_copyback.h"
    180 #include "rf_dag.h"
    181 #include "rf_dagflags.h"
    182 #include "rf_desc.h"
    183 #include "rf_diskqueue.h"
    184 #include "rf_etimer.h"
    185 #include "rf_general.h"
    186 #include "rf_kintf.h"
    187 #include "rf_options.h"
    188 #include "rf_driver.h"
    189 #include "rf_parityscan.h"
    190 #include "rf_threadstuff.h"
    191 
/*
 * db1_printf((fmt, args...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf argument list.  With
 * DEBUG defined the message is printed only when rf_kdebug_level > 0;
 * without DEBUG the macro expands to an empty statement.
 *
 * Both forms are wrapped in do { } while (0) so that the macro behaves as
 * exactly one statement: it can be followed by ';' and used as the body of
 * an if/else without capturing the caller's else branch.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) \
	do { if (rf_kdebug_level > 0) printf a; } while (/*CONSTCOND*/0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (/*CONSTCOND*/0)
#endif				/* DEBUG */
    198 
/*
 * Table of per-unit RAID descriptors, indexed by unit number.  The table
 * and each entry are allocated in raidattach().
 */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/*
 * Initialized in raidattach().
 * NOTE(review): presumably protects the two spare-table queues below --
 * confirm against the code that manipulates them.
 */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

/* malloc type used for this driver's allocations (e.g. the raid_softc array) */
MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    209 
/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int, struct proc *);
static void raidinit(RF_Raid_t *);

/* driver initialization and autoconfiguration glue (see CFATTACH_DECL) */
void raidattach(int);
static int raid_match(struct device *, struct cfdata *, void *);
static void raid_attach(struct device *, struct device *, void *);
static int raid_detach(struct device *, int);

/* device switch entry points implemented by this driver */
dev_type_open(raidopen);
dev_type_close(raidclose);
dev_type_read(raidread);
dev_type_write(raidwrite);
dev_type_ioctl(raidioctl);
dev_type_strategy(raidstrategy);
dev_type_dump(raiddump);
dev_type_size(raidsize);

/* block device switch: open, close, strategy, ioctl, dump, size, type */
const struct bdevsw raid_bdevsw = {
	raidopen, raidclose, raidstrategy, raidioctl,
	raiddump, raidsize, D_DISK
};

/*
 * character device switch; the stop, tty, poll, mmap and kqfilter slots
 * are filled with the generic "no*" stubs.
 */
const struct cdevsw raid_cdevsw = {
	raidopen, raidclose, raidread, raidwrite, raidioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};

/* disk driver hooks: strategy routine and minphys */
static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
    242 
    243 /* XXX Not sure if the following should be replacing the raidPtrs above,
    244    or if it should be used in conjunction with that...
    245 */
    246 
/*
 * Per-unit driver state.  One entry per unit lives in the raid_softc[]
 * array, which is allocated and zeroed in raidattach() and indexed by
 * raidunit(dev).
 */
struct raid_softc {
	struct device *sc_dev;	/* autoconf device for this unit */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
    262 
/* unit number encoded in a dev_t */
#define	raidunit(x)	DISKUNIT(x)
/*
 * Number of usable units.  Set by raidattach(); the entry points reject
 * any unit number >= numraid.
 */
int numraid = 0;

/* autoconfiguration attachment: softc size plus match/attach/detach hooks */
extern struct cfdriver raid_cd;
CFATTACH_DECL(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL);
    269 
    270 /*
    271  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    272  * Be aware that large numbers can allow the driver to consume a lot of
    273  * kernel memory, especially on writes, and in degraded mode reads.
    274  *
    275  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    276  * a single 64K write will typically require 64K for the old data,
    277  * 64K for the old parity, and 64K for the new parity, for a total
    278  * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
    280  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    281  *
    282  * Now in degraded mode, for example, a 64K read on the above setup may
    283  * require data reconstruction, which will require *all* of the 4 remaining
    284  * disks to participate -- 4 * 32K/disk == 128K again.
    285  */
    286 
/* default may be overridden by defining RAIDOUTSTANDING at build time */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* dev_t of the raw partition (RAW_PART) on the same major/unit as `dev' */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    293 
/* declared here, and made public, for the benefit of KVM stuff.. */
/* array of numraid entries, allocated in raidattach() */
struct raid_softc *raid_softc;

/* disklabel helpers */
static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
				     struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

/* per-unit lock taken by the open/close/dump entry points */
static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_properties(struct raid_softc *, RF_Raid_t *);

void rf_ReconThread(struct rf_recon_req *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
int rf_autoconfig(struct device *self);
void rf_buildroothack(RF_ConfigSet_t *);

/* autoconfiguration helpers */
RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);

/*
 * raidattach() sets this to 1 when the kernel is built with
 * RAID_AUTOCONFIG; rf_autoconfig() clears it again so that the
 * autoconfiguration pass runs at most once.
 */
static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
				  allow autoconfig to take place.
				  Note that this is overridden by having
				  RAID_AUTOCONFIG as an option in the
				  kernel config file.  */

struct RF_Pools_s rf_pools;
    332 
    333 struct RF_Pools_s rf_pools;
    334 
    335 void
    336 raidattach(int num)
    337 {
    338 	int raidID;
    339 	int i, rc;
    340 
    341 #ifdef DEBUG
    342 	printf("raidattach: Asked for %d units\n", num);
    343 #endif
    344 
    345 	if (num <= 0) {
    346 #ifdef DIAGNOSTIC
    347 		panic("raidattach: count <= 0");
    348 #endif
    349 		return;
    350 	}
    351 	/* This is where all the initialization stuff gets done. */
    352 
    353 	numraid = num;
    354 
    355 	/* Make some space for requested number of units... */
    356 
    357 	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
    358 	if (raidPtrs == NULL) {
    359 		panic("raidPtrs is NULL!!");
    360 	}
    361 
    362 	rf_mutex_init(&rf_sparet_wait_mutex);
    363 
    364 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    365 
    366 	for (i = 0; i < num; i++)
    367 		raidPtrs[i] = NULL;
    368 	rc = rf_BootRaidframe();
    369 	if (rc == 0)
    370 		printf("Kernelized RAIDframe activated\n");
    371 	else
    372 		panic("Serious error booting RAID!!");
    373 
    374 	/* put together some datastructures like the CCD device does.. This
    375 	 * lets us lock the device and what-not when it gets opened. */
    376 
    377 	raid_softc = (struct raid_softc *)
    378 		malloc(num * sizeof(struct raid_softc),
    379 		       M_RAIDFRAME, M_NOWAIT);
    380 	if (raid_softc == NULL) {
    381 		printf("WARNING: no memory for RAIDframe driver\n");
    382 		return;
    383 	}
    384 
    385 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    386 
    387 	for (raidID = 0; raidID < num; raidID++) {
    388 		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
    389 
    390 		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
    391 			  (RF_Raid_t *));
    392 		if (raidPtrs[raidID] == NULL) {
    393 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    394 			numraid = raidID;
    395 			return;
    396 		}
    397 	}
    398 
    399 	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
    400 		printf("config_cfattach_attach failed?\n");
    401 	}
    402 
    403 #ifdef RAID_AUTOCONFIG
    404 	raidautoconfig = 1;
    405 #endif
    406 
    407 	/*
    408 	 * Register a finalizer which will be used to auto-config RAID
    409 	 * sets once all real hardware devices have been found.
    410 	 */
    411 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
    412 		printf("WARNING: unable to register RAIDframe finalizer\n");
    413 }
    414 
    415 int
    416 rf_autoconfig(struct device *self)
    417 {
    418 	RF_AutoConfig_t *ac_list;
    419 	RF_ConfigSet_t *config_sets;
    420 
    421 	if (raidautoconfig == 0)
    422 		return (0);
    423 
    424 	/* XXX This code can only be run once. */
    425 	raidautoconfig = 0;
    426 
    427 	/* 1. locate all RAID components on the system */
    428 #ifdef DEBUG
    429 	printf("Searching for RAID components...\n");
    430 #endif
    431 	ac_list = rf_find_raid_components();
    432 
    433 	/* 2. Sort them into their respective sets. */
    434 	config_sets = rf_create_auto_sets(ac_list);
    435 
    436 	/*
    437 	 * 3. Evaluate each set andconfigure the valid ones.
    438 	 * This gets done in rf_buildroothack().
    439 	 */
    440 	rf_buildroothack(config_sets);
    441 
    442 	return 1;
    443 }
    444 
    445 void
    446 rf_buildroothack(RF_ConfigSet_t *config_sets)
    447 {
    448 	RF_ConfigSet_t *cset;
    449 	RF_ConfigSet_t *next_cset;
    450 	int retcode;
    451 	int raidID;
    452 	int rootID;
    453 	int col;
    454 	int num_root;
    455 	char *devname;
    456 
    457 	rootID = 0;
    458 	num_root = 0;
    459 	cset = config_sets;
    460 	while(cset != NULL ) {
    461 		next_cset = cset->next;
    462 		if (rf_have_enough_components(cset) &&
    463 		    cset->ac->clabel->autoconfigure==1) {
    464 			retcode = rf_auto_config_set(cset,&raidID);
    465 			if (!retcode) {
    466 #ifdef DEBUG
    467 				printf("raid%d: configured ok\n", raidID);
    468 #endif
    469 				if (cset->rootable) {
    470 					rootID = raidID;
    471 					num_root++;
    472 				}
    473 			} else {
    474 				/* The autoconfig didn't work :( */
    475 #ifdef DEBUG
    476 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    477 #endif
    478 				rf_release_all_vps(cset);
    479 			}
    480 		} else {
    481 #ifdef DEBUG
    482 			printf("raid%d: not enough components\n", raidID);
    483 #endif
    484 			/* we're not autoconfiguring this set...
    485 			   release the associated resources */
    486 			rf_release_all_vps(cset);
    487 		}
    488 		/* cleanup */
    489 		rf_cleanup_config_set(cset);
    490 		cset = next_cset;
    491 	}
    492 
    493 	/* if the user has specified what the root device should be
    494 	   then we don't touch booted_device or boothowto... */
    495 
    496 	if (rootspec != NULL)
    497 		return;
    498 
    499 	/* we found something bootable... */
    500 
    501 	if (num_root == 1) {
    502 		booted_device = raid_softc[rootID].sc_dev;
    503 	} else if (num_root > 1) {
    504 
    505 		/*
    506 		 * Maybe the MD code can help. If it cannot, then
    507 		 * setroot() will discover that we have no
    508 		 * booted_device and will ask the user if nothing was
    509 		 * hardwired in the kernel config file
    510 		 */
    511 
    512 		if (booted_device == NULL)
    513 			cpu_rootconf();
    514 		if (booted_device == NULL)
    515 			return;
    516 
    517 		num_root = 0;
    518 		for (raidID = 0; raidID < numraid; raidID++) {
    519 			if (raidPtrs[raidID]->valid == 0)
    520 				continue;
    521 
    522 			if (raidPtrs[raidID]->root_partition == 0)
    523 				continue;
    524 
    525 			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
    526 				devname = raidPtrs[raidID]->Disks[col].devname;
    527 				devname += sizeof("/dev/") - 1;
    528 				if (strncmp(devname, booted_device->dv_xname,
    529 					    strlen(booted_device->dv_xname)) != 0)
    530 					continue;
    531 #ifdef DEBUG
    532 				printf("raid%d includes boot device %s\n",
    533 				       raidID, devname);
    534 #endif
    535 				num_root++;
    536 				rootID = raidID;
    537 			}
    538 		}
    539 
    540 		if (num_root == 1) {
    541 			booted_device = raid_softc[rootID].sc_dev;
    542 		} else {
    543 			/* we can't guess.. require the user to answer... */
    544 			boothowto |= RB_ASKNAME;
    545 		}
    546 	}
    547 }
    548 
    549 
    550 int
    551 raidsize(dev_t dev)
    552 {
    553 	struct raid_softc *rs;
    554 	struct disklabel *lp;
    555 	int     part, unit, omask, size;
    556 
    557 	unit = raidunit(dev);
    558 	if (unit >= numraid)
    559 		return (-1);
    560 	rs = &raid_softc[unit];
    561 
    562 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    563 		return (-1);
    564 
    565 	part = DISKPART(dev);
    566 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    567 	lp = rs->sc_dkdev.dk_label;
    568 
    569 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
    570 		return (-1);
    571 
    572 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    573 		size = -1;
    574 	else
    575 		size = lp->d_partitions[part].p_size *
    576 		    (lp->d_secsize / DEV_BSIZE);
    577 
    578 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
    579 		return (-1);
    580 
    581 	return (size);
    582 
    583 }
    584 
    585 int
    586 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
    587 {
    588 	int     unit = raidunit(dev);
    589 	struct raid_softc *rs;
    590 	const struct bdevsw *bdev;
    591 	struct disklabel *lp;
    592 	RF_Raid_t *raidPtr;
    593 	daddr_t offset;
    594 	int     part, c, sparecol, j, scol, dumpto;
    595 	int     error = 0;
    596 
    597 	if (unit >= numraid)
    598 		return (ENXIO);
    599 
    600 	rs = &raid_softc[unit];
    601 	raidPtr = raidPtrs[unit];
    602 
    603 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    604 		return ENXIO;
    605 
    606 	/* we only support dumping to RAID 1 sets */
    607 	if (raidPtr->Layout.numDataCol != 1 ||
    608 	    raidPtr->Layout.numParityCol != 1)
    609 		return EINVAL;
    610 
    611 
    612 	if ((error = raidlock(rs)) != 0)
    613 		return error;
    614 
    615 	if (size % DEV_BSIZE != 0) {
    616 		error = EINVAL;
    617 		goto out;
    618 	}
    619 
    620 	if (blkno + size / DEV_BSIZE > rs->sc_size) {
    621 		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
    622 		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
    623 		    size / DEV_BSIZE, rs->sc_size);
    624 		error = EINVAL;
    625 		goto out;
    626 	}
    627 
    628 	part = DISKPART(dev);
    629 	lp = rs->sc_dkdev.dk_label;
    630 	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
    631 
    632 	/* figure out what device is alive.. */
    633 
    634 	/*
    635 	   Look for a component to dump to.  The preference for the
    636 	   component to dump to is as follows:
    637 	   1) the master
    638 	   2) a used_spare of the master
    639 	   3) the slave
    640 	   4) a used_spare of the slave
    641 	*/
    642 
    643 	dumpto = -1;
    644 	for (c = 0; c < raidPtr->numCol; c++) {
    645 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
    646 			/* this might be the one */
    647 			dumpto = c;
    648 			break;
    649 		}
    650 	}
    651 
    652 	/*
    653 	   At this point we have possibly selected a live master or a
    654 	   live slave.  We now check to see if there is a spared
    655 	   master (or a spared slave), if we didn't find a live master
    656 	   or a live slave.
    657 	*/
    658 
    659 	for (c = 0; c < raidPtr->numSpare; c++) {
    660 		sparecol = raidPtr->numCol + c;
    661 		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
    662 			/* How about this one? */
    663 			scol = -1;
    664 			for(j=0;j<raidPtr->numCol;j++) {
    665 				if (raidPtr->Disks[j].spareCol == sparecol) {
    666 					scol = j;
    667 					break;
    668 				}
    669 			}
    670 			if (scol == 0) {
    671 				/*
    672 				   We must have found a spared master!
    673 				   We'll take that over anything else
    674 				   found so far.  (We couldn't have
    675 				   found a real master before, since
    676 				   this is a used spare, and it's
    677 				   saying that it's replacing the
    678 				   master.)  On reboot (with
    679 				   autoconfiguration turned on)
    680 				   sparecol will become the 1st
    681 				   component (component0) of this set.
    682 				*/
    683 				dumpto = sparecol;
    684 				break;
    685 			} else if (scol != -1) {
    686 				/*
    687 				   Must be a spared slave.  We'll dump
    688 				   to that if we havn't found anything
    689 				   else so far.
    690 				*/
    691 				if (dumpto == -1)
    692 					dumpto = sparecol;
    693 			}
    694 		}
    695 	}
    696 
    697 	if (dumpto == -1) {
    698 		/* we couldn't find any live components to dump to!?!?
    699 		 */
    700 		error = EINVAL;
    701 		goto out;
    702 	}
    703 
    704 	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
    705 
    706 	/*
    707 	   Note that blkno is relative to this particular partition.
    708 	   By adding the offset of this partition in the RAID
    709 	   set, and also adding RF_PROTECTED_SECTORS, we get a
    710 	   value that is relative to the partition used for the
    711 	   underlying component.
    712 	*/
    713 
    714 	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
    715 				blkno + offset, va, size);
    716 
    717 out:
    718 	raidunlock(rs);
    719 
    720 	return error;
    721 }
    722 /* ARGSUSED */
    723 int
    724 raidopen(dev_t dev, int flags, int fmt,
    725     struct lwp *l)
    726 {
    727 	int     unit = raidunit(dev);
    728 	struct raid_softc *rs;
    729 	struct disklabel *lp;
    730 	int     part, pmask;
    731 	int     error = 0;
    732 
    733 	if (unit >= numraid)
    734 		return (ENXIO);
    735 	rs = &raid_softc[unit];
    736 
    737 	if ((error = raidlock(rs)) != 0)
    738 		return (error);
    739 	lp = rs->sc_dkdev.dk_label;
    740 
    741 	part = DISKPART(dev);
    742 
    743 	/*
    744 	 * If there are wedges, and this is not RAW_PART, then we
    745 	 * need to fail.
    746 	 */
    747 	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
    748 		error = EBUSY;
    749 		goto bad;
    750 	}
    751 	pmask = (1 << part);
    752 
    753 	if ((rs->sc_flags & RAIDF_INITED) &&
    754 	    (rs->sc_dkdev.dk_openmask == 0))
    755 		raidgetdisklabel(dev);
    756 
    757 	/* make sure that this partition exists */
    758 
    759 	if (part != RAW_PART) {
    760 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    761 		    ((part >= lp->d_npartitions) ||
    762 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    763 			error = ENXIO;
    764 			goto bad;
    765 		}
    766 	}
    767 	/* Prevent this unit from being unconfigured while open. */
    768 	switch (fmt) {
    769 	case S_IFCHR:
    770 		rs->sc_dkdev.dk_copenmask |= pmask;
    771 		break;
    772 
    773 	case S_IFBLK:
    774 		rs->sc_dkdev.dk_bopenmask |= pmask;
    775 		break;
    776 	}
    777 
    778 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    779 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    780 		/* First one... mark things as dirty... Note that we *MUST*
    781 		 have done a configure before this.  I DO NOT WANT TO BE
    782 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    783 		 THAT THEY BELONG TOGETHER!!!!! */
    784 		/* XXX should check to see if we're only open for reading
    785 		   here... If so, we needn't do this, but then need some
    786 		   other way of keeping track of what's happened.. */
    787 
    788 		rf_markalldirty( raidPtrs[unit] );
    789 	}
    790 
    791 
    792 	rs->sc_dkdev.dk_openmask =
    793 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    794 
    795 bad:
    796 	raidunlock(rs);
    797 
    798 	return (error);
    799 
    800 
    801 }
    802 /* ARGSUSED */
    803 int
    804 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
    805 {
    806 	int     unit = raidunit(dev);
    807 	struct cfdata *cf;
    808 	struct raid_softc *rs;
    809 	int     error = 0;
    810 	int     part;
    811 
    812 	if (unit >= numraid)
    813 		return (ENXIO);
    814 	rs = &raid_softc[unit];
    815 
    816 	if ((error = raidlock(rs)) != 0)
    817 		return (error);
    818 
    819 	part = DISKPART(dev);
    820 
    821 	/* ...that much closer to allowing unconfiguration... */
    822 	switch (fmt) {
    823 	case S_IFCHR:
    824 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    825 		break;
    826 
    827 	case S_IFBLK:
    828 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    829 		break;
    830 	}
    831 	rs->sc_dkdev.dk_openmask =
    832 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    833 
    834 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    835 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    836 		/* Last one... device is not unconfigured yet.
    837 		   Device shutdown has taken care of setting the
    838 		   clean bits if RAIDF_INITED is not set
    839 		   mark things as clean... */
    840 
    841 		rf_update_component_labels(raidPtrs[unit],
    842 						 RF_FINAL_COMPONENT_UPDATE);
    843 		if (doing_shutdown) {
    844 			/* last one, and we're going down, so
    845 			   lights out for this RAID set too. */
    846 			error = rf_Shutdown(raidPtrs[unit]);
    847 
    848 			/* It's no longer initialized... */
    849 			rs->sc_flags &= ~RAIDF_INITED;
    850 
    851 			/* detach the device */
    852 
    853 			cf = device_cfdata(rs->sc_dev);
    854 			error = config_detach(rs->sc_dev, DETACH_QUIET);
    855 			free(cf, M_RAIDFRAME);
    856 
    857 			/* Detach the disk. */
    858 			disk_detach(&rs->sc_dkdev);
    859 			disk_destroy(&rs->sc_dkdev);
    860 		}
    861 	}
    862 
    863 	raidunlock(rs);
    864 	return (0);
    865 
    866 }
    867 
    868 void
    869 raidstrategy(struct buf *bp)
    870 {
    871 	int s;
    872 
    873 	unsigned int raidID = raidunit(bp->b_dev);
    874 	RF_Raid_t *raidPtr;
    875 	struct raid_softc *rs = &raid_softc[raidID];
    876 	int     wlabel;
    877 
    878 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    879 		bp->b_error = ENXIO;
    880 		goto done;
    881 	}
    882 	if (raidID >= numraid || !raidPtrs[raidID]) {
    883 		bp->b_error = ENODEV;
    884 		goto done;
    885 	}
    886 	raidPtr = raidPtrs[raidID];
    887 	if (!raidPtr->valid) {
    888 		bp->b_error = ENODEV;
    889 		goto done;
    890 	}
    891 	if (bp->b_bcount == 0) {
    892 		db1_printf(("b_bcount is zero..\n"));
    893 		goto done;
    894 	}
    895 
    896 	/*
    897 	 * Do bounds checking and adjust transfer.  If there's an
    898 	 * error, the bounds check will flag that for us.
    899 	 */
    900 
    901 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    902 	if (DISKPART(bp->b_dev) == RAW_PART) {
    903 		uint64_t size; /* device size in DEV_BSIZE unit */
    904 
    905 		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
    906 			size = raidPtr->totalSectors <<
    907 			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
    908 		} else {
    909 			size = raidPtr->totalSectors >>
    910 			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
    911 		}
    912 		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
    913 			goto done;
    914 		}
    915 	} else {
    916 		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
    917 			db1_printf(("Bounds check failed!!:%d %d\n",
    918 				(int) bp->b_blkno, (int) wlabel));
    919 			goto done;
    920 		}
    921 	}
    922 	s = splbio();
    923 
    924 	bp->b_resid = 0;
    925 
    926 	/* stuff it onto our queue */
    927 	BUFQ_PUT(rs->buf_queue, bp);
    928 
    929 	/* scheduled the IO to happen at the next convenient time */
    930 	wakeup(&(raidPtrs[raidID]->iodone));
    931 
    932 	splx(s);
    933 	return;
    934 
    935 done:
    936 	bp->b_resid = bp->b_bcount;
    937 	biodone(bp);
    938 }
    939 /* ARGSUSED */
    940 int
    941 raidread(dev_t dev, struct uio *uio, int flags)
    942 {
    943 	int     unit = raidunit(dev);
    944 	struct raid_softc *rs;
    945 
    946 	if (unit >= numraid)
    947 		return (ENXIO);
    948 	rs = &raid_softc[unit];
    949 
    950 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    951 		return (ENXIO);
    952 
    953 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    954 
    955 }
    956 /* ARGSUSED */
    957 int
    958 raidwrite(dev_t dev, struct uio *uio, int flags)
    959 {
    960 	int     unit = raidunit(dev);
    961 	struct raid_softc *rs;
    962 
    963 	if (unit >= numraid)
    964 		return (ENXIO);
    965 	rs = &raid_softc[unit];
    966 
    967 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    968 		return (ENXIO);
    969 
    970 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    971 
    972 }
    973 
    974 int
    975 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
    976 {
    977 	int     unit = raidunit(dev);
    978 	int     error = 0;
    979 	int     part, pmask;
    980 	struct cfdata *cf;
    981 	struct raid_softc *rs;
    982 	RF_Config_t *k_cfg, *u_cfg;
    983 	RF_Raid_t *raidPtr;
    984 	RF_RaidDisk_t *diskPtr;
    985 	RF_AccTotals_t *totals;
    986 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    987 	u_char *specific_buf;
    988 	int retcode = 0;
    989 	int column;
    990 	int raidid;
    991 	struct rf_recon_req *rrcopy, *rr;
    992 	RF_ComponentLabel_t *clabel;
    993 	RF_ComponentLabel_t *ci_label;
    994 	RF_ComponentLabel_t **clabel_ptr;
    995 	RF_SingleComponent_t *sparePtr,*componentPtr;
    996 	RF_SingleComponent_t component;
    997 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    998 	int i, j, d;
    999 #ifdef __HAVE_OLD_DISKLABEL
   1000 	struct disklabel newlabel;
   1001 #endif
   1002 	struct dkwedge_info *dkw;
   1003 
   1004 	if (unit >= numraid)
   1005 		return (ENXIO);
   1006 	rs = &raid_softc[unit];
   1007 	raidPtr = raidPtrs[unit];
   1008 
   1009 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
   1010 		(int) DISKPART(dev), (int) unit, (int) cmd));
   1011 
   1012 	/* Must be open for writes for these commands... */
   1013 	switch (cmd) {
   1014 #ifdef DIOCGSECTORSIZE
   1015 	case DIOCGSECTORSIZE:
   1016 		*(u_int *)data = raidPtr->bytesPerSector;
   1017 		return 0;
   1018 	case DIOCGMEDIASIZE:
   1019 		*(off_t *)data =
   1020 		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
   1021 		return 0;
   1022 #endif
   1023 	case DIOCSDINFO:
   1024 	case DIOCWDINFO:
   1025 #ifdef __HAVE_OLD_DISKLABEL
   1026 	case ODIOCWDINFO:
   1027 	case ODIOCSDINFO:
   1028 #endif
   1029 	case DIOCWLABEL:
   1030 	case DIOCAWEDGE:
   1031 	case DIOCDWEDGE:
   1032 		if ((flag & FWRITE) == 0)
   1033 			return (EBADF);
   1034 	}
   1035 
   1036 	/* Must be initialized for these... */
   1037 	switch (cmd) {
   1038 	case DIOCGDINFO:
   1039 	case DIOCSDINFO:
   1040 	case DIOCWDINFO:
   1041 #ifdef __HAVE_OLD_DISKLABEL
   1042 	case ODIOCGDINFO:
   1043 	case ODIOCWDINFO:
   1044 	case ODIOCSDINFO:
   1045 	case ODIOCGDEFLABEL:
   1046 #endif
   1047 	case DIOCGPART:
   1048 	case DIOCWLABEL:
   1049 	case DIOCGDEFLABEL:
   1050 	case DIOCAWEDGE:
   1051 	case DIOCDWEDGE:
   1052 	case DIOCLWEDGES:
   1053 	case RAIDFRAME_SHUTDOWN:
   1054 	case RAIDFRAME_REWRITEPARITY:
   1055 	case RAIDFRAME_GET_INFO:
   1056 	case RAIDFRAME_RESET_ACCTOTALS:
   1057 	case RAIDFRAME_GET_ACCTOTALS:
   1058 	case RAIDFRAME_KEEP_ACCTOTALS:
   1059 	case RAIDFRAME_GET_SIZE:
   1060 	case RAIDFRAME_FAIL_DISK:
   1061 	case RAIDFRAME_COPYBACK:
   1062 	case RAIDFRAME_CHECK_RECON_STATUS:
   1063 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1064 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1065 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1066 	case RAIDFRAME_ADD_HOT_SPARE:
   1067 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1068 	case RAIDFRAME_INIT_LABELS:
   1069 	case RAIDFRAME_REBUILD_IN_PLACE:
   1070 	case RAIDFRAME_CHECK_PARITY:
   1071 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1072 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1073 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1074 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1075 	case RAIDFRAME_SET_AUTOCONFIG:
   1076 	case RAIDFRAME_SET_ROOT:
   1077 	case RAIDFRAME_DELETE_COMPONENT:
   1078 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1079 		if ((rs->sc_flags & RAIDF_INITED) == 0)
   1080 			return (ENXIO);
   1081 	}
   1082 
   1083 	switch (cmd) {
   1084 
   1085 		/* configure the system */
   1086 	case RAIDFRAME_CONFIGURE:
   1087 
   1088 		if (raidPtr->valid) {
   1089 			/* There is a valid RAID set running on this unit! */
   1090 			printf("raid%d: Device already configured!\n",unit);
   1091 			return(EINVAL);
   1092 		}
   1093 
   1094 		/* copy-in the configuration information */
   1095 		/* data points to a pointer to the configuration structure */
   1096 
   1097 		u_cfg = *((RF_Config_t **) data);
   1098 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
   1099 		if (k_cfg == NULL) {
   1100 			return (ENOMEM);
   1101 		}
   1102 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
   1103 		if (retcode) {
   1104 			RF_Free(k_cfg, sizeof(RF_Config_t));
   1105 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
   1106 				retcode));
   1107 			return (retcode);
   1108 		}
   1109 		/* allocate a buffer for the layout-specific data, and copy it
   1110 		 * in */
   1111 		if (k_cfg->layoutSpecificSize) {
   1112 			if (k_cfg->layoutSpecificSize > 10000) {
   1113 				/* sanity check */
   1114 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1115 				return (EINVAL);
   1116 			}
   1117 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
   1118 			    (u_char *));
   1119 			if (specific_buf == NULL) {
   1120 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1121 				return (ENOMEM);
   1122 			}
   1123 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1124 			    k_cfg->layoutSpecificSize);
   1125 			if (retcode) {
   1126 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1127 				RF_Free(specific_buf,
   1128 					k_cfg->layoutSpecificSize);
   1129 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
   1130 					retcode));
   1131 				return (retcode);
   1132 			}
   1133 		} else
   1134 			specific_buf = NULL;
   1135 		k_cfg->layoutSpecific = specific_buf;
   1136 
   1137 		/* should do some kind of sanity check on the configuration.
   1138 		 * Store the sum of all the bytes in the last byte? */
   1139 
   1140 		/* configure the system */
   1141 
   1142 		/*
   1143 		 * Clear the entire RAID descriptor, just to make sure
   1144 		 *  there is no stale data left in the case of a
   1145 		 *  reconfiguration
   1146 		 */
   1147 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
   1148 		raidPtr->raidid = unit;
   1149 
   1150 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
   1151 
   1152 		if (retcode == 0) {
   1153 
   1154 			/* allow this many simultaneous IO's to
   1155 			   this RAID device */
   1156 			raidPtr->openings = RAIDOUTSTANDING;
   1157 
   1158 			raidinit(raidPtr);
   1159 			rf_markalldirty(raidPtr);
   1160 		}
   1161 		/* free the buffers.  No return code here. */
   1162 		if (k_cfg->layoutSpecificSize) {
   1163 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1164 		}
   1165 		RF_Free(k_cfg, sizeof(RF_Config_t));
   1166 
   1167 		return (retcode);
   1168 
   1169 		/* shutdown the system */
   1170 	case RAIDFRAME_SHUTDOWN:
   1171 
   1172 		if ((error = raidlock(rs)) != 0)
   1173 			return (error);
   1174 
   1175 		/*
   1176 		 * If somebody has a partition mounted, we shouldn't
   1177 		 * shutdown.
   1178 		 */
   1179 
   1180 		part = DISKPART(dev);
   1181 		pmask = (1 << part);
   1182 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
   1183 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
   1184 			(rs->sc_dkdev.dk_copenmask & pmask))) {
   1185 			raidunlock(rs);
   1186 			return (EBUSY);
   1187 		}
   1188 
   1189 		retcode = rf_Shutdown(raidPtr);
   1190 
   1191 		/* It's no longer initialized... */
   1192 		rs->sc_flags &= ~RAIDF_INITED;
   1193 
   1194 		/* free the pseudo device attach bits */
   1195 
   1196 		cf = device_cfdata(rs->sc_dev);
   1197 		/* XXX this causes us to not return any errors
   1198 		   from the above call to rf_Shutdown() */
   1199 		retcode = config_detach(rs->sc_dev, DETACH_QUIET);
   1200 		free(cf, M_RAIDFRAME);
   1201 
   1202 		/* Detach the disk. */
   1203 		disk_detach(&rs->sc_dkdev);
   1204 		disk_destroy(&rs->sc_dkdev);
   1205 
   1206 		raidunlock(rs);
   1207 
   1208 		return (retcode);
   1209 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1210 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1211 		/* need to read the component label for the disk indicated
   1212 		   by row,column in clabel */
   1213 
   1214 		/* For practice, let's get it directly fromdisk, rather
   1215 		   than from the in-core copy */
   1216 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1217 			   (RF_ComponentLabel_t *));
   1218 		if (clabel == NULL)
   1219 			return (ENOMEM);
   1220 
   1221 		retcode = copyin( *clabel_ptr, clabel,
   1222 				  sizeof(RF_ComponentLabel_t));
   1223 
   1224 		if (retcode) {
   1225 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1226 			return(retcode);
   1227 		}
   1228 
   1229 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1230 
   1231 		column = clabel->column;
   1232 
   1233 		if ((column < 0) || (column >= raidPtr->numCol +
   1234 				     raidPtr->numSpare)) {
   1235 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1236 			return(EINVAL);
   1237 		}
   1238 
   1239 		retcode = raidread_component_label(raidPtr->Disks[column].dev,
   1240 				raidPtr->raid_cinfo[column].ci_vp,
   1241 				clabel );
   1242 
   1243 		if (retcode == 0) {
   1244 			retcode = copyout(clabel, *clabel_ptr,
   1245 					  sizeof(RF_ComponentLabel_t));
   1246 		}
   1247 		RF_Free(clabel, sizeof(RF_ComponentLabel_t));
   1248 		return (retcode);
   1249 
   1250 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1251 		clabel = (RF_ComponentLabel_t *) data;
   1252 
   1253 		/* XXX check the label for valid stuff... */
   1254 		/* Note that some things *should not* get modified --
   1255 		   the user should be re-initing the labels instead of
   1256 		   trying to patch things.
   1257 		   */
   1258 
   1259 		raidid = raidPtr->raidid;
   1260 #ifdef DEBUG
   1261 		printf("raid%d: Got component label:\n", raidid);
   1262 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1263 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1264 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1265 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1266 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1267 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1268 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1269 #endif
   1270 		clabel->row = 0;
   1271 		column = clabel->column;
   1272 
   1273 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1274 			return(EINVAL);
   1275 		}
   1276 
   1277 		/* XXX this isn't allowed to do anything for now :-) */
   1278 
   1279 		/* XXX and before it is, we need to fill in the rest
   1280 		   of the fields!?!?!?! */
   1281 #if 0
   1282 		raidwrite_component_label(
   1283 		     raidPtr->Disks[column].dev,
   1284 			    raidPtr->raid_cinfo[column].ci_vp,
   1285 			    clabel );
   1286 #endif
   1287 		return (0);
   1288 
   1289 	case RAIDFRAME_INIT_LABELS:
   1290 		clabel = (RF_ComponentLabel_t *) data;
   1291 		/*
   1292 		   we only want the serial number from
   1293 		   the above.  We get all the rest of the information
   1294 		   from the config that was used to create this RAID
   1295 		   set.
   1296 		   */
   1297 
   1298 		raidPtr->serial_number = clabel->serial_number;
   1299 
   1300 		RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
   1301 			  (RF_ComponentLabel_t *));
   1302 		if (ci_label == NULL)
   1303 			return (ENOMEM);
   1304 
   1305 		raid_init_component_label(raidPtr, ci_label);
   1306 		ci_label->serial_number = clabel->serial_number;
   1307 		ci_label->row = 0; /* we dont' pretend to support more */
   1308 
   1309 		for(column=0;column<raidPtr->numCol;column++) {
   1310 			diskPtr = &raidPtr->Disks[column];
   1311 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1312 				ci_label->partitionSize = diskPtr->partitionSize;
   1313 				ci_label->column = column;
   1314 				raidwrite_component_label(
   1315 							  raidPtr->Disks[column].dev,
   1316 							  raidPtr->raid_cinfo[column].ci_vp,
   1317 							  ci_label );
   1318 			}
   1319 		}
   1320 		RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
   1321 
   1322 		return (retcode);
   1323 	case RAIDFRAME_SET_AUTOCONFIG:
   1324 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1325 		printf("raid%d: New autoconfig value is: %d\n",
   1326 		       raidPtr->raidid, d);
   1327 		*(int *) data = d;
   1328 		return (retcode);
   1329 
   1330 	case RAIDFRAME_SET_ROOT:
   1331 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1332 		printf("raid%d: New rootpartition value is: %d\n",
   1333 		       raidPtr->raidid, d);
   1334 		*(int *) data = d;
   1335 		return (retcode);
   1336 
   1337 		/* initialize all parity */
   1338 	case RAIDFRAME_REWRITEPARITY:
   1339 
   1340 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1341 			/* Parity for RAID 0 is trivially correct */
   1342 			raidPtr->parity_good = RF_RAID_CLEAN;
   1343 			return(0);
   1344 		}
   1345 
   1346 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1347 			/* Re-write is already in progress! */
   1348 			return(EINVAL);
   1349 		}
   1350 
   1351 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1352 					   rf_RewriteParityThread,
   1353 					   raidPtr,"raid_parity");
   1354 		return (retcode);
   1355 
   1356 
   1357 	case RAIDFRAME_ADD_HOT_SPARE:
   1358 		sparePtr = (RF_SingleComponent_t *) data;
   1359 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
   1360 		retcode = rf_add_hot_spare(raidPtr, &component);
   1361 		return(retcode);
   1362 
   1363 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1364 		return(retcode);
   1365 
   1366 	case RAIDFRAME_DELETE_COMPONENT:
   1367 		componentPtr = (RF_SingleComponent_t *)data;
   1368 		memcpy( &component, componentPtr,
   1369 			sizeof(RF_SingleComponent_t));
   1370 		retcode = rf_delete_component(raidPtr, &component);
   1371 		return(retcode);
   1372 
   1373 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1374 		componentPtr = (RF_SingleComponent_t *)data;
   1375 		memcpy( &component, componentPtr,
   1376 			sizeof(RF_SingleComponent_t));
   1377 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1378 		return(retcode);
   1379 
   1380 	case RAIDFRAME_REBUILD_IN_PLACE:
   1381 
   1382 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1383 			/* Can't do this on a RAID 0!! */
   1384 			return(EINVAL);
   1385 		}
   1386 
   1387 		if (raidPtr->recon_in_progress == 1) {
   1388 			/* a reconstruct is already in progress! */
   1389 			return(EINVAL);
   1390 		}
   1391 
   1392 		componentPtr = (RF_SingleComponent_t *) data;
   1393 		memcpy( &component, componentPtr,
   1394 			sizeof(RF_SingleComponent_t));
   1395 		component.row = 0; /* we don't support any more */
   1396 		column = component.column;
   1397 
   1398 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1399 			return(EINVAL);
   1400 		}
   1401 
   1402 		RF_LOCK_MUTEX(raidPtr->mutex);
   1403 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1404 		    (raidPtr->numFailures > 0)) {
   1405 			/* XXX 0 above shouldn't be constant!!! */
   1406 			/* some component other than this has failed.
   1407 			   Let's not make things worse than they already
   1408 			   are... */
   1409 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1410 			       raidPtr->raidid);
   1411 			printf("raid%d:     Col: %d   Too many failures.\n",
   1412 			       raidPtr->raidid, column);
   1413 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1414 			return (EINVAL);
   1415 		}
   1416 		if (raidPtr->Disks[column].status ==
   1417 		    rf_ds_reconstructing) {
   1418 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1419 			       raidPtr->raidid);
   1420 			printf("raid%d:    Col: %d   Reconstruction already occuring!\n", raidPtr->raidid, column);
   1421 
   1422 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1423 			return (EINVAL);
   1424 		}
   1425 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1426 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1427 			return (EINVAL);
   1428 		}
   1429 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1430 
   1431 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1432 		if (rrcopy == NULL)
   1433 			return(ENOMEM);
   1434 
   1435 		rrcopy->raidPtr = (void *) raidPtr;
   1436 		rrcopy->col = column;
   1437 
   1438 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1439 					   rf_ReconstructInPlaceThread,
   1440 					   rrcopy,"raid_reconip");
   1441 		return(retcode);
   1442 
   1443 	case RAIDFRAME_GET_INFO:
   1444 		if (!raidPtr->valid)
   1445 			return (ENODEV);
   1446 		ucfgp = (RF_DeviceConfig_t **) data;
   1447 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1448 			  (RF_DeviceConfig_t *));
   1449 		if (d_cfg == NULL)
   1450 			return (ENOMEM);
   1451 		d_cfg->rows = 1; /* there is only 1 row now */
   1452 		d_cfg->cols = raidPtr->numCol;
   1453 		d_cfg->ndevs = raidPtr->numCol;
   1454 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1455 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1456 			return (ENOMEM);
   1457 		}
   1458 		d_cfg->nspares = raidPtr->numSpare;
   1459 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1460 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1461 			return (ENOMEM);
   1462 		}
   1463 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1464 		d = 0;
   1465 		for (j = 0; j < d_cfg->cols; j++) {
   1466 			d_cfg->devs[d] = raidPtr->Disks[j];
   1467 			d++;
   1468 		}
   1469 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1470 			d_cfg->spares[i] = raidPtr->Disks[j];
   1471 		}
   1472 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1473 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1474 
   1475 		return (retcode);
   1476 
   1477 	case RAIDFRAME_CHECK_PARITY:
   1478 		*(int *) data = raidPtr->parity_good;
   1479 		return (0);
   1480 
   1481 	case RAIDFRAME_RESET_ACCTOTALS:
   1482 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1483 		return (0);
   1484 
   1485 	case RAIDFRAME_GET_ACCTOTALS:
   1486 		totals = (RF_AccTotals_t *) data;
   1487 		*totals = raidPtr->acc_totals;
   1488 		return (0);
   1489 
   1490 	case RAIDFRAME_KEEP_ACCTOTALS:
   1491 		raidPtr->keep_acc_totals = *(int *)data;
   1492 		return (0);
   1493 
   1494 	case RAIDFRAME_GET_SIZE:
   1495 		*(int *) data = raidPtr->totalSectors;
   1496 		return (0);
   1497 
   1498 		/* fail a disk & optionally start reconstruction */
   1499 	case RAIDFRAME_FAIL_DISK:
   1500 
   1501 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1502 			/* Can't do this on a RAID 0!! */
   1503 			return(EINVAL);
   1504 		}
   1505 
   1506 		rr = (struct rf_recon_req *) data;
   1507 		rr->row = 0;
   1508 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1509 			return (EINVAL);
   1510 
   1511 
   1512 		RF_LOCK_MUTEX(raidPtr->mutex);
   1513 		if (raidPtr->status == rf_rs_reconstructing) {
   1514 			/* you can't fail a disk while we're reconstructing! */
   1515 			/* XXX wrong for RAID6 */
   1516 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1517 			return (EINVAL);
   1518 		}
   1519 		if ((raidPtr->Disks[rr->col].status ==
   1520 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1521 			/* some other component has failed.  Let's not make
   1522 			   things worse. XXX wrong for RAID6 */
   1523 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1524 			return (EINVAL);
   1525 		}
   1526 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1527 			/* Can't fail a spared disk! */
   1528 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1529 			return (EINVAL);
   1530 		}
   1531 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1532 
   1533 		/* make a copy of the recon request so that we don't rely on
   1534 		 * the user's buffer */
   1535 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1536 		if (rrcopy == NULL)
   1537 			return(ENOMEM);
   1538 		memcpy(rrcopy, rr, sizeof(*rr));
   1539 		rrcopy->raidPtr = (void *) raidPtr;
   1540 
   1541 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1542 					   rf_ReconThread,
   1543 					   rrcopy,"raid_recon");
   1544 		return (0);
   1545 
   1546 		/* invoke a copyback operation after recon on whatever disk
   1547 		 * needs it, if any */
   1548 	case RAIDFRAME_COPYBACK:
   1549 
   1550 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1551 			/* This makes no sense on a RAID 0!! */
   1552 			return(EINVAL);
   1553 		}
   1554 
   1555 		if (raidPtr->copyback_in_progress == 1) {
   1556 			/* Copyback is already in progress! */
   1557 			return(EINVAL);
   1558 		}
   1559 
   1560 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1561 					   rf_CopybackThread,
   1562 					   raidPtr,"raid_copyback");
   1563 		return (retcode);
   1564 
   1565 		/* return the percentage completion of reconstruction */
   1566 	case RAIDFRAME_CHECK_RECON_STATUS:
   1567 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1568 			/* This makes no sense on a RAID 0, so tell the
   1569 			   user it's done. */
   1570 			*(int *) data = 100;
   1571 			return(0);
   1572 		}
   1573 		if (raidPtr->status != rf_rs_reconstructing)
   1574 			*(int *) data = 100;
   1575 		else {
   1576 			if (raidPtr->reconControl->numRUsTotal > 0) {
   1577 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
   1578 			} else {
   1579 				*(int *) data = 0;
   1580 			}
   1581 		}
   1582 		return (0);
   1583 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1584 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1585 		if (raidPtr->status != rf_rs_reconstructing) {
   1586 			progressInfo.remaining = 0;
   1587 			progressInfo.completed = 100;
   1588 			progressInfo.total = 100;
   1589 		} else {
   1590 			progressInfo.total =
   1591 				raidPtr->reconControl->numRUsTotal;
   1592 			progressInfo.completed =
   1593 				raidPtr->reconControl->numRUsComplete;
   1594 			progressInfo.remaining = progressInfo.total -
   1595 				progressInfo.completed;
   1596 		}
   1597 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1598 				  sizeof(RF_ProgressInfo_t));
   1599 		return (retcode);
   1600 
   1601 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1602 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1603 			/* This makes no sense on a RAID 0, so tell the
   1604 			   user it's done. */
   1605 			*(int *) data = 100;
   1606 			return(0);
   1607 		}
   1608 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1609 			*(int *) data = 100 *
   1610 				raidPtr->parity_rewrite_stripes_done /
   1611 				raidPtr->Layout.numStripe;
   1612 		} else {
   1613 			*(int *) data = 100;
   1614 		}
   1615 		return (0);
   1616 
   1617 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1618 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1619 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1620 			progressInfo.total = raidPtr->Layout.numStripe;
   1621 			progressInfo.completed =
   1622 				raidPtr->parity_rewrite_stripes_done;
   1623 			progressInfo.remaining = progressInfo.total -
   1624 				progressInfo.completed;
   1625 		} else {
   1626 			progressInfo.remaining = 0;
   1627 			progressInfo.completed = 100;
   1628 			progressInfo.total = 100;
   1629 		}
   1630 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1631 				  sizeof(RF_ProgressInfo_t));
   1632 		return (retcode);
   1633 
   1634 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1635 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1636 			/* This makes no sense on a RAID 0 */
   1637 			*(int *) data = 100;
   1638 			return(0);
   1639 		}
   1640 		if (raidPtr->copyback_in_progress == 1) {
   1641 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1642 				raidPtr->Layout.numStripe;
   1643 		} else {
   1644 			*(int *) data = 100;
   1645 		}
   1646 		return (0);
   1647 
   1648 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1649 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1650 		if (raidPtr->copyback_in_progress == 1) {
   1651 			progressInfo.total = raidPtr->Layout.numStripe;
   1652 			progressInfo.completed =
   1653 				raidPtr->copyback_stripes_done;
   1654 			progressInfo.remaining = progressInfo.total -
   1655 				progressInfo.completed;
   1656 		} else {
   1657 			progressInfo.remaining = 0;
   1658 			progressInfo.completed = 100;
   1659 			progressInfo.total = 100;
   1660 		}
   1661 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1662 				  sizeof(RF_ProgressInfo_t));
   1663 		return (retcode);
   1664 
   1665 		/* the sparetable daemon calls this to wait for the kernel to
   1666 		 * need a spare table. this ioctl does not return until a
   1667 		 * spare table is needed. XXX -- calling mpsleep here in the
   1668 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1669 		 * -- I should either compute the spare table in the kernel,
   1670 		 * or have a different -- XXX XXX -- interface (a different
   1671 		 * character device) for delivering the table     -- XXX */
   1672 #if 0
   1673 	case RAIDFRAME_SPARET_WAIT:
   1674 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1675 		while (!rf_sparet_wait_queue)
   1676 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1677 		waitreq = rf_sparet_wait_queue;
   1678 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1679 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1680 
   1681 		/* structure assignment */
   1682 		*((RF_SparetWait_t *) data) = *waitreq;
   1683 
   1684 		RF_Free(waitreq, sizeof(*waitreq));
   1685 		return (0);
   1686 
   1687 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1688 		 * code in it that will cause the dameon to exit */
   1689 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1690 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1691 		waitreq->fcol = -1;
   1692 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1693 		waitreq->next = rf_sparet_wait_queue;
   1694 		rf_sparet_wait_queue = waitreq;
   1695 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1696 		wakeup(&rf_sparet_wait_queue);
   1697 		return (0);
   1698 
   1699 		/* used by the spare table daemon to deliver a spare table
   1700 		 * into the kernel */
   1701 	case RAIDFRAME_SEND_SPARET:
   1702 
   1703 		/* install the spare table */
   1704 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1705 
   1706 		/* respond to the requestor.  the return status of the spare
   1707 		 * table installation is passed in the "fcol" field */
   1708 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1709 		waitreq->fcol = retcode;
   1710 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1711 		waitreq->next = rf_sparet_resp_queue;
   1712 		rf_sparet_resp_queue = waitreq;
   1713 		wakeup(&rf_sparet_resp_queue);
   1714 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1715 
   1716 		return (retcode);
   1717 #endif
   1718 
   1719 	default:
   1720 		break; /* fall through to the os-specific code below */
   1721 
   1722 	}
   1723 
   1724 	if (!raidPtr->valid)
   1725 		return (EINVAL);
   1726 
   1727 	/*
   1728 	 * Add support for "regular" device ioctls here.
   1729 	 */
   1730 
   1731 	switch (cmd) {
   1732 	case DIOCGDINFO:
   1733 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1734 		break;
   1735 #ifdef __HAVE_OLD_DISKLABEL
   1736 	case ODIOCGDINFO:
   1737 		newlabel = *(rs->sc_dkdev.dk_label);
   1738 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1739 			return ENOTTY;
   1740 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1741 		break;
   1742 #endif
   1743 
   1744 	case DIOCGPART:
   1745 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1746 		((struct partinfo *) data)->part =
   1747 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1748 		break;
   1749 
   1750 	case DIOCWDINFO:
   1751 	case DIOCSDINFO:
   1752 #ifdef __HAVE_OLD_DISKLABEL
   1753 	case ODIOCWDINFO:
   1754 	case ODIOCSDINFO:
   1755 #endif
   1756 	{
   1757 		struct disklabel *lp;
   1758 #ifdef __HAVE_OLD_DISKLABEL
   1759 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1760 			memset(&newlabel, 0, sizeof newlabel);
   1761 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1762 			lp = &newlabel;
   1763 		} else
   1764 #endif
   1765 		lp = (struct disklabel *)data;
   1766 
   1767 		if ((error = raidlock(rs)) != 0)
   1768 			return (error);
   1769 
   1770 		rs->sc_flags |= RAIDF_LABELLING;
   1771 
   1772 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1773 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1774 		if (error == 0) {
   1775 			if (cmd == DIOCWDINFO
   1776 #ifdef __HAVE_OLD_DISKLABEL
   1777 			    || cmd == ODIOCWDINFO
   1778 #endif
   1779 			   )
   1780 				error = writedisklabel(RAIDLABELDEV(dev),
   1781 				    raidstrategy, rs->sc_dkdev.dk_label,
   1782 				    rs->sc_dkdev.dk_cpulabel);
   1783 		}
   1784 		rs->sc_flags &= ~RAIDF_LABELLING;
   1785 
   1786 		raidunlock(rs);
   1787 
   1788 		if (error)
   1789 			return (error);
   1790 		break;
   1791 	}
   1792 
   1793 	case DIOCWLABEL:
   1794 		if (*(int *) data != 0)
   1795 			rs->sc_flags |= RAIDF_WLABEL;
   1796 		else
   1797 			rs->sc_flags &= ~RAIDF_WLABEL;
   1798 		break;
   1799 
   1800 	case DIOCGDEFLABEL:
   1801 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1802 		break;
   1803 
   1804 #ifdef __HAVE_OLD_DISKLABEL
   1805 	case ODIOCGDEFLABEL:
   1806 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1807 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1808 			return ENOTTY;
   1809 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1810 		break;
   1811 #endif
   1812 
   1813 	case DIOCAWEDGE:
   1814 	case DIOCDWEDGE:
   1815 	    	dkw = (void *)data;
   1816 
   1817 		/* If the ioctl happens here, the parent is us. */
   1818 		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
   1819 		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
   1820 
   1821 	case DIOCLWEDGES:
   1822 		return dkwedge_list(&rs->sc_dkdev,
   1823 		    (struct dkwedge_list *)data, l);
   1824 
   1825 	default:
   1826 		retcode = ENOTTY;
   1827 	}
   1828 	return (retcode);
   1829 
   1830 }
   1831 
   1832 
   1833 /* raidinit -- complete the rest of the initialization for the
   1834    RAIDframe device.  */
   1835 
   1836 
   1837 static void
   1838 raidinit(RF_Raid_t *raidPtr)
   1839 {
   1840 	struct cfdata *cf;
   1841 	struct raid_softc *rs;
   1842 	int     unit;
   1843 
   1844 	unit = raidPtr->raidid;
   1845 
   1846 	rs = &raid_softc[unit];
   1847 
   1848 	/* XXX should check return code first... */
   1849 	rs->sc_flags |= RAIDF_INITED;
   1850 
   1851 	/* XXX doesn't check bounds. */
   1852 	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
   1853 
   1854 	/* attach the pseudo device */
   1855 	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
   1856 	cf->cf_name = raid_cd.cd_name;
   1857 	cf->cf_atname = raid_cd.cd_name;
   1858 	cf->cf_unit = unit;
   1859 	cf->cf_fstate = FSTATE_STAR;
   1860 
   1861 	rs->sc_dev = config_attach_pseudo(cf);
   1862 
   1863 	if (rs->sc_dev==NULL) {
   1864 		printf("raid%d: config_attach_pseudo failed\n",
   1865 		       raidPtr->raidid);
   1866 	}
   1867 
   1868 	/* disk_attach actually creates space for the CPU disklabel, among
   1869 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1870 	 * with disklabels. */
   1871 
   1872 	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
   1873 	disk_attach(&rs->sc_dkdev);
   1874 
   1875 	/* XXX There may be a weird interaction here between this, and
   1876 	 * protectedSectors, as used in RAIDframe.  */
   1877 
   1878 	rs->sc_size = raidPtr->totalSectors;
   1879 
   1880 	dkwedge_discover(&rs->sc_dkdev);
   1881 
   1882 	rf_set_properties(rs, raidPtr);
   1883 
   1884 }
   1885 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   1886 /* wake up the daemon & tell it to get us a spare table
   1887  * XXX
   1888  * the entries in the queues should be tagged with the raidPtr
   1889  * so that in the extremely rare case that two recons happen at once,
   1890  * we know for which device were requesting a spare table
   1891  * XXX
   1892  *
   1893  * XXX This code is not currently used. GO
   1894  */
   1895 int
   1896 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
   1897 {
   1898 	int     retcode;
   1899 
   1900 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1901 	req->next = rf_sparet_wait_queue;
   1902 	rf_sparet_wait_queue = req;
   1903 	wakeup(&rf_sparet_wait_queue);
   1904 
   1905 	/* mpsleep unlocks the mutex */
   1906 	while (!rf_sparet_resp_queue) {
   1907 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1908 		    "raidframe getsparetable", 0);
   1909 	}
   1910 	req = rf_sparet_resp_queue;
   1911 	rf_sparet_resp_queue = req->next;
   1912 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1913 
   1914 	retcode = req->fcol;
   1915 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1916 					 * alloc'd */
   1917 	return (retcode);
   1918 }
   1919 #endif
   1920 
   1921 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1922  * bp & passes it down.
   1923  * any calls originating in the kernel must use non-blocking I/O
   1924  * do some extra sanity checking to return "appropriate" error values for
   1925  * certain conditions (to make some standard utilities work)
   1926  *
   1927  * Formerly known as: rf_DoAccessKernel
   1928  */
   1929 void
   1930 raidstart(RF_Raid_t *raidPtr)
   1931 {
   1932 	RF_SectorCount_t num_blocks, pb, sum;
   1933 	RF_RaidAddr_t raid_addr;
   1934 	struct partition *pp;
   1935 	daddr_t blocknum;
   1936 	int     unit;
   1937 	struct raid_softc *rs;
   1938 	int     do_async;
   1939 	struct buf *bp;
   1940 	int rc;
   1941 
   1942 	unit = raidPtr->raidid;
   1943 	rs = &raid_softc[unit];
   1944 
   1945 	/* quick check to see if anything has died recently */
   1946 	RF_LOCK_MUTEX(raidPtr->mutex);
   1947 	if (raidPtr->numNewFailures > 0) {
   1948 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1949 		rf_update_component_labels(raidPtr,
   1950 					   RF_NORMAL_COMPONENT_UPDATE);
   1951 		RF_LOCK_MUTEX(raidPtr->mutex);
   1952 		raidPtr->numNewFailures--;
   1953 	}
   1954 
   1955 	/* Check to see if we're at the limit... */
   1956 	while (raidPtr->openings > 0) {
   1957 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1958 
   1959 		/* get the next item, if any, from the queue */
   1960 		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
   1961 			/* nothing more to do */
   1962 			return;
   1963 		}
   1964 
   1965 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1966 		 * partition.. Need to make it absolute to the underlying
   1967 		 * device.. */
   1968 
   1969 		blocknum = bp->b_blkno;
   1970 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1971 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1972 			blocknum += pp->p_offset;
   1973 		}
   1974 
   1975 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1976 			    (int) blocknum));
   1977 
   1978 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1979 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1980 
   1981 		/* *THIS* is where we adjust what block we're going to...
   1982 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1983 		raid_addr = blocknum;
   1984 
   1985 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1986 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1987 		sum = raid_addr + num_blocks + pb;
   1988 		if (1 || rf_debugKernelAccess) {
   1989 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1990 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1991 				    (int) pb, (int) bp->b_resid));
   1992 		}
   1993 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1994 		    || (sum < num_blocks) || (sum < pb)) {
   1995 			bp->b_error = ENOSPC;
   1996 			bp->b_resid = bp->b_bcount;
   1997 			biodone(bp);
   1998 			RF_LOCK_MUTEX(raidPtr->mutex);
   1999 			continue;
   2000 		}
   2001 		/*
   2002 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   2003 		 */
   2004 
   2005 		if (bp->b_bcount & raidPtr->sectorMask) {
   2006 			bp->b_error = EINVAL;
   2007 			bp->b_resid = bp->b_bcount;
   2008 			biodone(bp);
   2009 			RF_LOCK_MUTEX(raidPtr->mutex);
   2010 			continue;
   2011 
   2012 		}
   2013 		db1_printf(("Calling DoAccess..\n"));
   2014 
   2015 
   2016 		RF_LOCK_MUTEX(raidPtr->mutex);
   2017 		raidPtr->openings--;
   2018 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   2019 
   2020 		/*
   2021 		 * Everything is async.
   2022 		 */
   2023 		do_async = 1;
   2024 
   2025 		disk_busy(&rs->sc_dkdev);
   2026 
   2027 		/* XXX we're still at splbio() here... do we *really*
   2028 		   need to be? */
   2029 
   2030 		/* don't ever condition on bp->b_flags & B_WRITE.
   2031 		 * always condition on B_READ instead */
   2032 
   2033 		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   2034 				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   2035 				 do_async, raid_addr, num_blocks,
   2036 				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   2037 
   2038 		if (rc) {
   2039 			bp->b_error = rc;
   2040 			bp->b_resid = bp->b_bcount;
   2041 			biodone(bp);
   2042 			/* continue loop */
   2043 		}
   2044 
   2045 		RF_LOCK_MUTEX(raidPtr->mutex);
   2046 	}
   2047 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   2048 }
   2049 
   2050 
   2051 
   2052 
   2053 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   2054 
   2055 int
   2056 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
   2057 {
   2058 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   2059 	struct buf *bp;
   2060 
   2061 	req->queue = queue;
   2062 
   2063 #if DIAGNOSTIC
   2064 	if (queue->raidPtr->raidid >= numraid) {
   2065 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
   2066 		    numraid);
   2067 		panic("Invalid Unit number in rf_DispatchKernelIO");
   2068 	}
   2069 #endif
   2070 
   2071 	bp = req->bp;
   2072 
   2073 	switch (req->type) {
   2074 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   2075 		/* XXX need to do something extra here.. */
   2076 		/* I'm leaving this in, as I've never actually seen it used,
   2077 		 * and I'd like folks to report it... GO */
   2078 		printf(("WAKEUP CALLED\n"));
   2079 		queue->numOutstanding++;
   2080 
   2081 		bp->b_flags = 0;
   2082 		bp->b_private = req;
   2083 
   2084 		KernelWakeupFunc(bp);
   2085 		break;
   2086 
   2087 	case RF_IO_TYPE_READ:
   2088 	case RF_IO_TYPE_WRITE:
   2089 #if RF_ACC_TRACE > 0
   2090 		if (req->tracerec) {
   2091 			RF_ETIMER_START(req->tracerec->timer);
   2092 		}
   2093 #endif
   2094 		InitBP(bp, queue->rf_cinfo->ci_vp,
   2095 		    op, queue->rf_cinfo->ci_dev,
   2096 		    req->sectorOffset, req->numSector,
   2097 		    req->buf, KernelWakeupFunc, (void *) req,
   2098 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   2099 
   2100 		if (rf_debugKernelAccess) {
   2101 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   2102 				(long) bp->b_blkno));
   2103 		}
   2104 		queue->numOutstanding++;
   2105 		queue->last_deq_sector = req->sectorOffset;
   2106 		/* acc wouldn't have been let in if there were any pending
   2107 		 * reqs at any other priority */
   2108 		queue->curPriority = req->priority;
   2109 
   2110 		db1_printf(("Going for %c to unit %d col %d\n",
   2111 			    req->type, queue->raidPtr->raidid,
   2112 			    queue->col));
   2113 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   2114 			(int) req->sectorOffset, (int) req->numSector,
   2115 			(int) (req->numSector <<
   2116 			    queue->raidPtr->logBytesPerSector),
   2117 			(int) queue->raidPtr->logBytesPerSector));
   2118 		VOP_STRATEGY(bp->b_vp, bp);
   2119 
   2120 		break;
   2121 
   2122 	default:
   2123 		panic("bad req->type in rf_DispatchKernelIO");
   2124 	}
   2125 	db1_printf(("Exiting from DispatchKernelIO\n"));
   2126 
   2127 	return (0);
   2128 }
   2129 /* this is the callback function associated with a I/O invoked from
   2130    kernel code.
   2131  */
   2132 static void
   2133 KernelWakeupFunc(struct buf *bp)
   2134 {
   2135 	RF_DiskQueueData_t *req = NULL;
   2136 	RF_DiskQueue_t *queue;
   2137 	int s;
   2138 
   2139 	s = splbio();
   2140 	db1_printf(("recovering the request queue:\n"));
   2141 	req = bp->b_private;
   2142 
   2143 	queue = (RF_DiskQueue_t *) req->queue;
   2144 
   2145 #if RF_ACC_TRACE > 0
   2146 	if (req->tracerec) {
   2147 		RF_ETIMER_STOP(req->tracerec->timer);
   2148 		RF_ETIMER_EVAL(req->tracerec->timer);
   2149 		RF_LOCK_MUTEX(rf_tracing_mutex);
   2150 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2151 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2152 		req->tracerec->num_phys_ios++;
   2153 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   2154 	}
   2155 #endif
   2156 
   2157 	/* XXX Ok, let's get aggressive... If b_error is set, let's go
   2158 	 * ballistic, and mark the component as hosed... */
   2159 
   2160 	if (bp->b_error != 0) {
   2161 		/* Mark the disk as dead */
   2162 		/* but only mark it once... */
   2163 		/* and only if it wouldn't leave this RAID set
   2164 		   completely broken */
   2165 		if (((queue->raidPtr->Disks[queue->col].status ==
   2166 		      rf_ds_optimal) ||
   2167 		     (queue->raidPtr->Disks[queue->col].status ==
   2168 		      rf_ds_used_spare)) &&
   2169 		     (queue->raidPtr->numFailures <
   2170 		      queue->raidPtr->Layout.map->faultsTolerated)) {
   2171 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   2172 			       queue->raidPtr->raidid,
   2173 			       queue->raidPtr->Disks[queue->col].devname);
   2174 			queue->raidPtr->Disks[queue->col].status =
   2175 			    rf_ds_failed;
   2176 			queue->raidPtr->status = rf_rs_degraded;
   2177 			queue->raidPtr->numFailures++;
   2178 			queue->raidPtr->numNewFailures++;
   2179 		} else {	/* Disk is already dead... */
   2180 			/* printf("Disk already marked as dead!\n"); */
   2181 		}
   2182 
   2183 	}
   2184 
   2185 	/* Fill in the error value */
   2186 
   2187 	req->error = bp->b_error;
   2188 
   2189 	simple_lock(&queue->raidPtr->iodone_lock);
   2190 
   2191 	/* Drop this one on the "finished" queue... */
   2192 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   2193 
   2194 	/* Let the raidio thread know there is work to be done. */
   2195 	wakeup(&(queue->raidPtr->iodone));
   2196 
   2197 	simple_unlock(&queue->raidPtr->iodone_lock);
   2198 
   2199 	splx(s);
   2200 }
   2201 
   2202 
   2203 
   2204 /*
   2205  * initialize a buf structure for doing an I/O in the kernel.
   2206  */
   2207 static void
   2208 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2209        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
   2210        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
   2211        struct proc *b_proc)
   2212 {
   2213 	/* bp->b_flags       = B_PHYS | rw_flag; */
   2214 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   2215 	bp->b_bcount = numSect << logBytesPerSector;
   2216 	bp->b_bufsize = bp->b_bcount;
   2217 	bp->b_error = 0;
   2218 	bp->b_dev = dev;
   2219 	bp->b_data = bf;
   2220 	bp->b_blkno = startSect;
   2221 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2222 	if (bp->b_bcount == 0) {
   2223 		panic("bp->b_bcount is zero in InitBP!!");
   2224 	}
   2225 	bp->b_proc = b_proc;
   2226 	bp->b_iodone = cbFunc;
   2227 	bp->b_private = cbArg;
   2228 	bp->b_vp = b_vp;
   2229 	if ((bp->b_flags & B_READ) == 0) {
   2230 		bp->b_vp->v_numoutput++;
   2231 	}
   2232 
   2233 }
   2234 
   2235 static void
   2236 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
   2237 		    struct disklabel *lp)
   2238 {
   2239 	memset(lp, 0, sizeof(*lp));
   2240 
   2241 	/* fabricate a label... */
   2242 	lp->d_secperunit = raidPtr->totalSectors;
   2243 	lp->d_secsize = raidPtr->bytesPerSector;
   2244 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2245 	lp->d_ntracks = 4 * raidPtr->numCol;
   2246 	lp->d_ncylinders = raidPtr->totalSectors /
   2247 		(lp->d_nsectors * lp->d_ntracks);
   2248 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2249 
   2250 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2251 	lp->d_type = DTYPE_RAID;
   2252 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2253 	lp->d_rpm = 3600;
   2254 	lp->d_interleave = 1;
   2255 	lp->d_flags = 0;
   2256 
   2257 	lp->d_partitions[RAW_PART].p_offset = 0;
   2258 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2259 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2260 	lp->d_npartitions = RAW_PART + 1;
   2261 
   2262 	lp->d_magic = DISKMAGIC;
   2263 	lp->d_magic2 = DISKMAGIC;
   2264 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2265 
   2266 }
   2267 /*
   2268  * Read the disklabel from the raid device.  If one is not present, fake one
   2269  * up.
   2270  */
   2271 static void
   2272 raidgetdisklabel(dev_t dev)
   2273 {
   2274 	int     unit = raidunit(dev);
   2275 	struct raid_softc *rs = &raid_softc[unit];
   2276 	const char   *errstring;
   2277 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2278 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2279 	RF_Raid_t *raidPtr;
   2280 
   2281 	db1_printf(("Getting the disklabel...\n"));
   2282 
   2283 	memset(clp, 0, sizeof(*clp));
   2284 
   2285 	raidPtr = raidPtrs[unit];
   2286 
   2287 	raidgetdefaultlabel(raidPtr, rs, lp);
   2288 
   2289 	/*
   2290 	 * Call the generic disklabel extraction routine.
   2291 	 */
   2292 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2293 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2294 	if (errstring)
   2295 		raidmakedisklabel(rs);
   2296 	else {
   2297 		int     i;
   2298 		struct partition *pp;
   2299 
   2300 		/*
   2301 		 * Sanity check whether the found disklabel is valid.
   2302 		 *
   2303 		 * This is necessary since total size of the raid device
   2304 		 * may vary when an interleave is changed even though exactly
   2305 		 * same components are used, and old disklabel may used
   2306 		 * if that is found.
   2307 		 */
   2308 		if (lp->d_secperunit != rs->sc_size)
   2309 			printf("raid%d: WARNING: %s: "
   2310 			    "total sector size in disklabel (%d) != "
   2311 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2312 			    lp->d_secperunit, (long) rs->sc_size);
   2313 		for (i = 0; i < lp->d_npartitions; i++) {
   2314 			pp = &lp->d_partitions[i];
   2315 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2316 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2317 				       "exceeds the size of raid (%ld)\n",
   2318 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2319 		}
   2320 	}
   2321 
   2322 }
   2323 /*
   2324  * Take care of things one might want to take care of in the event
   2325  * that a disklabel isn't present.
   2326  */
   2327 static void
   2328 raidmakedisklabel(struct raid_softc *rs)
   2329 {
   2330 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2331 	db1_printf(("Making a label..\n"));
   2332 
   2333 	/*
   2334 	 * For historical reasons, if there's no disklabel present
   2335 	 * the raw partition must be marked FS_BSDFFS.
   2336 	 */
   2337 
   2338 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2339 
   2340 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2341 
   2342 	lp->d_checksum = dkcksum(lp);
   2343 }
   2344 /*
   2345  * Wait interruptibly for an exclusive lock.
   2346  *
   2347  * XXX
   2348  * Several drivers do this; it should be abstracted and made MP-safe.
   2349  * (Hmm... where have we seen this warning before :->  GO )
   2350  */
   2351 static int
   2352 raidlock(struct raid_softc *rs)
   2353 {
   2354 	int     error;
   2355 
   2356 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2357 		rs->sc_flags |= RAIDF_WANTED;
   2358 		if ((error =
   2359 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2360 			return (error);
   2361 	}
   2362 	rs->sc_flags |= RAIDF_LOCKED;
   2363 	return (0);
   2364 }
   2365 /*
   2366  * Unlock and wake up any waiters.
   2367  */
   2368 static void
   2369 raidunlock(struct raid_softc *rs)
   2370 {
   2371 
   2372 	rs->sc_flags &= ~RAIDF_LOCKED;
   2373 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2374 		rs->sc_flags &= ~RAIDF_WANTED;
   2375 		wakeup(rs);
   2376 	}
   2377 }
   2378 
   2379 
/*
 * Location and size of the component label on each component.  The label
 * read/write routines below turn the offset into a block number by
 * dividing by DEV_BSIZE and transfer RF_COMPONENT_INFO_SIZE bytes.
 */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2382 
   2383 int
   2384 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2385 {
   2386 	RF_ComponentLabel_t clabel;
   2387 	raidread_component_label(dev, b_vp, &clabel);
   2388 	clabel.mod_counter = mod_counter;
   2389 	clabel.clean = RF_RAID_CLEAN;
   2390 	raidwrite_component_label(dev, b_vp, &clabel);
   2391 	return(0);
   2392 }
   2393 
   2394 
   2395 int
   2396 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2397 {
   2398 	RF_ComponentLabel_t clabel;
   2399 	raidread_component_label(dev, b_vp, &clabel);
   2400 	clabel.mod_counter = mod_counter;
   2401 	clabel.clean = RF_RAID_DIRTY;
   2402 	raidwrite_component_label(dev, b_vp, &clabel);
   2403 	return(0);
   2404 }
   2405 
   2406 /* ARGSUSED */
   2407 int
   2408 raidread_component_label(dev_t dev, struct vnode *b_vp,
   2409 			 RF_ComponentLabel_t *clabel)
   2410 {
   2411 	struct buf *bp;
   2412 	const struct bdevsw *bdev;
   2413 	int error;
   2414 
   2415 	/* XXX should probably ensure that we don't try to do this if
   2416 	   someone has changed rf_protected_sectors. */
   2417 
   2418 	if (b_vp == NULL) {
   2419 		/* For whatever reason, this component is not valid.
   2420 		   Don't try to read a component label from it. */
   2421 		return(EINVAL);
   2422 	}
   2423 
   2424 	/* get a block of the appropriate size... */
   2425 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2426 	bp->b_dev = dev;
   2427 
   2428 	/* get our ducks in a row for the read */
   2429 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2430 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2431 	bp->b_flags |= B_READ;
   2432  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2433 
   2434 	bdev = bdevsw_lookup(bp->b_dev);
   2435 	if (bdev == NULL)
   2436 		return (ENXIO);
   2437 	(*bdev->d_strategy)(bp);
   2438 
   2439 	error = biowait(bp);
   2440 
   2441 	if (!error) {
   2442 		memcpy(clabel, bp->b_data,
   2443 		       sizeof(RF_ComponentLabel_t));
   2444 	}
   2445 
   2446 	brelse(bp, 0);
   2447 	return(error);
   2448 }
   2449 /* ARGSUSED */
   2450 int
   2451 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
   2452 			  RF_ComponentLabel_t *clabel)
   2453 {
   2454 	struct buf *bp;
   2455 	const struct bdevsw *bdev;
   2456 	int error;
   2457 
   2458 	/* get a block of the appropriate size... */
   2459 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2460 	bp->b_dev = dev;
   2461 
   2462 	/* get our ducks in a row for the write */
   2463 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2464 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2465 	bp->b_flags |= B_WRITE;
   2466  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2467 
   2468 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2469 
   2470 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2471 
   2472 	bdev = bdevsw_lookup(bp->b_dev);
   2473 	if (bdev == NULL)
   2474 		return (ENXIO);
   2475 	(*bdev->d_strategy)(bp);
   2476 	error = biowait(bp);
   2477 	brelse(bp, 0);
   2478 	if (error) {
   2479 #if 1
   2480 		printf("Failed to write RAID component info!\n");
   2481 #endif
   2482 	}
   2483 
   2484 	return(error);
   2485 }
   2486 
   2487 void
   2488 rf_markalldirty(RF_Raid_t *raidPtr)
   2489 {
   2490 	RF_ComponentLabel_t clabel;
   2491 	int sparecol;
   2492 	int c;
   2493 	int j;
   2494 	int scol = -1;
   2495 
   2496 	raidPtr->mod_counter++;
   2497 	for (c = 0; c < raidPtr->numCol; c++) {
   2498 		/* we don't want to touch (at all) a disk that has
   2499 		   failed */
   2500 		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
   2501 			raidread_component_label(
   2502 						 raidPtr->Disks[c].dev,
   2503 						 raidPtr->raid_cinfo[c].ci_vp,
   2504 						 &clabel);
   2505 			if (clabel.status == rf_ds_spared) {
   2506 				/* XXX do something special...
   2507 				   but whatever you do, don't
   2508 				   try to access it!! */
   2509 			} else {
   2510 				raidmarkdirty(
   2511 					      raidPtr->Disks[c].dev,
   2512 					      raidPtr->raid_cinfo[c].ci_vp,
   2513 					      raidPtr->mod_counter);
   2514 			}
   2515 		}
   2516 	}
   2517 
   2518 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2519 		sparecol = raidPtr->numCol + c;
   2520 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2521 			/*
   2522 
   2523 			   we claim this disk is "optimal" if it's
   2524 			   rf_ds_used_spare, as that means it should be
   2525 			   directly substitutable for the disk it replaced.
   2526 			   We note that too...
   2527 
   2528 			 */
   2529 
   2530 			for(j=0;j<raidPtr->numCol;j++) {
   2531 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2532 					scol = j;
   2533 					break;
   2534 				}
   2535 			}
   2536 
   2537 			raidread_component_label(
   2538 				 raidPtr->Disks[sparecol].dev,
   2539 				 raidPtr->raid_cinfo[sparecol].ci_vp,
   2540 				 &clabel);
   2541 			/* make sure status is noted */
   2542 
   2543 			raid_init_component_label(raidPtr, &clabel);
   2544 
   2545 			clabel.row = 0;
   2546 			clabel.column = scol;
   2547 			/* Note: we *don't* change status from rf_ds_used_spare
   2548 			   to rf_ds_optimal */
   2549 			/* clabel.status = rf_ds_optimal; */
   2550 
   2551 			raidmarkdirty(raidPtr->Disks[sparecol].dev,
   2552 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2553 				      raidPtr->mod_counter);
   2554 		}
   2555 	}
   2556 }
   2557 
   2558 
   2559 void
   2560 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
   2561 {
   2562 	RF_ComponentLabel_t clabel;
   2563 	int sparecol;
   2564 	int c;
   2565 	int j;
   2566 	int scol;
   2567 
   2568 	scol = -1;
   2569 
   2570 	/* XXX should do extra checks to make sure things really are clean,
   2571 	   rather than blindly setting the clean bit... */
   2572 
   2573 	raidPtr->mod_counter++;
   2574 
   2575 	for (c = 0; c < raidPtr->numCol; c++) {
   2576 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   2577 			raidread_component_label(
   2578 						 raidPtr->Disks[c].dev,
   2579 						 raidPtr->raid_cinfo[c].ci_vp,
   2580 						 &clabel);
   2581 			/* make sure status is noted */
   2582 			clabel.status = rf_ds_optimal;
   2583 
   2584 			/* bump the counter */
   2585 			clabel.mod_counter = raidPtr->mod_counter;
   2586 
   2587 			/* note what unit we are configured as */
   2588 			clabel.last_unit = raidPtr->raidid;
   2589 
   2590 			raidwrite_component_label(
   2591 						  raidPtr->Disks[c].dev,
   2592 						  raidPtr->raid_cinfo[c].ci_vp,
   2593 						  &clabel);
   2594 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2595 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2596 					raidmarkclean(
   2597 						      raidPtr->Disks[c].dev,
   2598 						      raidPtr->raid_cinfo[c].ci_vp,
   2599 						      raidPtr->mod_counter);
   2600 				}
   2601 			}
   2602 		}
   2603 		/* else we don't touch it.. */
   2604 	}
   2605 
   2606 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2607 		sparecol = raidPtr->numCol + c;
   2608 		/* Need to ensure that the reconstruct actually completed! */
   2609 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2610 			/*
   2611 
   2612 			   we claim this disk is "optimal" if it's
   2613 			   rf_ds_used_spare, as that means it should be
   2614 			   directly substitutable for the disk it replaced.
   2615 			   We note that too...
   2616 
   2617 			 */
   2618 
   2619 			for(j=0;j<raidPtr->numCol;j++) {
   2620 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2621 					scol = j;
   2622 					break;
   2623 				}
   2624 			}
   2625 
   2626 			/* XXX shouldn't *really* need this... */
   2627 			raidread_component_label(
   2628 				      raidPtr->Disks[sparecol].dev,
   2629 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2630 				      &clabel);
   2631 			/* make sure status is noted */
   2632 
   2633 			raid_init_component_label(raidPtr, &clabel);
   2634 
   2635 			clabel.mod_counter = raidPtr->mod_counter;
   2636 			clabel.column = scol;
   2637 			clabel.status = rf_ds_optimal;
   2638 			clabel.last_unit = raidPtr->raidid;
   2639 
   2640 			raidwrite_component_label(
   2641 				      raidPtr->Disks[sparecol].dev,
   2642 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2643 				      &clabel);
   2644 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2645 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2646 					raidmarkclean( raidPtr->Disks[sparecol].dev,
   2647 						       raidPtr->raid_cinfo[sparecol].ci_vp,
   2648 						       raidPtr->mod_counter);
   2649 				}
   2650 			}
   2651 		}
   2652 	}
   2653 }
   2654 
   2655 void
   2656 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2657 {
   2658 
   2659 	if (vp != NULL) {
   2660 		if (auto_configured == 1) {
   2661 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2662 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2663 			vput(vp);
   2664 
   2665 		} else {
   2666 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred, curlwp);
   2667 		}
   2668 	}
   2669 }
   2670 
   2671 
   2672 void
   2673 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2674 {
   2675 	int r,c;
   2676 	struct vnode *vp;
   2677 	int acd;
   2678 
   2679 
   2680 	/* We take this opportunity to close the vnodes like we should.. */
   2681 
   2682 	for (c = 0; c < raidPtr->numCol; c++) {
   2683 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2684 		acd = raidPtr->Disks[c].auto_configured;
   2685 		rf_close_component(raidPtr, vp, acd);
   2686 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2687 		raidPtr->Disks[c].auto_configured = 0;
   2688 	}
   2689 
   2690 	for (r = 0; r < raidPtr->numSpare; r++) {
   2691 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2692 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2693 		rf_close_component(raidPtr, vp, acd);
   2694 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2695 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2696 	}
   2697 }
   2698 
   2699 
   2700 void
   2701 rf_ReconThread(struct rf_recon_req *req)
   2702 {
   2703 	int     s;
   2704 	RF_Raid_t *raidPtr;
   2705 
   2706 	s = splbio();
   2707 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2708 	raidPtr->recon_in_progress = 1;
   2709 
   2710 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2711 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2712 
   2713 	RF_Free(req, sizeof(*req));
   2714 
   2715 	raidPtr->recon_in_progress = 0;
   2716 	splx(s);
   2717 
   2718 	/* That's all... */
   2719 	kthread_exit(0);	/* does not return */
   2720 }
   2721 
   2722 void
   2723 rf_RewriteParityThread(RF_Raid_t *raidPtr)
   2724 {
   2725 	int retcode;
   2726 	int s;
   2727 
   2728 	raidPtr->parity_rewrite_stripes_done = 0;
   2729 	raidPtr->parity_rewrite_in_progress = 1;
   2730 	s = splbio();
   2731 	retcode = rf_RewriteParity(raidPtr);
   2732 	splx(s);
   2733 	if (retcode) {
   2734 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2735 	} else {
   2736 		/* set the clean bit!  If we shutdown correctly,
   2737 		   the clean bit on each component label will get
   2738 		   set */
   2739 		raidPtr->parity_good = RF_RAID_CLEAN;
   2740 	}
   2741 	raidPtr->parity_rewrite_in_progress = 0;
   2742 
   2743 	/* Anyone waiting for us to stop?  If so, inform them... */
   2744 	if (raidPtr->waitShutdown) {
   2745 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2746 	}
   2747 
   2748 	/* That's all... */
   2749 	kthread_exit(0);	/* does not return */
   2750 }
   2751 
   2752 
/*
 * Thread body: flag a copyback as in progress, call
 * rf_CopybackReconstructedData() at splbio, clear the flag again and
 * exit the thread.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	/* s holds the previous level so splx() can restore it */
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
   2767 
   2768 
/*
 * Thread body: call rf_ReconstructInPlace() for the column named in the
 * request, with recon_in_progress set for the duration.  The request is
 * freed here and the thread exits afterwards.  Everything up to splx()
 * runs at splbio.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	/* take what we need out of the request before it is freed below */
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
   2786 
   2787 static RF_AutoConfig_t *
   2788 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
   2789     const char *cname, RF_SectorCount_t size)
   2790 {
   2791 	int good_one = 0;
   2792 	RF_ComponentLabel_t *clabel;
   2793 	RF_AutoConfig_t *ac;
   2794 
   2795 	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
   2796 	if (clabel == NULL) {
   2797 oomem:
   2798 		    while(ac_list) {
   2799 			    ac = ac_list;
   2800 			    if (ac->clabel)
   2801 				    free(ac->clabel, M_RAIDFRAME);
   2802 			    ac_list = ac_list->next;
   2803 			    free(ac, M_RAIDFRAME);
   2804 		    }
   2805 		    printf("RAID auto config: out of memory!\n");
   2806 		    return NULL; /* XXX probably should panic? */
   2807 	}
   2808 
   2809 	if (!raidread_component_label(dev, vp, clabel)) {
   2810 		    /* Got the label.  Does it look reasonable? */
   2811 		    if (rf_reasonable_label(clabel) &&
   2812 			(clabel->partitionSize <= size)) {
   2813 #ifdef DEBUG
   2814 			    printf("Component on: %s: %llu\n",
   2815 				cname, (unsigned long long)size);
   2816 			    rf_print_component_label(clabel);
   2817 #endif
   2818 			    /* if it's reasonable, add it, else ignore it. */
   2819 			    ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
   2820 				M_NOWAIT);
   2821 			    if (ac == NULL) {
   2822 				    free(clabel, M_RAIDFRAME);
   2823 				    goto oomem;
   2824 			    }
   2825 			    strlcpy(ac->devname, cname, sizeof(ac->devname));
   2826 			    ac->dev = dev;
   2827 			    ac->vp = vp;
   2828 			    ac->clabel = clabel;
   2829 			    ac->next = ac_list;
   2830 			    ac_list = ac;
   2831 			    good_one = 1;
   2832 		    }
   2833 	}
   2834 	if (!good_one) {
   2835 		/* cleanup */
   2836 		free(clabel, M_RAIDFRAME);
   2837 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2838 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2839 		vput(vp);
   2840 	}
   2841 	return ac_list;
   2842 }
   2843 
   2844 RF_AutoConfig_t *
   2845 rf_find_raid_components()
   2846 {
   2847 	struct vnode *vp;
   2848 	struct disklabel label;
   2849 	struct device *dv;
   2850 	dev_t dev;
   2851 	int bmajor, bminor, wedge;
   2852 	int error;
   2853 	int i;
   2854 	RF_AutoConfig_t *ac_list;
   2855 
   2856 
   2857 	/* initialize the AutoConfig list */
   2858 	ac_list = NULL;
   2859 
   2860 	/* we begin by trolling through *all* the devices on the system */
   2861 
   2862 	for (dv = alldevs.tqh_first; dv != NULL;
   2863 	     dv = dv->dv_list.tqe_next) {
   2864 
   2865 		/* we are only interested in disks... */
   2866 		if (device_class(dv) != DV_DISK)
   2867 			continue;
   2868 
   2869 		/* we don't care about floppies... */
   2870 		if (device_is_a(dv, "fd")) {
   2871 			continue;
   2872 		}
   2873 
   2874 		/* we don't care about CD's... */
   2875 		if (device_is_a(dv, "cd")) {
   2876 			continue;
   2877 		}
   2878 
   2879 		/* hdfd is the Atari/Hades floppy driver */
   2880 		if (device_is_a(dv, "hdfd")) {
   2881 			continue;
   2882 		}
   2883 
   2884 		/* fdisa is the Atari/Milan floppy driver */
   2885 		if (device_is_a(dv, "fdisa")) {
   2886 			continue;
   2887 		}
   2888 
   2889 		/* need to find the device_name_to_block_device_major stuff */
   2890 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2891 
   2892 		/* get a vnode for the raw partition of this disk */
   2893 
   2894 		wedge = device_is_a(dv, "dk");
   2895 		bminor = minor(device_unit(dv));
   2896 		dev = wedge ? makedev(bmajor, bminor) :
   2897 		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
   2898 		if (bdevvp(dev, &vp))
   2899 			panic("RAID can't alloc vnode");
   2900 
   2901 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2902 
   2903 		if (error) {
   2904 			/* "Who cares."  Continue looking
   2905 			   for something that exists*/
   2906 			vput(vp);
   2907 			continue;
   2908 		}
   2909 
   2910 		if (wedge) {
   2911 			struct dkwedge_info dkw;
   2912 			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
   2913 			    NOCRED, 0);
   2914 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2915 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2916 			vput(vp);
   2917 			if (error) {
   2918 				printf("RAIDframe: can't get wedge info for "
   2919 				    "dev %s (%d)\n", dv->dv_xname, error);
   2920 				continue;
   2921 			}
   2922 
   2923 			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0)
   2924 				continue;
   2925 
   2926 			ac_list = rf_get_component(ac_list, dev, vp,
   2927 			    dv->dv_xname, dkw.dkw_size);
   2928 			continue;
   2929 		}
   2930 
   2931 		/* Ok, the disk exists.  Go get the disklabel. */
   2932 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
   2933 		if (error) {
   2934 			/*
   2935 			 * XXX can't happen - open() would
   2936 			 * have errored out (or faked up one)
   2937 			 */
   2938 			if (error != ENOTTY)
   2939 				printf("RAIDframe: can't get label for dev "
   2940 				    "%s (%d)\n", dv->dv_xname, error);
   2941 		}
   2942 
   2943 		/* don't need this any more.  We'll allocate it again
   2944 		   a little later if we really do... */
   2945 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2946 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2947 		vput(vp);
   2948 
   2949 		if (error)
   2950 			continue;
   2951 
   2952 		for (i = 0; i < label.d_npartitions; i++) {
   2953 			char cname[sizeof(ac_list->devname)];
   2954 
   2955 			/* We only support partitions marked as RAID */
   2956 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2957 				continue;
   2958 
   2959 			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
   2960 			if (bdevvp(dev, &vp))
   2961 				panic("RAID can't alloc vnode");
   2962 
   2963 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2964 			if (error) {
   2965 				/* Whatever... */
   2966 				vput(vp);
   2967 				continue;
   2968 			}
   2969 			snprintf(cname, sizeof(cname), "%s%c",
   2970 			    dv->dv_xname, 'a' + i);
   2971 			ac_list = rf_get_component(ac_list, dev, vp, cname,
   2972 				label.d_partitions[i].p_size);
   2973 		}
   2974 	}
   2975 	return ac_list;
   2976 }
   2977 
   2978 
   2979 static int
   2980 rf_reasonable_label(RF_ComponentLabel_t *clabel)
   2981 {
   2982 
   2983 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2984 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2985 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2986 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2987 	    clabel->row >=0 &&
   2988 	    clabel->column >= 0 &&
   2989 	    clabel->num_rows > 0 &&
   2990 	    clabel->num_columns > 0 &&
   2991 	    clabel->row < clabel->num_rows &&
   2992 	    clabel->column < clabel->num_columns &&
   2993 	    clabel->blockSize > 0 &&
   2994 	    clabel->numBlocks > 0) {
   2995 		/* label looks reasonable enough... */
   2996 		return(1);
   2997 	}
   2998 	return(0);
   2999 }
   3000 
   3001 
#ifdef DEBUG
/*
 * Debug aid: dump the fields of a component label with printf().
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *clean_str = clabel->clean ? "Yes" : "No";
	const char *autoconf_str = clabel->autoconfigure ? "Yes" : "No";
	const char *root_str = clabel->root_partition ? "Yes" : "No";

	/* position and geometry */
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);

	/* identity */
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf("   Clean: %s Status: %d\n", clean_str, clabel->status );

	/* layout */
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);

	/* configuration policy */
	printf("   Autoconfig: %s\n", autoconf_str);
	printf("   Contains root partition: %s\n", root_str);
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
   3029 
   3030 RF_ConfigSet_t *
   3031 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   3032 {
   3033 	RF_AutoConfig_t *ac;
   3034 	RF_ConfigSet_t *config_sets;
   3035 	RF_ConfigSet_t *cset;
   3036 	RF_AutoConfig_t *ac_next;
   3037 
   3038 
   3039 	config_sets = NULL;
   3040 
   3041 	/* Go through the AutoConfig list, and figure out which components
   3042 	   belong to what sets.  */
   3043 	ac = ac_list;
   3044 	while(ac!=NULL) {
   3045 		/* we're going to putz with ac->next, so save it here
   3046 		   for use at the end of the loop */
   3047 		ac_next = ac->next;
   3048 
   3049 		if (config_sets == NULL) {
   3050 			/* will need at least this one... */
   3051 			config_sets = (RF_ConfigSet_t *)
   3052 				malloc(sizeof(RF_ConfigSet_t),
   3053 				       M_RAIDFRAME, M_NOWAIT);
   3054 			if (config_sets == NULL) {
   3055 				panic("rf_create_auto_sets: No memory!");
   3056 			}
   3057 			/* this one is easy :) */
   3058 			config_sets->ac = ac;
   3059 			config_sets->next = NULL;
   3060 			config_sets->rootable = 0;
   3061 			ac->next = NULL;
   3062 		} else {
   3063 			/* which set does this component fit into? */
   3064 			cset = config_sets;
   3065 			while(cset!=NULL) {
   3066 				if (rf_does_it_fit(cset, ac)) {
   3067 					/* looks like it matches... */
   3068 					ac->next = cset->ac;
   3069 					cset->ac = ac;
   3070 					break;
   3071 				}
   3072 				cset = cset->next;
   3073 			}
   3074 			if (cset==NULL) {
   3075 				/* didn't find a match above... new set..*/
   3076 				cset = (RF_ConfigSet_t *)
   3077 					malloc(sizeof(RF_ConfigSet_t),
   3078 					       M_RAIDFRAME, M_NOWAIT);
   3079 				if (cset == NULL) {
   3080 					panic("rf_create_auto_sets: No memory!");
   3081 				}
   3082 				cset->ac = ac;
   3083 				ac->next = NULL;
   3084 				cset->next = config_sets;
   3085 				cset->rootable = 0;
   3086 				config_sets = cset;
   3087 			}
   3088 		}
   3089 		ac = ac_next;
   3090 	}
   3091 
   3092 
   3093 	return(config_sets);
   3094 }
   3095 
   3096 static int
   3097 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3098 {
   3099 	RF_ComponentLabel_t *clabel1, *clabel2;
   3100 
   3101 	/* If this one matches the *first* one in the set, that's good
   3102 	   enough, since the other members of the set would have been
   3103 	   through here too... */
   3104 	/* note that we are not checking partitionSize here..
   3105 
   3106 	   Note that we are also not checking the mod_counters here.
   3107 	   If everything else matches execpt the mod_counter, that's
   3108 	   good enough for this test.  We will deal with the mod_counters
   3109 	   a little later in the autoconfiguration process.
   3110 
   3111 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3112 
   3113 	   The reason we don't check for this is that failed disks
   3114 	   will have lower modification counts.  If those disks are
   3115 	   not added to the set they used to belong to, then they will
   3116 	   form their own set, which may result in 2 different sets,
   3117 	   for example, competing to be configured at raid0, and
   3118 	   perhaps competing to be the root filesystem set.  If the
   3119 	   wrong ones get configured, or both attempt to become /,
   3120 	   weird behaviour and or serious lossage will occur.  Thus we
   3121 	   need to bring them into the fold here, and kick them out at
   3122 	   a later point.
   3123 
   3124 	*/
   3125 
   3126 	clabel1 = cset->ac->clabel;
   3127 	clabel2 = ac->clabel;
   3128 	if ((clabel1->version == clabel2->version) &&
   3129 	    (clabel1->serial_number == clabel2->serial_number) &&
   3130 	    (clabel1->num_rows == clabel2->num_rows) &&
   3131 	    (clabel1->num_columns == clabel2->num_columns) &&
   3132 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3133 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3134 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3135 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3136 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3137 	    (clabel1->blockSize == clabel2->blockSize) &&
   3138 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   3139 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3140 	    (clabel1->root_partition == clabel2->root_partition) &&
   3141 	    (clabel1->last_unit == clabel2->last_unit) &&
   3142 	    (clabel1->config_order == clabel2->config_order)) {
   3143 		/* if it get's here, it almost *has* to be a match */
   3144 	} else {
   3145 		/* it's not consistent with somebody in the set..
   3146 		   punt */
   3147 		return(0);
   3148 	}
   3149 	/* all was fine.. it must fit... */
   3150 	return(1);
   3151 }
   3152 
   3153 int
   3154 rf_have_enough_components(RF_ConfigSet_t *cset)
   3155 {
   3156 	RF_AutoConfig_t *ac;
   3157 	RF_AutoConfig_t *auto_config;
   3158 	RF_ComponentLabel_t *clabel;
   3159 	int c;
   3160 	int num_cols;
   3161 	int num_missing;
   3162 	int mod_counter;
   3163 	int mod_counter_found;
   3164 	int even_pair_failed;
   3165 	char parity_type;
   3166 
   3167 
   3168 	/* check to see that we have enough 'live' components
   3169 	   of this set.  If so, we can configure it if necessary */
   3170 
   3171 	num_cols = cset->ac->clabel->num_columns;
   3172 	parity_type = cset->ac->clabel->parityConfig;
   3173 
   3174 	/* XXX Check for duplicate components!?!?!? */
   3175 
   3176 	/* Determine what the mod_counter is supposed to be for this set. */
   3177 
   3178 	mod_counter_found = 0;
   3179 	mod_counter = 0;
   3180 	ac = cset->ac;
   3181 	while(ac!=NULL) {
   3182 		if (mod_counter_found==0) {
   3183 			mod_counter = ac->clabel->mod_counter;
   3184 			mod_counter_found = 1;
   3185 		} else {
   3186 			if (ac->clabel->mod_counter > mod_counter) {
   3187 				mod_counter = ac->clabel->mod_counter;
   3188 			}
   3189 		}
   3190 		ac = ac->next;
   3191 	}
   3192 
   3193 	num_missing = 0;
   3194 	auto_config = cset->ac;
   3195 
   3196 	even_pair_failed = 0;
   3197 	for(c=0; c<num_cols; c++) {
   3198 		ac = auto_config;
   3199 		while(ac!=NULL) {
   3200 			if ((ac->clabel->column == c) &&
   3201 			    (ac->clabel->mod_counter == mod_counter)) {
   3202 				/* it's this one... */
   3203 #ifdef DEBUG
   3204 				printf("Found: %s at %d\n",
   3205 				       ac->devname,c);
   3206 #endif
   3207 				break;
   3208 			}
   3209 			ac=ac->next;
   3210 		}
   3211 		if (ac==NULL) {
   3212 				/* Didn't find one here! */
   3213 				/* special case for RAID 1, especially
   3214 				   where there are more than 2
   3215 				   components (where RAIDframe treats
   3216 				   things a little differently :( ) */
   3217 			if (parity_type == '1') {
   3218 				if (c%2 == 0) { /* even component */
   3219 					even_pair_failed = 1;
   3220 				} else { /* odd component.  If
   3221 					    we're failed, and
   3222 					    so is the even
   3223 					    component, it's
   3224 					    "Good Night, Charlie" */
   3225 					if (even_pair_failed == 1) {
   3226 						return(0);
   3227 					}
   3228 				}
   3229 			} else {
   3230 				/* normal accounting */
   3231 				num_missing++;
   3232 			}
   3233 		}
   3234 		if ((parity_type == '1') && (c%2 == 1)) {
   3235 				/* Just did an even component, and we didn't
   3236 				   bail.. reset the even_pair_failed flag,
   3237 				   and go on to the next component.... */
   3238 			even_pair_failed = 0;
   3239 		}
   3240 	}
   3241 
   3242 	clabel = cset->ac->clabel;
   3243 
   3244 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3245 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3246 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3247 		/* XXX this needs to be made *much* more general */
   3248 		/* Too many failures */
   3249 		return(0);
   3250 	}
   3251 	/* otherwise, all is well, and we've got enough to take a kick
   3252 	   at autoconfiguring this set */
   3253 	return(1);
   3254 }
   3255 
   3256 void
   3257 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
   3258 			RF_Raid_t *raidPtr)
   3259 {
   3260 	RF_ComponentLabel_t *clabel;
   3261 	int i;
   3262 
   3263 	clabel = ac->clabel;
   3264 
   3265 	/* 1. Fill in the common stuff */
   3266 	config->numRow = clabel->num_rows = 1;
   3267 	config->numCol = clabel->num_columns;
   3268 	config->numSpare = 0; /* XXX should this be set here? */
   3269 	config->sectPerSU = clabel->sectPerSU;
   3270 	config->SUsPerPU = clabel->SUsPerPU;
   3271 	config->SUsPerRU = clabel->SUsPerRU;
   3272 	config->parityConfig = clabel->parityConfig;
   3273 	/* XXX... */
   3274 	strcpy(config->diskQueueType,"fifo");
   3275 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3276 	config->layoutSpecificSize = 0; /* XXX ?? */
   3277 
   3278 	while(ac!=NULL) {
   3279 		/* row/col values will be in range due to the checks
   3280 		   in reasonable_label() */
   3281 		strcpy(config->devnames[0][ac->clabel->column],
   3282 		       ac->devname);
   3283 		ac = ac->next;
   3284 	}
   3285 
   3286 	for(i=0;i<RF_MAXDBGV;i++) {
   3287 		config->debugVars[i][0] = 0;
   3288 	}
   3289 }
   3290 
   3291 int
   3292 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3293 {
   3294 	RF_ComponentLabel_t clabel;
   3295 	struct vnode *vp;
   3296 	dev_t dev;
   3297 	int column;
   3298 	int sparecol;
   3299 
   3300 	raidPtr->autoconfigure = new_value;
   3301 
   3302 	for(column=0; column<raidPtr->numCol; column++) {
   3303 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3304 			dev = raidPtr->Disks[column].dev;
   3305 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3306 			raidread_component_label(dev, vp, &clabel);
   3307 			clabel.autoconfigure = new_value;
   3308 			raidwrite_component_label(dev, vp, &clabel);
   3309 		}
   3310 	}
   3311 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3312 		sparecol = raidPtr->numCol + column;
   3313 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3314 			dev = raidPtr->Disks[sparecol].dev;
   3315 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3316 			raidread_component_label(dev, vp, &clabel);
   3317 			clabel.autoconfigure = new_value;
   3318 			raidwrite_component_label(dev, vp, &clabel);
   3319 		}
   3320 	}
   3321 	return(new_value);
   3322 }
   3323 
   3324 int
   3325 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3326 {
   3327 	RF_ComponentLabel_t clabel;
   3328 	struct vnode *vp;
   3329 	dev_t dev;
   3330 	int column;
   3331 	int sparecol;
   3332 
   3333 	raidPtr->root_partition = new_value;
   3334 	for(column=0; column<raidPtr->numCol; column++) {
   3335 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3336 			dev = raidPtr->Disks[column].dev;
   3337 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3338 			raidread_component_label(dev, vp, &clabel);
   3339 			clabel.root_partition = new_value;
   3340 			raidwrite_component_label(dev, vp, &clabel);
   3341 		}
   3342 	}
   3343 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3344 		sparecol = raidPtr->numCol + column;
   3345 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3346 			dev = raidPtr->Disks[sparecol].dev;
   3347 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3348 			raidread_component_label(dev, vp, &clabel);
   3349 			clabel.root_partition = new_value;
   3350 			raidwrite_component_label(dev, vp, &clabel);
   3351 		}
   3352 	}
   3353 	return(new_value);
   3354 }
   3355 
   3356 void
   3357 rf_release_all_vps(RF_ConfigSet_t *cset)
   3358 {
   3359 	RF_AutoConfig_t *ac;
   3360 
   3361 	ac = cset->ac;
   3362 	while(ac!=NULL) {
   3363 		/* Close the vp, and give it back */
   3364 		if (ac->vp) {
   3365 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3366 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3367 			vput(ac->vp);
   3368 			ac->vp = NULL;
   3369 		}
   3370 		ac = ac->next;
   3371 	}
   3372 }
   3373 
   3374 
   3375 void
   3376 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3377 {
   3378 	RF_AutoConfig_t *ac;
   3379 	RF_AutoConfig_t *next_ac;
   3380 
   3381 	ac = cset->ac;
   3382 	while(ac!=NULL) {
   3383 		next_ac = ac->next;
   3384 		/* nuke the label */
   3385 		free(ac->clabel, M_RAIDFRAME);
   3386 		/* cleanup the config structure */
   3387 		free(ac, M_RAIDFRAME);
   3388 		/* "next.." */
   3389 		ac = next_ac;
   3390 	}
   3391 	/* and, finally, nuke the config set */
   3392 	free(cset, M_RAIDFRAME);
   3393 }
   3394 
   3395 
   3396 void
   3397 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
   3398 {
   3399 	/* current version number */
   3400 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3401 	clabel->serial_number = raidPtr->serial_number;
   3402 	clabel->mod_counter = raidPtr->mod_counter;
   3403 	clabel->num_rows = 1;
   3404 	clabel->num_columns = raidPtr->numCol;
   3405 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3406 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3407 
   3408 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3409 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3410 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3411 
   3412 	clabel->blockSize = raidPtr->bytesPerSector;
   3413 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3414 
   3415 	/* XXX not portable */
   3416 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3417 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3418 	clabel->autoconfigure = raidPtr->autoconfigure;
   3419 	clabel->root_partition = raidPtr->root_partition;
   3420 	clabel->last_unit = raidPtr->raidid;
   3421 	clabel->config_order = raidPtr->config_order;
   3422 }
   3423 
   3424 int
   3425 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
   3426 {
   3427 	RF_Raid_t *raidPtr;
   3428 	RF_Config_t *config;
   3429 	int raidID;
   3430 	int retcode;
   3431 
   3432 #ifdef DEBUG
   3433 	printf("RAID autoconfigure\n");
   3434 #endif
   3435 
   3436 	retcode = 0;
   3437 	*unit = -1;
   3438 
   3439 	/* 1. Create a config structure */
   3440 
   3441 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3442 				       M_RAIDFRAME,
   3443 				       M_NOWAIT);
   3444 	if (config==NULL) {
   3445 		printf("Out of mem!?!?\n");
   3446 				/* XXX do something more intelligent here. */
   3447 		return(1);
   3448 	}
   3449 
   3450 	memset(config, 0, sizeof(RF_Config_t));
   3451 
   3452 	/*
   3453 	   2. Figure out what RAID ID this one is supposed to live at
   3454 	   See if we can get the same RAID dev that it was configured
   3455 	   on last time..
   3456 	*/
   3457 
   3458 	raidID = cset->ac->clabel->last_unit;
   3459 	if ((raidID < 0) || (raidID >= numraid)) {
   3460 		/* let's not wander off into lala land. */
   3461 		raidID = numraid - 1;
   3462 	}
   3463 	if (raidPtrs[raidID]->valid != 0) {
   3464 
   3465 		/*
   3466 		   Nope... Go looking for an alternative...
   3467 		   Start high so we don't immediately use raid0 if that's
   3468 		   not taken.
   3469 		*/
   3470 
   3471 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3472 			if (raidPtrs[raidID]->valid == 0) {
   3473 				/* can use this one! */
   3474 				break;
   3475 			}
   3476 		}
   3477 	}
   3478 
   3479 	if (raidID < 0) {
   3480 		/* punt... */
   3481 		printf("Unable to auto configure this set!\n");
   3482 		printf("(Out of RAID devs!)\n");
   3483 		free(config, M_RAIDFRAME);
   3484 		return(1);
   3485 	}
   3486 
   3487 #ifdef DEBUG
   3488 	printf("Configuring raid%d:\n",raidID);
   3489 #endif
   3490 
   3491 	raidPtr = raidPtrs[raidID];
   3492 
   3493 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3494 	raidPtr->raidid = raidID;
   3495 	raidPtr->openings = RAIDOUTSTANDING;
   3496 
   3497 	/* 3. Build the configuration structure */
   3498 	rf_create_configuration(cset->ac, config, raidPtr);
   3499 
   3500 	/* 4. Do the configuration */
   3501 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3502 
   3503 	if (retcode == 0) {
   3504 
   3505 		raidinit(raidPtrs[raidID]);
   3506 
   3507 		rf_markalldirty(raidPtrs[raidID]);
   3508 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3509 		if (cset->ac->clabel->root_partition==1) {
   3510 			/* everything configured just fine.  Make a note
   3511 			   that this set is eligible to be root. */
   3512 			cset->rootable = 1;
   3513 			/* XXX do this here? */
   3514 			raidPtrs[raidID]->root_partition = 1;
   3515 		}
   3516 	}
   3517 
   3518 	/* 5. Cleanup */
   3519 	free(config, M_RAIDFRAME);
   3520 
   3521 	*unit = raidID;
   3522 	return(retcode);
   3523 }
   3524 
   3525 void
   3526 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
   3527 {
   3528 	struct buf *bp;
   3529 
   3530 	bp = (struct buf *)desc->bp;
   3531 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3532 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
   3533 }
   3534 
/*
 * Initialize pool `p' for items of `size' bytes at IPL_BIO, using
 * w_chan as the pool's wait channel name.  xmax is handed to
 * pool_sethiwat(); xmin is used both to prime the pool and as the
 * argument to pool_setlowat().
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	/* prime with xmin items, then make xmin the low-water mark too */
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
   3544 
   3545 /*
   3546  * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
   3547  * if there is IO pending and if that IO could possibly be done for a
   3548  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
   3549  * otherwise.
   3550  *
   3551  */
   3552 
   3553 int
   3554 rf_buf_queue_check(int raidid)
   3555 {
   3556 	if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
   3557 	    raidPtrs[raidid]->openings > 0) {
   3558 		/* there is work to do */
   3559 		return 0;
   3560 	}
   3561 	/* default is nothing to do */
   3562 	return 1;
   3563 }
   3564 
   3565 int
   3566 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
   3567 {
   3568 	struct partinfo dpart;
   3569 	struct dkwedge_info dkw;
   3570 	int error;
   3571 
   3572 	error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred, l);
   3573 	if (error == 0) {
   3574 		diskPtr->blockSize = dpart.disklab->d_secsize;
   3575 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
   3576 		diskPtr->partitionSize = dpart.part->p_size;
   3577 		return 0;
   3578 	}
   3579 
   3580 	error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred, l);
   3581 	if (error == 0) {
   3582 		diskPtr->blockSize = 512;	/* XXX */
   3583 		diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
   3584 		diskPtr->partitionSize = dkw.dkw_size;
   3585 		return 0;
   3586 	}
   3587 	return error;
   3588 }
   3589 
/*
 * Match routine: unconditionally returns 1, ignoring all of its
 * arguments.
 */
static int
raid_match(struct device *self, struct cfdata *cfdata,
    void *aux)
{
	return 1;
}
   3596 
/*
 * Attach routine: intentionally empty, nothing is done here.
 */
static void
raid_attach(struct device *parent, struct device *self,
    void *aux)
{

}
   3603 
   3604 
   3605 static int
   3606 raid_detach(struct device *self, int flags)
   3607 {
   3608 	struct raid_softc *rs = (struct raid_softc *)self;
   3609 
   3610 	if (rs->sc_flags & RAIDF_INITED)
   3611 		return EBUSY;
   3612 
   3613 	return 0;
   3614 }
   3615 
   3616 static void
   3617 rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
   3618 {
   3619 	prop_dictionary_t disk_info, odisk_info, geom;
   3620 	disk_info = prop_dictionary_create();
   3621 	geom = prop_dictionary_create();
   3622 	prop_dictionary_set_uint64(geom, "sectors-per-unit",
   3623 				   raidPtr->totalSectors);
   3624 	prop_dictionary_set_uint32(geom, "sector-size",
   3625 				   raidPtr->bytesPerSector);
   3626 
   3627 	prop_dictionary_set_uint16(geom, "sectors-per-track",
   3628 				   raidPtr->Layout.dataSectorsPerStripe);
   3629 	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
   3630 				   4 * raidPtr->numCol);
   3631 
   3632 	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
   3633 	   raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
   3634 	   (4 * raidPtr->numCol)));
   3635 
   3636 	prop_dictionary_set(disk_info, "geometry", geom);
   3637 	prop_object_release(geom);
   3638 	prop_dictionary_set(device_properties(rs->sc_dev),
   3639 			    "disk-info", disk_info);
   3640 	odisk_info = rs->sc_dkdev.dk_info;
   3641 	rs->sc_dkdev.dk_info = disk_info;
   3642 	if (odisk_info)
   3643 		prop_object_release(odisk_info);
   3644 }
   3645