      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.240 2007/12/11 01:54:46 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1990, 1993
     40  *      The Regents of the University of California.  All rights reserved.
     41  *
     42  * This code is derived from software contributed to Berkeley by
     43  * the Systems Programming Group of the University of Utah Computer
     44  * Science Department.
     45  *
     46  * Redistribution and use in source and binary forms, with or without
     47  * modification, are permitted provided that the following conditions
     48  * are met:
     49  * 1. Redistributions of source code must retain the above copyright
     50  *    notice, this list of conditions and the following disclaimer.
     51  * 2. Redistributions in binary form must reproduce the above copyright
     52  *    notice, this list of conditions and the following disclaimer in the
     53  *    documentation and/or other materials provided with the distribution.
     54  * 3. Neither the name of the University nor the names of its contributors
     55  *    may be used to endorse or promote products derived from this software
     56  *    without specific prior written permission.
     57  *
     58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     68  * SUCH DAMAGE.
     69  *
     70  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     71  *
     72  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     73  */
     74 
     75 /*
     76  * Copyright (c) 1988 University of Utah.
     77  *
     78  * This code is derived from software contributed to Berkeley by
     79  * the Systems Programming Group of the University of Utah Computer
     80  * Science Department.
     81  *
     82  * Redistribution and use in source and binary forms, with or without
     83  * modification, are permitted provided that the following conditions
     84  * are met:
     85  * 1. Redistributions of source code must retain the above copyright
     86  *    notice, this list of conditions and the following disclaimer.
     87  * 2. Redistributions in binary form must reproduce the above copyright
     88  *    notice, this list of conditions and the following disclaimer in the
     89  *    documentation and/or other materials provided with the distribution.
     90  * 3. All advertising materials mentioning features or use of this software
     91  *    must display the following acknowledgement:
     92  *      This product includes software developed by the University of
     93  *      California, Berkeley and its contributors.
     94  * 4. Neither the name of the University nor the names of its contributors
     95  *    may be used to endorse or promote products derived from this software
     96  *    without specific prior written permission.
     97  *
     98  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     99  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    100  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    101  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    102  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    103  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    104  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    105  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    106  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    107  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    108  * SUCH DAMAGE.
    109  *
    110  * from: Utah $Hdr: cd.c 1.6 90/11/28$
    111  *
    112  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
    113  */
    114 
    115 /*
    116  * Copyright (c) 1995 Carnegie-Mellon University.
    117  * All rights reserved.
    118  *
    119  * Authors: Mark Holland, Jim Zelenka
    120  *
    121  * Permission to use, copy, modify and distribute this software and
    122  * its documentation is hereby granted, provided that both the copyright
    123  * notice and this permission notice appear in all copies of the
    124  * software, derivative works or modified versions, and any portions
    125  * thereof, and that both notices appear in supporting documentation.
    126  *
    127  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
    128  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
    129  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
    130  *
    131  * Carnegie Mellon requests users of this software to return to
    132  *
    133  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    134  *  School of Computer Science
    135  *  Carnegie Mellon University
    136  *  Pittsburgh PA 15213-3890
    137  *
    138  * any improvements or extensions that they make and grant Carnegie the
    139  * rights to redistribute these changes.
    140  */
    141 
    142 /***********************************************************
    143  *
    144  * rf_kintf.c -- the kernel interface routines for RAIDframe
    145  *
    146  ***********************************************************/
    147 
    148 #include <sys/cdefs.h>
    149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.240 2007/12/11 01:54:46 oster Exp $");
    150 
    151 #include <sys/param.h>
    152 #include <sys/errno.h>
    153 #include <sys/pool.h>
    154 #include <sys/proc.h>
    155 #include <sys/queue.h>
    156 #include <sys/disk.h>
    157 #include <sys/device.h>
    158 #include <sys/stat.h>
    159 #include <sys/ioctl.h>
    160 #include <sys/fcntl.h>
    161 #include <sys/systm.h>
    162 #include <sys/vnode.h>
    163 #include <sys/disklabel.h>
    164 #include <sys/conf.h>
    165 #include <sys/lock.h>
    166 #include <sys/buf.h>
    167 #include <sys/bufq.h>
    168 #include <sys/user.h>
    169 #include <sys/reboot.h>
    170 #include <sys/kauth.h>
    171 
    172 #include <prop/proplib.h>
    173 
    174 #include <dev/raidframe/raidframevar.h>
    175 #include <dev/raidframe/raidframeio.h>
    176 #include "raid.h"
    177 #include "opt_raid_autoconfig.h"
    178 #include "rf_raid.h"
    179 #include "rf_copyback.h"
    180 #include "rf_dag.h"
    181 #include "rf_dagflags.h"
    182 #include "rf_desc.h"
    183 #include "rf_diskqueue.h"
    184 #include "rf_etimer.h"
    185 #include "rf_general.h"
    186 #include "rf_kintf.h"
    187 #include "rf_options.h"
    188 #include "rf_driver.h"
    189 #include "rf_parityscan.h"
    190 #include "rf_threadstuff.h"
    191 
    192 #ifdef DEBUG
    193 int     rf_kdebug_level = 0;
    194 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    195 #else				/* DEBUG */
    196 #define db1_printf(a) { }
    197 #endif				/* DEBUG */
    198 
    199 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    200 
    201 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    202 
    203 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    204 						 * spare table */
    205 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    206 						 * installation process */
    207 
    208 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    209 
    210 /* prototypes */
    211 static void KernelWakeupFunc(struct buf *);
    212 static void InitBP(struct buf *, struct vnode *, unsigned,
    213     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    214     void *, int, struct proc *);
    215 static void raidinit(RF_Raid_t *);
    216 
    217 void raidattach(int);
    218 static int raid_match(struct device *, struct cfdata *, void *);
    219 static void raid_attach(struct device *, struct device *, void *);
    220 static int raid_detach(struct device *, int);
    221 
    222 dev_type_open(raidopen);
    223 dev_type_close(raidclose);
    224 dev_type_read(raidread);
    225 dev_type_write(raidwrite);
    226 dev_type_ioctl(raidioctl);
    227 dev_type_strategy(raidstrategy);
    228 dev_type_dump(raiddump);
    229 dev_type_size(raidsize);
    230 
    231 const struct bdevsw raid_bdevsw = {
    232 	raidopen, raidclose, raidstrategy, raidioctl,
    233 	raiddump, raidsize, D_DISK
    234 };
    235 
    236 const struct cdevsw raid_cdevsw = {
    237 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    238 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
    239 };
    240 
    241 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
    242 
    243 /* XXX Not sure if the following should be replacing the raidPtrs above,
    244    or if it should be used in conjunction with that...
    245 */
    246 
    247 struct raid_softc {
    248 	struct device *sc_dev;
    249 	int     sc_flags;	/* flags */
    250 	int     sc_cflags;	/* configuration flags */
    251 	uint64_t sc_size;	/* size of the raid device */
    252 	char    sc_xname[20];	/* XXX external name */
    253 	struct disk sc_dkdev;	/* generic disk device info */
    254 	struct bufq_state *buf_queue;	/* used for the device queue */
    255 };
    256 /* sc_flags */
    257 #define RAIDF_INITED	0x01	/* unit has been initialized */
    258 #define RAIDF_WLABEL	0x02	/* label area is writable */
    259 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    260 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    261 #define RAIDF_LOCKED	0x80	/* unit is locked */
    262 
    263 #define	raidunit(x)	DISKUNIT(x)
    264 int numraid = 0;
    265 
    266 extern struct cfdriver raid_cd;
    267 CFATTACH_DECL_NEW(raid, sizeof(struct raid_softc),
    268     raid_match, raid_attach, raid_detach, NULL);
    269 
    270 /*
    271  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    272  * Be aware that large numbers can allow the driver to consume a lot of
    273  * kernel memory, especially on writes, and in degraded mode reads.
    274  *
    275  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    276  * a single 64K write will typically require 64K for the old data,
    277  * 64K for the old parity, and 64K for the new parity, for a total
    278  * of 192K (if the parity buffer is not re-used immediately).
     279  * Even if it is used immediately, that's still 128K, which when multiplied
    280  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    281  *
    282  * Now in degraded mode, for example, a 64K read on the above setup may
    283  * require data reconstruction, which will require *all* of the 4 remaining
    284  * disks to participate -- 4 * 32K/disk == 128K again.
    285  */
    286 
    287 #ifndef RAIDOUTSTANDING
    288 #define RAIDOUTSTANDING   6
    289 #endif
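/*
 * The default above can be overridden at kernel build time, since it is
 * only used if nothing else has defined RAIDOUTSTANDING first -- e.g.
 * (with a hypothetical value) via an "options RAIDOUTSTANDING=10" line
 * in the kernel configuration file.  Keep the memory usage discussion
 * above in mind before raising it.
 */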
    290 
    291 #define RAIDLABELDEV(dev)	\
    292 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
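/*
 * RAIDLABELDEV(dev) names the raw partition of the same raid unit as
 * "dev"; it is the device used for disklabel I/O on this unit.
 */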
    293 
    294 /* declared here, and made public, for the benefit of KVM stuff.. */
    295 struct raid_softc *raid_softc;
    296 
    297 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    298 				     struct disklabel *);
    299 static void raidgetdisklabel(dev_t);
    300 static void raidmakedisklabel(struct raid_softc *);
    301 
    302 static int raidlock(struct raid_softc *);
    303 static void raidunlock(struct raid_softc *);
    304 
    305 static void rf_markalldirty(RF_Raid_t *);
    306 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
    307 
    308 void rf_ReconThread(struct rf_recon_req *);
    309 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    310 void rf_CopybackThread(RF_Raid_t *raidPtr);
    311 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    312 int rf_autoconfig(struct device *self);
    313 void rf_buildroothack(RF_ConfigSet_t *);
    314 
    315 RF_AutoConfig_t *rf_find_raid_components(void);
    316 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    317 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    318 static int rf_reasonable_label(RF_ComponentLabel_t *);
    319 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    320 int rf_set_autoconfig(RF_Raid_t *, int);
    321 int rf_set_rootpartition(RF_Raid_t *, int);
    322 void rf_release_all_vps(RF_ConfigSet_t *);
    323 void rf_cleanup_config_set(RF_ConfigSet_t *);
    324 int rf_have_enough_components(RF_ConfigSet_t *);
    325 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    326 
    327 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    328 				  allow autoconfig to take place.
    329 				  Note that this is overridden by having
    330 				  RAID_AUTOCONFIG as an option in the
    331 				  kernel config file.  */
    332 
    333 struct RF_Pools_s rf_pools;
    334 
    335 void
    336 raidattach(int num)
    337 {
    338 	int raidID;
    339 	int i, rc;
    340 
    341 #ifdef DEBUG
    342 	printf("raidattach: Asked for %d units\n", num);
    343 #endif
    344 
    345 	if (num <= 0) {
    346 #ifdef DIAGNOSTIC
    347 		panic("raidattach: count <= 0");
    348 #endif
    349 		return;
    350 	}
    351 	/* This is where all the initialization stuff gets done. */
    352 
    353 	numraid = num;
    354 
    355 	/* Make some space for requested number of units... */
    356 
    357 	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
    358 	if (raidPtrs == NULL) {
    359 		panic("raidPtrs is NULL!!");
    360 	}
    361 
    362 	rf_mutex_init(&rf_sparet_wait_mutex);
    363 
    364 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    365 
    366 	for (i = 0; i < num; i++)
    367 		raidPtrs[i] = NULL;
    368 	rc = rf_BootRaidframe();
    369 	if (rc == 0)
    370 		aprint_normal("Kernelized RAIDframe activated\n");
    371 	else
    372 		panic("Serious error booting RAID!!");
    373 
     374 	/* put together some data structures like the CCD device does.  This
     375 	 * lets us lock the device and what-not when it gets opened. */
    376 
    377 	raid_softc = (struct raid_softc *)
    378 		malloc(num * sizeof(struct raid_softc),
    379 		       M_RAIDFRAME, M_NOWAIT);
    380 	if (raid_softc == NULL) {
    381 		aprint_error("WARNING: no memory for RAIDframe driver\n");
    382 		return;
    383 	}
    384 
    385 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    386 
    387 	for (raidID = 0; raidID < num; raidID++) {
    388 		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
    389 
    390 		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
    391 			  (RF_Raid_t *));
    392 		if (raidPtrs[raidID] == NULL) {
    393 			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
    394 			numraid = raidID;
    395 			return;
    396 		}
    397 	}
    398 
    399 	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
    400 		aprint_error("raidattach: config_cfattach_attach failed?\n");
    401 	}
    402 
    403 #ifdef RAID_AUTOCONFIG
    404 	raidautoconfig = 1;
    405 #endif
    406 
    407 	/*
    408 	 * Register a finalizer which will be used to auto-config RAID
    409 	 * sets once all real hardware devices have been found.
    410 	 */
    411 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
    412 		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
    413 }
    414 
    415 int
    416 rf_autoconfig(struct device *self)
    417 {
    418 	RF_AutoConfig_t *ac_list;
    419 	RF_ConfigSet_t *config_sets;
    420 
    421 	if (raidautoconfig == 0)
    422 		return (0);
    423 
    424 	/* XXX This code can only be run once. */
    425 	raidautoconfig = 0;
    426 
    427 	/* 1. locate all RAID components on the system */
    428 #ifdef DEBUG
    429 	printf("Searching for RAID components...\n");
    430 #endif
    431 	ac_list = rf_find_raid_components();
    432 
    433 	/* 2. Sort them into their respective sets. */
    434 	config_sets = rf_create_auto_sets(ac_list);
    435 
    436 	/*
     437 	 * 3. Evaluate each set and configure the valid ones.
    438 	 * This gets done in rf_buildroothack().
    439 	 */
    440 	rf_buildroothack(config_sets);
    441 
    442 	return 1;
    443 }
    444 
    445 void
    446 rf_buildroothack(RF_ConfigSet_t *config_sets)
    447 {
    448 	RF_ConfigSet_t *cset;
    449 	RF_ConfigSet_t *next_cset;
    450 	int retcode;
    451 	int raidID;
    452 	int rootID;
    453 	int col;
    454 	int num_root;
    455 	char *devname;
    456 
    457 	rootID = 0;
    458 	num_root = 0;
    459 	cset = config_sets;
    460 	while(cset != NULL ) {
    461 		next_cset = cset->next;
    462 		if (rf_have_enough_components(cset) &&
    463 		    cset->ac->clabel->autoconfigure==1) {
    464 			retcode = rf_auto_config_set(cset,&raidID);
    465 			if (!retcode) {
    466 #ifdef DEBUG
    467 				printf("raid%d: configured ok\n", raidID);
    468 #endif
    469 				if (cset->rootable) {
    470 					rootID = raidID;
    471 					num_root++;
    472 				}
    473 			} else {
    474 				/* The autoconfig didn't work :( */
    475 #ifdef DEBUG
    476 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    477 #endif
    478 				rf_release_all_vps(cset);
    479 			}
    480 		} else {
    481 			/* we're not autoconfiguring this set...
    482 			   release the associated resources */
    483 			rf_release_all_vps(cset);
    484 		}
    485 		/* cleanup */
    486 		rf_cleanup_config_set(cset);
    487 		cset = next_cset;
    488 	}
    489 
    490 	/* if the user has specified what the root device should be
    491 	   then we don't touch booted_device or boothowto... */
    492 
    493 	if (rootspec != NULL)
    494 		return;
    495 
    496 	/* we found something bootable... */
    497 
    498 	if (num_root == 1) {
    499 		booted_device = raid_softc[rootID].sc_dev;
    500 	} else if (num_root > 1) {
    501 
    502 		/*
    503 		 * Maybe the MD code can help. If it cannot, then
    504 		 * setroot() will discover that we have no
    505 		 * booted_device and will ask the user if nothing was
    506 		 * hardwired in the kernel config file
    507 		 */
    508 
    509 		if (booted_device == NULL)
    510 			cpu_rootconf();
    511 		if (booted_device == NULL)
    512 			return;
    513 
    514 		num_root = 0;
    515 		for (raidID = 0; raidID < numraid; raidID++) {
    516 			if (raidPtrs[raidID]->valid == 0)
    517 				continue;
    518 
    519 			if (raidPtrs[raidID]->root_partition == 0)
    520 				continue;
    521 
    522 			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
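				/*
				 * Strip the leading "/dev/" from the
				 * component name and see whether the boot
				 * device name is a prefix of it: for
				 * example, a component named "/dev/wd0e"
				 * is compared as "wd0e" against a
				 * booted_device named "wd0" (device names
				 * here are only examples).
				 */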
    523 				devname = raidPtrs[raidID]->Disks[col].devname;
    524 				devname += sizeof("/dev/") - 1;
    525 				if (strncmp(devname, booted_device->dv_xname,
    526 					    strlen(booted_device->dv_xname)) != 0)
    527 					continue;
    528 #ifdef DEBUG
    529 				printf("raid%d includes boot device %s\n",
    530 				       raidID, devname);
    531 #endif
    532 				num_root++;
    533 				rootID = raidID;
    534 			}
    535 		}
    536 
    537 		if (num_root == 1) {
    538 			booted_device = raid_softc[rootID].sc_dev;
    539 		} else {
    540 			/* we can't guess.. require the user to answer... */
    541 			boothowto |= RB_ASKNAME;
    542 		}
    543 	}
    544 }
    545 
    546 
    547 int
    548 raidsize(dev_t dev)
    549 {
    550 	struct raid_softc *rs;
    551 	struct disklabel *lp;
    552 	int     part, unit, omask, size;
    553 
    554 	unit = raidunit(dev);
    555 	if (unit >= numraid)
    556 		return (-1);
    557 	rs = &raid_softc[unit];
    558 
    559 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    560 		return (-1);
    561 
    562 	part = DISKPART(dev);
    563 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    564 	lp = rs->sc_dkdev.dk_label;
    565 
    566 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
    567 		return (-1);
    568 
    569 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    570 		size = -1;
    571 	else
    572 		size = lp->d_partitions[part].p_size *
    573 		    (lp->d_secsize / DEV_BSIZE);
    574 
    575 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
    576 		return (-1);
    577 
    578 	return (size);
    579 
    580 }
    581 
    582 int
    583 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
    584 {
    585 	int     unit = raidunit(dev);
    586 	struct raid_softc *rs;
    587 	const struct bdevsw *bdev;
    588 	struct disklabel *lp;
    589 	RF_Raid_t *raidPtr;
    590 	daddr_t offset;
    591 	int     part, c, sparecol, j, scol, dumpto;
    592 	int     error = 0;
    593 
    594 	if (unit >= numraid)
    595 		return (ENXIO);
    596 
    597 	rs = &raid_softc[unit];
    598 	raidPtr = raidPtrs[unit];
    599 
    600 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    601 		return ENXIO;
    602 
    603 	/* we only support dumping to RAID 1 sets */
    604 	if (raidPtr->Layout.numDataCol != 1 ||
    605 	    raidPtr->Layout.numParityCol != 1)
    606 		return EINVAL;
    607 
    608 
    609 	if ((error = raidlock(rs)) != 0)
    610 		return error;
    611 
    612 	if (size % DEV_BSIZE != 0) {
    613 		error = EINVAL;
    614 		goto out;
    615 	}
    616 
    617 	if (blkno + size / DEV_BSIZE > rs->sc_size) {
    618 		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
    619 		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
    620 		    size / DEV_BSIZE, rs->sc_size);
    621 		error = EINVAL;
    622 		goto out;
    623 	}
    624 
    625 	part = DISKPART(dev);
    626 	lp = rs->sc_dkdev.dk_label;
    627 	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
    628 
    629 	/* figure out what device is alive.. */
    630 
    631 	/*
    632 	   Look for a component to dump to.  The preference for the
    633 	   component to dump to is as follows:
    634 	   1) the master
    635 	   2) a used_spare of the master
    636 	   3) the slave
    637 	   4) a used_spare of the slave
    638 	*/
    639 
    640 	dumpto = -1;
    641 	for (c = 0; c < raidPtr->numCol; c++) {
    642 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
    643 			/* this might be the one */
    644 			dumpto = c;
    645 			break;
    646 		}
    647 	}
    648 
    649 	/*
    650 	   At this point we have possibly selected a live master or a
     651 	   live slave.  If we didn't find a live master or a live
     652 	   slave, we now check to see if there is a spared master
     653 	   (or a spared slave).
    654 	*/
    655 
    656 	for (c = 0; c < raidPtr->numSpare; c++) {
    657 		sparecol = raidPtr->numCol + c;
    658 		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
    659 			/* How about this one? */
    660 			scol = -1;
    661 			for(j=0;j<raidPtr->numCol;j++) {
    662 				if (raidPtr->Disks[j].spareCol == sparecol) {
    663 					scol = j;
    664 					break;
    665 				}
    666 			}
    667 			if (scol == 0) {
    668 				/*
    669 				   We must have found a spared master!
    670 				   We'll take that over anything else
    671 				   found so far.  (We couldn't have
    672 				   found a real master before, since
    673 				   this is a used spare, and it's
    674 				   saying that it's replacing the
    675 				   master.)  On reboot (with
    676 				   autoconfiguration turned on)
    677 				   sparecol will become the 1st
    678 				   component (component0) of this set.
    679 				*/
    680 				dumpto = sparecol;
    681 				break;
    682 			} else if (scol != -1) {
    683 				/*
    684 				   Must be a spared slave.  We'll dump
     685 				   to that if we haven't found anything
    686 				   else so far.
    687 				*/
    688 				if (dumpto == -1)
    689 					dumpto = sparecol;
    690 			}
    691 		}
    692 	}
    693 
    694 	if (dumpto == -1) {
    695 		/* we couldn't find any live components to dump to!?!?
    696 		 */
    697 		error = EINVAL;
    698 		goto out;
    699 	}
    700 
    701 	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
    702 
    703 	/*
    704 	   Note that blkno is relative to this particular partition.
    705 	   By adding the offset of this partition in the RAID
    706 	   set, and also adding RF_PROTECTED_SECTORS, we get a
    707 	   value that is relative to the partition used for the
    708 	   underlying component.
    709 	*/
    710 
    711 	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
    712 				blkno + offset, va, size);
    713 
    714 out:
    715 	raidunlock(rs);
    716 
    717 	return error;
    718 }
    719 /* ARGSUSED */
    720 int
    721 raidopen(dev_t dev, int flags, int fmt,
    722     struct lwp *l)
    723 {
    724 	int     unit = raidunit(dev);
    725 	struct raid_softc *rs;
    726 	struct disklabel *lp;
    727 	int     part, pmask;
    728 	int     error = 0;
    729 
    730 	if (unit >= numraid)
    731 		return (ENXIO);
    732 	rs = &raid_softc[unit];
    733 
    734 	if ((error = raidlock(rs)) != 0)
    735 		return (error);
    736 	lp = rs->sc_dkdev.dk_label;
    737 
    738 	part = DISKPART(dev);
    739 
    740 	/*
    741 	 * If there are wedges, and this is not RAW_PART, then we
    742 	 * need to fail.
    743 	 */
    744 	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
    745 		error = EBUSY;
    746 		goto bad;
    747 	}
    748 	pmask = (1 << part);
    749 
    750 	if ((rs->sc_flags & RAIDF_INITED) &&
    751 	    (rs->sc_dkdev.dk_openmask == 0))
    752 		raidgetdisklabel(dev);
    753 
    754 	/* make sure that this partition exists */
    755 
    756 	if (part != RAW_PART) {
    757 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    758 		    ((part >= lp->d_npartitions) ||
    759 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    760 			error = ENXIO;
    761 			goto bad;
    762 		}
    763 	}
    764 	/* Prevent this unit from being unconfigured while open. */
    765 	switch (fmt) {
    766 	case S_IFCHR:
    767 		rs->sc_dkdev.dk_copenmask |= pmask;
    768 		break;
    769 
    770 	case S_IFBLK:
    771 		rs->sc_dkdev.dk_bopenmask |= pmask;
    772 		break;
    773 	}
    774 
    775 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    776 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    777 		/* First one... mark things as dirty... Note that we *MUST*
    778 		 have done a configure before this.  I DO NOT WANT TO BE
    779 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    780 		 THAT THEY BELONG TOGETHER!!!!! */
    781 		/* XXX should check to see if we're only open for reading
    782 		   here... If so, we needn't do this, but then need some
    783 		   other way of keeping track of what's happened.. */
    784 
    785 		rf_markalldirty( raidPtrs[unit] );
    786 	}
    787 
    788 
    789 	rs->sc_dkdev.dk_openmask =
    790 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    791 
    792 bad:
    793 	raidunlock(rs);
    794 
    795 	return (error);
    796 
    797 
    798 }
    799 /* ARGSUSED */
    800 int
    801 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
    802 {
    803 	int     unit = raidunit(dev);
    804 	struct cfdata *cf;
    805 	struct raid_softc *rs;
    806 	int     error = 0;
    807 	int     part;
    808 
    809 	if (unit >= numraid)
    810 		return (ENXIO);
    811 	rs = &raid_softc[unit];
    812 
    813 	if ((error = raidlock(rs)) != 0)
    814 		return (error);
    815 
    816 	part = DISKPART(dev);
    817 
    818 	/* ...that much closer to allowing unconfiguration... */
    819 	switch (fmt) {
    820 	case S_IFCHR:
    821 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    822 		break;
    823 
    824 	case S_IFBLK:
    825 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    826 		break;
    827 	}
    828 	rs->sc_dkdev.dk_openmask =
    829 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    830 
    831 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    832 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
     833 		/* Last one... device is not unconfigured yet.
     834 		   Device shutdown has taken care of setting the
     835 		   clean bits if RAIDF_INITED is not set;
     836 		   otherwise, mark things as clean here... */
    837 
    838 		rf_update_component_labels(raidPtrs[unit],
    839 						 RF_FINAL_COMPONENT_UPDATE);
    840 		if (doing_shutdown) {
    841 			/* last one, and we're going down, so
    842 			   lights out for this RAID set too. */
    843 			error = rf_Shutdown(raidPtrs[unit]);
    844 
    845 			/* It's no longer initialized... */
    846 			rs->sc_flags &= ~RAIDF_INITED;
    847 
    848 			/* detach the device */
    849 
    850 			cf = device_cfdata(rs->sc_dev);
    851 			error = config_detach(rs->sc_dev, DETACH_QUIET);
    852 			free(cf, M_RAIDFRAME);
    853 
    854 			/* Detach the disk. */
    855 			disk_detach(&rs->sc_dkdev);
    856 			disk_destroy(&rs->sc_dkdev);
    857 		}
    858 	}
    859 
    860 	raidunlock(rs);
    861 	return (0);
    862 
    863 }
    864 
    865 void
    866 raidstrategy(struct buf *bp)
    867 {
    868 	int s;
    869 
    870 	unsigned int raidID = raidunit(bp->b_dev);
    871 	RF_Raid_t *raidPtr;
    872 	struct raid_softc *rs = &raid_softc[raidID];
    873 	int     wlabel;
    874 
    875 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    876 		bp->b_error = ENXIO;
    877 		goto done;
    878 	}
    879 	if (raidID >= numraid || !raidPtrs[raidID]) {
    880 		bp->b_error = ENODEV;
    881 		goto done;
    882 	}
    883 	raidPtr = raidPtrs[raidID];
    884 	if (!raidPtr->valid) {
    885 		bp->b_error = ENODEV;
    886 		goto done;
    887 	}
    888 	if (bp->b_bcount == 0) {
    889 		db1_printf(("b_bcount is zero..\n"));
    890 		goto done;
    891 	}
    892 
    893 	/*
    894 	 * Do bounds checking and adjust transfer.  If there's an
    895 	 * error, the bounds check will flag that for us.
    896 	 */
    897 
    898 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    899 	if (DISKPART(bp->b_dev) == RAW_PART) {
    900 		uint64_t size; /* device size in DEV_BSIZE unit */
    901 
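		/*
		 * Convert the array size from native sectors to DEV_BSIZE
		 * units for the bounds check.  For example, with 512-byte
		 * sectors logBytesPerSector equals DEV_BSHIFT and size is
		 * simply totalSectors; with larger sectors the count is
		 * shifted left instead.
		 */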
    902 		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
    903 			size = raidPtr->totalSectors <<
    904 			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
    905 		} else {
    906 			size = raidPtr->totalSectors >>
    907 			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
    908 		}
    909 		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
    910 			goto done;
    911 		}
    912 	} else {
    913 		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
    914 			db1_printf(("Bounds check failed!!:%d %d\n",
    915 				(int) bp->b_blkno, (int) wlabel));
    916 			goto done;
    917 		}
    918 	}
    919 	s = splbio();
    920 
    921 	bp->b_resid = 0;
    922 
    923 	/* stuff it onto our queue */
    924 	BUFQ_PUT(rs->buf_queue, bp);
    925 
     926 	/* schedule the IO to happen at the next convenient time */
    927 	wakeup(&(raidPtrs[raidID]->iodone));
    928 
    929 	splx(s);
    930 	return;
    931 
    932 done:
    933 	bp->b_resid = bp->b_bcount;
    934 	biodone(bp);
    935 }
    936 /* ARGSUSED */
    937 int
    938 raidread(dev_t dev, struct uio *uio, int flags)
    939 {
    940 	int     unit = raidunit(dev);
    941 	struct raid_softc *rs;
    942 
    943 	if (unit >= numraid)
    944 		return (ENXIO);
    945 	rs = &raid_softc[unit];
    946 
    947 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    948 		return (ENXIO);
    949 
    950 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    951 
    952 }
    953 /* ARGSUSED */
    954 int
    955 raidwrite(dev_t dev, struct uio *uio, int flags)
    956 {
    957 	int     unit = raidunit(dev);
    958 	struct raid_softc *rs;
    959 
    960 	if (unit >= numraid)
    961 		return (ENXIO);
    962 	rs = &raid_softc[unit];
    963 
    964 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    965 		return (ENXIO);
    966 
    967 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    968 
    969 }
    970 
    971 int
    972 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
    973 {
    974 	int     unit = raidunit(dev);
    975 	int     error = 0;
    976 	int     part, pmask;
    977 	struct cfdata *cf;
    978 	struct raid_softc *rs;
    979 	RF_Config_t *k_cfg, *u_cfg;
    980 	RF_Raid_t *raidPtr;
    981 	RF_RaidDisk_t *diskPtr;
    982 	RF_AccTotals_t *totals;
    983 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    984 	u_char *specific_buf;
    985 	int retcode = 0;
    986 	int column;
    987 	int raidid;
    988 	struct rf_recon_req *rrcopy, *rr;
    989 	RF_ComponentLabel_t *clabel;
    990 	RF_ComponentLabel_t *ci_label;
    991 	RF_ComponentLabel_t **clabel_ptr;
    992 	RF_SingleComponent_t *sparePtr,*componentPtr;
    993 	RF_SingleComponent_t component;
    994 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    995 	int i, j, d;
    996 #ifdef __HAVE_OLD_DISKLABEL
    997 	struct disklabel newlabel;
    998 #endif
    999 	struct dkwedge_info *dkw;
   1000 
   1001 	if (unit >= numraid)
   1002 		return (ENXIO);
   1003 	rs = &raid_softc[unit];
   1004 	raidPtr = raidPtrs[unit];
   1005 
   1006 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
   1007 		(int) DISKPART(dev), (int) unit, (int) cmd));
   1008 
   1009 	/* Must be open for writes for these commands... */
   1010 	switch (cmd) {
   1011 #ifdef DIOCGSECTORSIZE
   1012 	case DIOCGSECTORSIZE:
   1013 		*(u_int *)data = raidPtr->bytesPerSector;
   1014 		return 0;
   1015 	case DIOCGMEDIASIZE:
   1016 		*(off_t *)data =
   1017 		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
   1018 		return 0;
   1019 #endif
   1020 	case DIOCSDINFO:
   1021 	case DIOCWDINFO:
   1022 #ifdef __HAVE_OLD_DISKLABEL
   1023 	case ODIOCWDINFO:
   1024 	case ODIOCSDINFO:
   1025 #endif
   1026 	case DIOCWLABEL:
   1027 	case DIOCAWEDGE:
   1028 	case DIOCDWEDGE:
   1029 		if ((flag & FWRITE) == 0)
   1030 			return (EBADF);
   1031 	}
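	/*
	 * Any command not listed above falls through without the FWRITE
	 * check and is handled by the switches below.
	 */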
   1032 
   1033 	/* Must be initialized for these... */
   1034 	switch (cmd) {
   1035 	case DIOCGDINFO:
   1036 	case DIOCSDINFO:
   1037 	case DIOCWDINFO:
   1038 #ifdef __HAVE_OLD_DISKLABEL
   1039 	case ODIOCGDINFO:
   1040 	case ODIOCWDINFO:
   1041 	case ODIOCSDINFO:
   1042 	case ODIOCGDEFLABEL:
   1043 #endif
   1044 	case DIOCGPART:
   1045 	case DIOCWLABEL:
   1046 	case DIOCGDEFLABEL:
   1047 	case DIOCAWEDGE:
   1048 	case DIOCDWEDGE:
   1049 	case DIOCLWEDGES:
   1050 	case RAIDFRAME_SHUTDOWN:
   1051 	case RAIDFRAME_REWRITEPARITY:
   1052 	case RAIDFRAME_GET_INFO:
   1053 	case RAIDFRAME_RESET_ACCTOTALS:
   1054 	case RAIDFRAME_GET_ACCTOTALS:
   1055 	case RAIDFRAME_KEEP_ACCTOTALS:
   1056 	case RAIDFRAME_GET_SIZE:
   1057 	case RAIDFRAME_FAIL_DISK:
   1058 	case RAIDFRAME_COPYBACK:
   1059 	case RAIDFRAME_CHECK_RECON_STATUS:
   1060 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1061 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1062 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1063 	case RAIDFRAME_ADD_HOT_SPARE:
   1064 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1065 	case RAIDFRAME_INIT_LABELS:
   1066 	case RAIDFRAME_REBUILD_IN_PLACE:
   1067 	case RAIDFRAME_CHECK_PARITY:
   1068 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1069 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1070 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1071 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1072 	case RAIDFRAME_SET_AUTOCONFIG:
   1073 	case RAIDFRAME_SET_ROOT:
   1074 	case RAIDFRAME_DELETE_COMPONENT:
   1075 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1076 		if ((rs->sc_flags & RAIDF_INITED) == 0)
   1077 			return (ENXIO);
   1078 	}
   1079 
   1080 	switch (cmd) {
   1081 
   1082 		/* configure the system */
   1083 	case RAIDFRAME_CONFIGURE:
   1084 
   1085 		if (raidPtr->valid) {
   1086 			/* There is a valid RAID set running on this unit! */
   1087 			printf("raid%d: Device already configured!\n",unit);
   1088 			return(EINVAL);
   1089 		}
   1090 
   1091 		/* copy-in the configuration information */
   1092 		/* data points to a pointer to the configuration structure */
   1093 
   1094 		u_cfg = *((RF_Config_t **) data);
   1095 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
   1096 		if (k_cfg == NULL) {
   1097 			return (ENOMEM);
   1098 		}
   1099 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
   1100 		if (retcode) {
   1101 			RF_Free(k_cfg, sizeof(RF_Config_t));
   1102 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
   1103 				retcode));
   1104 			return (retcode);
   1105 		}
   1106 		/* allocate a buffer for the layout-specific data, and copy it
   1107 		 * in */
   1108 		if (k_cfg->layoutSpecificSize) {
   1109 			if (k_cfg->layoutSpecificSize > 10000) {
   1110 				/* sanity check */
   1111 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1112 				return (EINVAL);
   1113 			}
   1114 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
   1115 			    (u_char *));
   1116 			if (specific_buf == NULL) {
   1117 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1118 				return (ENOMEM);
   1119 			}
   1120 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1121 			    k_cfg->layoutSpecificSize);
   1122 			if (retcode) {
   1123 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1124 				RF_Free(specific_buf,
   1125 					k_cfg->layoutSpecificSize);
   1126 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
   1127 					retcode));
   1128 				return (retcode);
   1129 			}
   1130 		} else
   1131 			specific_buf = NULL;
   1132 		k_cfg->layoutSpecific = specific_buf;
   1133 
   1134 		/* should do some kind of sanity check on the configuration.
   1135 		 * Store the sum of all the bytes in the last byte? */
   1136 
   1137 		/* configure the system */
   1138 
   1139 		/*
   1140 		 * Clear the entire RAID descriptor, just to make sure
   1141 		 *  there is no stale data left in the case of a
   1142 		 *  reconfiguration
   1143 		 */
   1144 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
   1145 		raidPtr->raidid = unit;
   1146 
   1147 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
   1148 
   1149 		if (retcode == 0) {
   1150 
   1151 			/* allow this many simultaneous IO's to
   1152 			   this RAID device */
   1153 			raidPtr->openings = RAIDOUTSTANDING;
   1154 
   1155 			raidinit(raidPtr);
   1156 			rf_markalldirty(raidPtr);
   1157 		}
   1158 		/* free the buffers.  No return code here. */
   1159 		if (k_cfg->layoutSpecificSize) {
   1160 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1161 		}
   1162 		RF_Free(k_cfg, sizeof(RF_Config_t));
   1163 
   1164 		return (retcode);
   1165 
   1166 		/* shutdown the system */
   1167 	case RAIDFRAME_SHUTDOWN:
   1168 
   1169 		if ((error = raidlock(rs)) != 0)
   1170 			return (error);
   1171 
   1172 		/*
   1173 		 * If somebody has a partition mounted, we shouldn't
   1174 		 * shutdown.
   1175 		 */
   1176 
   1177 		part = DISKPART(dev);
   1178 		pmask = (1 << part);
   1179 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
   1180 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
   1181 			(rs->sc_dkdev.dk_copenmask & pmask))) {
   1182 			raidunlock(rs);
   1183 			return (EBUSY);
   1184 		}
   1185 
   1186 		retcode = rf_Shutdown(raidPtr);
   1187 
   1188 		/* It's no longer initialized... */
   1189 		rs->sc_flags &= ~RAIDF_INITED;
   1190 
   1191 		/* free the pseudo device attach bits */
   1192 
   1193 		cf = device_cfdata(rs->sc_dev);
   1194 		/* XXX this causes us to not return any errors
   1195 		   from the above call to rf_Shutdown() */
   1196 		retcode = config_detach(rs->sc_dev, DETACH_QUIET);
   1197 		free(cf, M_RAIDFRAME);
   1198 
   1199 		/* Detach the disk. */
   1200 		disk_detach(&rs->sc_dkdev);
   1201 		disk_destroy(&rs->sc_dkdev);
   1202 
   1203 		raidunlock(rs);
   1204 
   1205 		return (retcode);
   1206 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1207 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1208 		/* need to read the component label for the disk indicated
   1209 		   by row,column in clabel */
   1210 
    1211 		/* For practice, let's get it directly from disk, rather
   1212 		   than from the in-core copy */
   1213 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1214 			   (RF_ComponentLabel_t *));
   1215 		if (clabel == NULL)
   1216 			return (ENOMEM);
   1217 
   1218 		retcode = copyin( *clabel_ptr, clabel,
   1219 				  sizeof(RF_ComponentLabel_t));
   1220 
   1221 		if (retcode) {
   1222 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1223 			return(retcode);
   1224 		}
   1225 
   1226 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1227 
   1228 		column = clabel->column;
   1229 
   1230 		if ((column < 0) || (column >= raidPtr->numCol +
   1231 				     raidPtr->numSpare)) {
   1232 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1233 			return(EINVAL);
   1234 		}
   1235 
   1236 		retcode = raidread_component_label(raidPtr->Disks[column].dev,
   1237 				raidPtr->raid_cinfo[column].ci_vp,
   1238 				clabel );
   1239 
   1240 		if (retcode == 0) {
   1241 			retcode = copyout(clabel, *clabel_ptr,
   1242 					  sizeof(RF_ComponentLabel_t));
   1243 		}
   1244 		RF_Free(clabel, sizeof(RF_ComponentLabel_t));
   1245 		return (retcode);
   1246 
   1247 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1248 		clabel = (RF_ComponentLabel_t *) data;
   1249 
   1250 		/* XXX check the label for valid stuff... */
   1251 		/* Note that some things *should not* get modified --
   1252 		   the user should be re-initing the labels instead of
   1253 		   trying to patch things.
   1254 		   */
   1255 
   1256 		raidid = raidPtr->raidid;
   1257 #ifdef DEBUG
   1258 		printf("raid%d: Got component label:\n", raidid);
   1259 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1260 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1261 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1262 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1263 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1264 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1265 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1266 #endif
   1267 		clabel->row = 0;
   1268 		column = clabel->column;
   1269 
   1270 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1271 			return(EINVAL);
   1272 		}
   1273 
   1274 		/* XXX this isn't allowed to do anything for now :-) */
   1275 
   1276 		/* XXX and before it is, we need to fill in the rest
   1277 		   of the fields!?!?!?! */
   1278 #if 0
   1279 		raidwrite_component_label(
   1280 		     raidPtr->Disks[column].dev,
   1281 			    raidPtr->raid_cinfo[column].ci_vp,
   1282 			    clabel );
   1283 #endif
   1284 		return (0);
   1285 
   1286 	case RAIDFRAME_INIT_LABELS:
   1287 		clabel = (RF_ComponentLabel_t *) data;
   1288 		/*
   1289 		   we only want the serial number from
   1290 		   the above.  We get all the rest of the information
   1291 		   from the config that was used to create this RAID
   1292 		   set.
   1293 		   */
   1294 
   1295 		raidPtr->serial_number = clabel->serial_number;
   1296 
   1297 		RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
   1298 			  (RF_ComponentLabel_t *));
   1299 		if (ci_label == NULL)
   1300 			return (ENOMEM);
   1301 
   1302 		raid_init_component_label(raidPtr, ci_label);
   1303 		ci_label->serial_number = clabel->serial_number;
    1304 		ci_label->row = 0; /* we don't pretend to support more */
   1305 
   1306 		for(column=0;column<raidPtr->numCol;column++) {
   1307 			diskPtr = &raidPtr->Disks[column];
   1308 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1309 				ci_label->partitionSize = diskPtr->partitionSize;
   1310 				ci_label->column = column;
   1311 				raidwrite_component_label(
   1312 							  raidPtr->Disks[column].dev,
   1313 							  raidPtr->raid_cinfo[column].ci_vp,
   1314 							  ci_label );
   1315 			}
   1316 		}
   1317 		RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
   1318 
   1319 		return (retcode);
   1320 	case RAIDFRAME_SET_AUTOCONFIG:
   1321 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1322 		printf("raid%d: New autoconfig value is: %d\n",
   1323 		       raidPtr->raidid, d);
   1324 		*(int *) data = d;
   1325 		return (retcode);
   1326 
   1327 	case RAIDFRAME_SET_ROOT:
   1328 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1329 		printf("raid%d: New rootpartition value is: %d\n",
   1330 		       raidPtr->raidid, d);
   1331 		*(int *) data = d;
   1332 		return (retcode);
   1333 
   1334 		/* initialize all parity */
   1335 	case RAIDFRAME_REWRITEPARITY:
   1336 
   1337 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1338 			/* Parity for RAID 0 is trivially correct */
   1339 			raidPtr->parity_good = RF_RAID_CLEAN;
   1340 			return(0);
   1341 		}
   1342 
   1343 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1344 			/* Re-write is already in progress! */
   1345 			return(EINVAL);
   1346 		}
   1347 
   1348 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1349 					   rf_RewriteParityThread,
   1350 					   raidPtr,"raid_parity");
   1351 		return (retcode);
   1352 
   1353 
   1354 	case RAIDFRAME_ADD_HOT_SPARE:
   1355 		sparePtr = (RF_SingleComponent_t *) data;
   1356 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
   1357 		retcode = rf_add_hot_spare(raidPtr, &component);
   1358 		return(retcode);
   1359 
   1360 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1361 		return(retcode);
   1362 
   1363 	case RAIDFRAME_DELETE_COMPONENT:
   1364 		componentPtr = (RF_SingleComponent_t *)data;
   1365 		memcpy( &component, componentPtr,
   1366 			sizeof(RF_SingleComponent_t));
   1367 		retcode = rf_delete_component(raidPtr, &component);
   1368 		return(retcode);
   1369 
   1370 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1371 		componentPtr = (RF_SingleComponent_t *)data;
   1372 		memcpy( &component, componentPtr,
   1373 			sizeof(RF_SingleComponent_t));
   1374 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1375 		return(retcode);
   1376 
   1377 	case RAIDFRAME_REBUILD_IN_PLACE:
   1378 
   1379 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1380 			/* Can't do this on a RAID 0!! */
   1381 			return(EINVAL);
   1382 		}
   1383 
   1384 		if (raidPtr->recon_in_progress == 1) {
   1385 			/* a reconstruct is already in progress! */
   1386 			return(EINVAL);
   1387 		}
   1388 
   1389 		componentPtr = (RF_SingleComponent_t *) data;
   1390 		memcpy( &component, componentPtr,
   1391 			sizeof(RF_SingleComponent_t));
   1392 		component.row = 0; /* we don't support any more */
   1393 		column = component.column;
   1394 
   1395 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1396 			return(EINVAL);
   1397 		}
   1398 
   1399 		RF_LOCK_MUTEX(raidPtr->mutex);
   1400 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1401 		    (raidPtr->numFailures > 0)) {
   1402 			/* XXX 0 above shouldn't be constant!!! */
   1403 			/* some component other than this has failed.
   1404 			   Let's not make things worse than they already
   1405 			   are... */
   1406 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1407 			       raidPtr->raidid);
   1408 			printf("raid%d:     Col: %d   Too many failures.\n",
   1409 			       raidPtr->raidid, column);
   1410 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1411 			return (EINVAL);
   1412 		}
   1413 		if (raidPtr->Disks[column].status ==
   1414 		    rf_ds_reconstructing) {
   1415 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1416 			       raidPtr->raidid);
   1417 			printf("raid%d:    Col: %d   Reconstruction already occuring!\n", raidPtr->raidid, column);
   1418 
   1419 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1420 			return (EINVAL);
   1421 		}
   1422 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1423 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1424 			return (EINVAL);
   1425 		}
   1426 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1427 
   1428 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1429 		if (rrcopy == NULL)
   1430 			return(ENOMEM);
   1431 
   1432 		rrcopy->raidPtr = (void *) raidPtr;
   1433 		rrcopy->col = column;
   1434 
   1435 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1436 					   rf_ReconstructInPlaceThread,
   1437 					   rrcopy,"raid_reconip");
   1438 		return(retcode);
   1439 
   1440 	case RAIDFRAME_GET_INFO:
   1441 		if (!raidPtr->valid)
   1442 			return (ENODEV);
   1443 		ucfgp = (RF_DeviceConfig_t **) data;
   1444 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1445 			  (RF_DeviceConfig_t *));
   1446 		if (d_cfg == NULL)
   1447 			return (ENOMEM);
   1448 		d_cfg->rows = 1; /* there is only 1 row now */
   1449 		d_cfg->cols = raidPtr->numCol;
   1450 		d_cfg->ndevs = raidPtr->numCol;
   1451 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1452 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1453 			return (ENOMEM);
   1454 		}
   1455 		d_cfg->nspares = raidPtr->numSpare;
   1456 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1457 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1458 			return (ENOMEM);
   1459 		}
   1460 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1461 		d = 0;
   1462 		for (j = 0; j < d_cfg->cols; j++) {
   1463 			d_cfg->devs[d] = raidPtr->Disks[j];
   1464 			d++;
   1465 		}
   1466 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1467 			d_cfg->spares[i] = raidPtr->Disks[j];
   1468 		}
   1469 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1470 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1471 
   1472 		return (retcode);
   1473 
   1474 	case RAIDFRAME_CHECK_PARITY:
   1475 		*(int *) data = raidPtr->parity_good;
   1476 		return (0);
   1477 
   1478 	case RAIDFRAME_RESET_ACCTOTALS:
   1479 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1480 		return (0);
   1481 
   1482 	case RAIDFRAME_GET_ACCTOTALS:
   1483 		totals = (RF_AccTotals_t *) data;
   1484 		*totals = raidPtr->acc_totals;
   1485 		return (0);
   1486 
   1487 	case RAIDFRAME_KEEP_ACCTOTALS:
   1488 		raidPtr->keep_acc_totals = *(int *)data;
   1489 		return (0);
   1490 
   1491 	case RAIDFRAME_GET_SIZE:
   1492 		*(int *) data = raidPtr->totalSectors;
   1493 		return (0);
   1494 
   1495 		/* fail a disk & optionally start reconstruction */
   1496 	case RAIDFRAME_FAIL_DISK:
   1497 
   1498 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1499 			/* Can't do this on a RAID 0!! */
   1500 			return(EINVAL);
   1501 		}
   1502 
   1503 		rr = (struct rf_recon_req *) data;
   1504 		rr->row = 0;
   1505 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1506 			return (EINVAL);
   1507 
   1508 
   1509 		RF_LOCK_MUTEX(raidPtr->mutex);
   1510 		if (raidPtr->status == rf_rs_reconstructing) {
   1511 			/* you can't fail a disk while we're reconstructing! */
   1512 			/* XXX wrong for RAID6 */
   1513 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1514 			return (EINVAL);
   1515 		}
   1516 		if ((raidPtr->Disks[rr->col].status ==
   1517 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1518 			/* some other component has failed.  Let's not make
   1519 			   things worse. XXX wrong for RAID6 */
   1520 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1521 			return (EINVAL);
   1522 		}
   1523 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1524 			/* Can't fail a spared disk! */
   1525 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1526 			return (EINVAL);
   1527 		}
   1528 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1529 
   1530 		/* make a copy of the recon request so that we don't rely on
   1531 		 * the user's buffer */
   1532 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1533 		if (rrcopy == NULL)
   1534 			return(ENOMEM);
   1535 		memcpy(rrcopy, rr, sizeof(*rr));
   1536 		rrcopy->raidPtr = (void *) raidPtr;
   1537 
   1538 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1539 					   rf_ReconThread,
   1540 					   rrcopy,"raid_recon");
   1541 		return (0);
   1542 
   1543 		/* invoke a copyback operation after recon on whatever disk
   1544 		 * needs it, if any */
   1545 	case RAIDFRAME_COPYBACK:
   1546 
   1547 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1548 			/* This makes no sense on a RAID 0!! */
   1549 			return(EINVAL);
   1550 		}
   1551 
   1552 		if (raidPtr->copyback_in_progress == 1) {
   1553 			/* Copyback is already in progress! */
   1554 			return(EINVAL);
   1555 		}
   1556 
   1557 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1558 					   rf_CopybackThread,
   1559 					   raidPtr,"raid_copyback");
   1560 		return (retcode);
   1561 
   1562 		/* return the percentage completion of reconstruction */
   1563 	case RAIDFRAME_CHECK_RECON_STATUS:
   1564 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1565 			/* This makes no sense on a RAID 0, so tell the
   1566 			   user it's done. */
   1567 			*(int *) data = 100;
   1568 			return(0);
   1569 		}
   1570 		if (raidPtr->status != rf_rs_reconstructing)
   1571 			*(int *) data = 100;
   1572 		else {
   1573 			if (raidPtr->reconControl->numRUsTotal > 0) {
   1574 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
   1575 			} else {
   1576 				*(int *) data = 0;
   1577 			}
   1578 		}
   1579 		return (0);
   1580 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1581 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1582 		if (raidPtr->status != rf_rs_reconstructing) {
   1583 			progressInfo.remaining = 0;
   1584 			progressInfo.completed = 100;
   1585 			progressInfo.total = 100;
   1586 		} else {
   1587 			progressInfo.total =
   1588 				raidPtr->reconControl->numRUsTotal;
   1589 			progressInfo.completed =
   1590 				raidPtr->reconControl->numRUsComplete;
   1591 			progressInfo.remaining = progressInfo.total -
   1592 				progressInfo.completed;
   1593 		}
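         		/* "data" carries a user-space pointer to an
         		 * RF_ProgressInfo_t; copy the snapshot out to it. */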
   1594 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1595 				  sizeof(RF_ProgressInfo_t));
   1596 		return (retcode);
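         		/*
         		 * Userland usage sketch (hypothetical, not part of this
         		 * driver): since the copyout above goes through
         		 * *progressInfoPtr, the ioctl argument is the address of
         		 * a pointer to the caller's RF_ProgressInfo_t, e.g.
         		 *
         		 *	RF_ProgressInfo_t info;
         		 *	RF_ProgressInfo_t *infop = &info;
         		 *
         		 *	if (ioctl(fd, RAIDFRAME_CHECK_RECON_STATUS_EXT,
         		 *	    &infop) == 0)
         		 *		printf("%ld of %ld RUs done\n",
         		 *		    (long)info.completed, (long)info.total);
         		 */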
   1597 
   1598 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1599 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1600 			/* This makes no sense on a RAID 0, so tell the
   1601 			   user it's done. */
   1602 			*(int *) data = 100;
   1603 			return(0);
   1604 		}
   1605 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1606 			*(int *) data = 100 *
   1607 				raidPtr->parity_rewrite_stripes_done /
   1608 				raidPtr->Layout.numStripe;
   1609 		} else {
   1610 			*(int *) data = 100;
   1611 		}
   1612 		return (0);
   1613 
   1614 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1615 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1616 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1617 			progressInfo.total = raidPtr->Layout.numStripe;
   1618 			progressInfo.completed =
   1619 				raidPtr->parity_rewrite_stripes_done;
   1620 			progressInfo.remaining = progressInfo.total -
   1621 				progressInfo.completed;
   1622 		} else {
   1623 			progressInfo.remaining = 0;
   1624 			progressInfo.completed = 100;
   1625 			progressInfo.total = 100;
   1626 		}
   1627 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1628 				  sizeof(RF_ProgressInfo_t));
   1629 		return (retcode);
   1630 
   1631 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1632 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1633 			/* This makes no sense on a RAID 0 */
   1634 			*(int *) data = 100;
   1635 			return(0);
   1636 		}
   1637 		if (raidPtr->copyback_in_progress == 1) {
   1638 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1639 				raidPtr->Layout.numStripe;
   1640 		} else {
   1641 			*(int *) data = 100;
   1642 		}
   1643 		return (0);
   1644 
   1645 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1646 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1647 		if (raidPtr->copyback_in_progress == 1) {
   1648 			progressInfo.total = raidPtr->Layout.numStripe;
   1649 			progressInfo.completed =
   1650 				raidPtr->copyback_stripes_done;
   1651 			progressInfo.remaining = progressInfo.total -
   1652 				progressInfo.completed;
   1653 		} else {
   1654 			progressInfo.remaining = 0;
   1655 			progressInfo.completed = 100;
   1656 			progressInfo.total = 100;
   1657 		}
   1658 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1659 				  sizeof(RF_ProgressInfo_t));
   1660 		return (retcode);
   1661 
   1662 		/* the sparetable daemon calls this to wait for the kernel to
   1663 		 * need a spare table. this ioctl does not return until a
   1664 		 * spare table is needed. XXX -- calling mpsleep here in the
   1665 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1666 		 * -- I should either compute the spare table in the kernel,
   1667 		 * or have a different -- XXX XXX -- interface (a different
   1668 		 * character device) for delivering the table     -- XXX */
   1669 #if 0
   1670 	case RAIDFRAME_SPARET_WAIT:
   1671 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1672 		while (!rf_sparet_wait_queue)
   1673 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1674 		waitreq = rf_sparet_wait_queue;
   1675 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1676 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1677 
   1678 		/* structure assignment */
   1679 		*((RF_SparetWait_t *) data) = *waitreq;
   1680 
   1681 		RF_Free(waitreq, sizeof(*waitreq));
   1682 		return (0);
   1683 
   1684 		/* wakes up a process waiting on SPARET_WAIT and puts an error
    1685 		 * code in it that will cause the daemon to exit */
   1686 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1687 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1688 		waitreq->fcol = -1;
   1689 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1690 		waitreq->next = rf_sparet_wait_queue;
   1691 		rf_sparet_wait_queue = waitreq;
   1692 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1693 		wakeup(&rf_sparet_wait_queue);
   1694 		return (0);
   1695 
   1696 		/* used by the spare table daemon to deliver a spare table
   1697 		 * into the kernel */
   1698 	case RAIDFRAME_SEND_SPARET:
   1699 
   1700 		/* install the spare table */
   1701 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1702 
   1703 		/* respond to the requestor.  the return status of the spare
   1704 		 * table installation is passed in the "fcol" field */
   1705 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1706 		waitreq->fcol = retcode;
   1707 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1708 		waitreq->next = rf_sparet_resp_queue;
   1709 		rf_sparet_resp_queue = waitreq;
   1710 		wakeup(&rf_sparet_resp_queue);
   1711 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1712 
   1713 		return (retcode);
   1714 #endif
   1715 
   1716 	default:
   1717 		break; /* fall through to the os-specific code below */
   1718 
   1719 	}
   1720 
   1721 	if (!raidPtr->valid)
   1722 		return (EINVAL);
   1723 
   1724 	/*
   1725 	 * Add support for "regular" device ioctls here.
   1726 	 */
   1727 
   1728 	switch (cmd) {
   1729 	case DIOCGDINFO:
   1730 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1731 		break;
   1732 #ifdef __HAVE_OLD_DISKLABEL
   1733 	case ODIOCGDINFO:
   1734 		newlabel = *(rs->sc_dkdev.dk_label);
   1735 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1736 			return ENOTTY;
   1737 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1738 		break;
   1739 #endif
   1740 
   1741 	case DIOCGPART:
   1742 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1743 		((struct partinfo *) data)->part =
   1744 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1745 		break;
   1746 
   1747 	case DIOCWDINFO:
   1748 	case DIOCSDINFO:
   1749 #ifdef __HAVE_OLD_DISKLABEL
   1750 	case ODIOCWDINFO:
   1751 	case ODIOCSDINFO:
   1752 #endif
   1753 	{
   1754 		struct disklabel *lp;
   1755 #ifdef __HAVE_OLD_DISKLABEL
   1756 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1757 			memset(&newlabel, 0, sizeof newlabel);
   1758 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1759 			lp = &newlabel;
   1760 		} else
   1761 #endif
   1762 		lp = (struct disklabel *)data;
   1763 
   1764 		if ((error = raidlock(rs)) != 0)
   1765 			return (error);
   1766 
   1767 		rs->sc_flags |= RAIDF_LABELLING;
   1768 
   1769 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1770 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1771 		if (error == 0) {
   1772 			if (cmd == DIOCWDINFO
   1773 #ifdef __HAVE_OLD_DISKLABEL
   1774 			    || cmd == ODIOCWDINFO
   1775 #endif
   1776 			   )
   1777 				error = writedisklabel(RAIDLABELDEV(dev),
   1778 				    raidstrategy, rs->sc_dkdev.dk_label,
   1779 				    rs->sc_dkdev.dk_cpulabel);
   1780 		}
   1781 		rs->sc_flags &= ~RAIDF_LABELLING;
   1782 
   1783 		raidunlock(rs);
   1784 
   1785 		if (error)
   1786 			return (error);
   1787 		break;
   1788 	}
   1789 
   1790 	case DIOCWLABEL:
   1791 		if (*(int *) data != 0)
   1792 			rs->sc_flags |= RAIDF_WLABEL;
   1793 		else
   1794 			rs->sc_flags &= ~RAIDF_WLABEL;
   1795 		break;
   1796 
   1797 	case DIOCGDEFLABEL:
   1798 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1799 		break;
   1800 
   1801 #ifdef __HAVE_OLD_DISKLABEL
   1802 	case ODIOCGDEFLABEL:
   1803 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1804 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1805 			return ENOTTY;
   1806 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1807 		break;
   1808 #endif
   1809 
   1810 	case DIOCAWEDGE:
   1811 	case DIOCDWEDGE:
   1812 	    	dkw = (void *)data;
   1813 
   1814 		/* If the ioctl happens here, the parent is us. */
   1815 		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
   1816 		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
   1817 
   1818 	case DIOCLWEDGES:
   1819 		return dkwedge_list(&rs->sc_dkdev,
   1820 		    (struct dkwedge_list *)data, l);
   1821 
   1822 	default:
   1823 		retcode = ENOTTY;
   1824 	}
   1825 	return (retcode);
   1826 
   1827 }
   1828 
   1829 
   1830 /* raidinit -- complete the rest of the initialization for the
   1831    RAIDframe device.  */
   1832 
   1833 
   1834 static void
   1835 raidinit(RF_Raid_t *raidPtr)
   1836 {
   1837 	struct cfdata *cf;
   1838 	struct raid_softc *rs;
   1839 	int     unit;
   1840 
   1841 	unit = raidPtr->raidid;
   1842 
   1843 	rs = &raid_softc[unit];
   1844 
   1845 	/* XXX should check return code first... */
   1846 	rs->sc_flags |= RAIDF_INITED;
   1847 
   1848 	/* XXX doesn't check bounds. */
   1849 	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
   1850 
   1851 	/* attach the pseudo device */
   1852 	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
   1853 	cf->cf_name = raid_cd.cd_name;
   1854 	cf->cf_atname = raid_cd.cd_name;
   1855 	cf->cf_unit = unit;
   1856 	cf->cf_fstate = FSTATE_STAR;
   1857 
   1858 	rs->sc_dev = config_attach_pseudo(cf);
   1859 
   1860 	if (rs->sc_dev==NULL) {
   1861 		printf("raid%d: config_attach_pseudo failed\n",
   1862 		       raidPtr->raidid);
   1863 	}
   1864 
   1865 	/* disk_attach actually creates space for the CPU disklabel, among
   1866 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1867 	 * with disklabels. */
   1868 
   1869 	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
   1870 	disk_attach(&rs->sc_dkdev);
   1871 
   1872 	/* XXX There may be a weird interaction here between this, and
   1873 	 * protectedSectors, as used in RAIDframe.  */
   1874 
   1875 	rs->sc_size = raidPtr->totalSectors;
   1876 
   1877 	dkwedge_discover(&rs->sc_dkdev);
   1878 
   1879 	rf_set_properties(rs, raidPtr);
   1880 
   1881 }
   1882 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   1883 /* wake up the daemon & tell it to get us a spare table
   1884  * XXX
   1885  * the entries in the queues should be tagged with the raidPtr
   1886  * so that in the extremely rare case that two recons happen at once,
   1887  * we know for which device were requesting a spare table
   1888  * XXX
   1889  *
   1890  * XXX This code is not currently used. GO
   1891  */
   1892 int
   1893 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
   1894 {
   1895 	int     retcode;
   1896 
   1897 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1898 	req->next = rf_sparet_wait_queue;
   1899 	rf_sparet_wait_queue = req;
   1900 	wakeup(&rf_sparet_wait_queue);
   1901 
   1902 	/* mpsleep unlocks the mutex */
   1903 	while (!rf_sparet_resp_queue) {
   1904 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1905 		    "raidframe getsparetable", 0);
   1906 	}
   1907 	req = rf_sparet_resp_queue;
   1908 	rf_sparet_resp_queue = req->next;
   1909 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1910 
   1911 	retcode = req->fcol;
   1912 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1913 					 * alloc'd */
   1914 	return (retcode);
   1915 }
   1916 #endif
   1917 
   1918 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1919  * bp & passes it down.
   1920  * any calls originating in the kernel must use non-blocking I/O
   1921  * do some extra sanity checking to return "appropriate" error values for
   1922  * certain conditions (to make some standard utilities work)
   1923  *
   1924  * Formerly known as: rf_DoAccessKernel
   1925  */
   1926 void
   1927 raidstart(RF_Raid_t *raidPtr)
   1928 {
   1929 	RF_SectorCount_t num_blocks, pb, sum;
   1930 	RF_RaidAddr_t raid_addr;
   1931 	struct partition *pp;
   1932 	daddr_t blocknum;
   1933 	int     unit;
   1934 	struct raid_softc *rs;
   1935 	int     do_async;
   1936 	struct buf *bp;
   1937 	int rc;
   1938 
   1939 	unit = raidPtr->raidid;
   1940 	rs = &raid_softc[unit];
   1941 
   1942 	/* quick check to see if anything has died recently */
   1943 	RF_LOCK_MUTEX(raidPtr->mutex);
   1944 	if (raidPtr->numNewFailures > 0) {
   1945 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1946 		rf_update_component_labels(raidPtr,
   1947 					   RF_NORMAL_COMPONENT_UPDATE);
   1948 		RF_LOCK_MUTEX(raidPtr->mutex);
   1949 		raidPtr->numNewFailures--;
   1950 	}
   1951 
   1952 	/* Check to see if we're at the limit... */
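         	/* raidPtr->openings is how many more requests RAIDframe will
         	 * accept; keep pulling buffers off buf_queue until it reaches
         	 * zero or the queue is empty. */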
   1953 	while (raidPtr->openings > 0) {
   1954 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1955 
   1956 		/* get the next item, if any, from the queue */
   1957 		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
   1958 			/* nothing more to do */
   1959 			return;
   1960 		}
   1961 
   1962 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1963 		 * partition.. Need to make it absolute to the underlying
   1964 		 * device.. */
   1965 
   1966 		blocknum = bp->b_blkno;
   1967 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1968 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1969 			blocknum += pp->p_offset;
   1970 		}
   1971 
   1972 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1973 			    (int) blocknum));
   1974 
   1975 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1976 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1977 
   1978 		/* *THIS* is where we adjust what block we're going to...
   1979 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1980 		raid_addr = blocknum;
   1981 
   1982 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1983 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1984 		sum = raid_addr + num_blocks + pb;
   1985 		if (1 || rf_debugKernelAccess) {
   1986 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1987 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1988 				    (int) pb, (int) bp->b_resid));
   1989 		}
   1990 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1991 		    || (sum < num_blocks) || (sum < pb)) {
   1992 			bp->b_error = ENOSPC;
   1993 			bp->b_resid = bp->b_bcount;
   1994 			biodone(bp);
   1995 			RF_LOCK_MUTEX(raidPtr->mutex);
   1996 			continue;
   1997 		}
   1998 		/*
   1999 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   2000 		 */
   2001 
   2002 		if (bp->b_bcount & raidPtr->sectorMask) {
   2003 			bp->b_error = EINVAL;
   2004 			bp->b_resid = bp->b_bcount;
   2005 			biodone(bp);
   2006 			RF_LOCK_MUTEX(raidPtr->mutex);
   2007 			continue;
   2008 
   2009 		}
   2010 		db1_printf(("Calling DoAccess..\n"));
   2011 
   2012 
   2013 		RF_LOCK_MUTEX(raidPtr->mutex);
   2014 		raidPtr->openings--;
   2015 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   2016 
   2017 		/*
   2018 		 * Everything is async.
   2019 		 */
   2020 		do_async = 1;
   2021 
   2022 		disk_busy(&rs->sc_dkdev);
   2023 
   2024 		/* XXX we're still at splbio() here... do we *really*
   2025 		   need to be? */
   2026 
   2027 		/* don't ever condition on bp->b_flags & B_WRITE.
   2028 		 * always condition on B_READ instead */
   2029 
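         		/* rf_DoAccess() only queues the access (note
         		 * RF_DAG_NONBLOCKING_IO); a non-zero return means the
         		 * request was rejected outright, so fail the buf here
         		 * instead of waiting for a completion that will never
         		 * arrive. */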
   2030 		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   2031 				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   2032 				 do_async, raid_addr, num_blocks,
   2033 				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   2034 
   2035 		if (rc) {
   2036 			bp->b_error = rc;
   2037 			bp->b_resid = bp->b_bcount;
   2038 			biodone(bp);
   2039 			/* continue loop */
   2040 		}
   2041 
   2042 		RF_LOCK_MUTEX(raidPtr->mutex);
   2043 	}
   2044 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   2045 }
   2046 
   2047 
   2048 
   2049 
   2050 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   2051 
   2052 int
   2053 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
   2054 {
   2055 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   2056 	struct buf *bp;
   2057 
   2058 	req->queue = queue;
   2059 
   2060 #if DIAGNOSTIC
   2061 	if (queue->raidPtr->raidid >= numraid) {
   2062 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
   2063 		    numraid);
   2064 		panic("Invalid Unit number in rf_DispatchKernelIO");
   2065 	}
   2066 #endif
   2067 
   2068 	bp = req->bp;
   2069 
   2070 	switch (req->type) {
   2071 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   2072 		/* XXX need to do something extra here.. */
   2073 		/* I'm leaving this in, as I've never actually seen it used,
   2074 		 * and I'd like folks to report it... GO */
    2075 		printf("WAKEUP CALLED\n");
   2076 		queue->numOutstanding++;
   2077 
   2078 		bp->b_flags = 0;
   2079 		bp->b_private = req;
   2080 
   2081 		KernelWakeupFunc(bp);
   2082 		break;
   2083 
   2084 	case RF_IO_TYPE_READ:
   2085 	case RF_IO_TYPE_WRITE:
   2086 #if RF_ACC_TRACE > 0
   2087 		if (req->tracerec) {
   2088 			RF_ETIMER_START(req->tracerec->timer);
   2089 		}
   2090 #endif
   2091 		InitBP(bp, queue->rf_cinfo->ci_vp,
   2092 		    op, queue->rf_cinfo->ci_dev,
   2093 		    req->sectorOffset, req->numSector,
   2094 		    req->buf, KernelWakeupFunc, (void *) req,
   2095 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   2096 
   2097 		if (rf_debugKernelAccess) {
   2098 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   2099 				(long) bp->b_blkno));
   2100 		}
   2101 		queue->numOutstanding++;
   2102 		queue->last_deq_sector = req->sectorOffset;
   2103 		/* acc wouldn't have been let in if there were any pending
   2104 		 * reqs at any other priority */
   2105 		queue->curPriority = req->priority;
   2106 
   2107 		db1_printf(("Going for %c to unit %d col %d\n",
   2108 			    req->type, queue->raidPtr->raidid,
   2109 			    queue->col));
   2110 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   2111 			(int) req->sectorOffset, (int) req->numSector,
   2112 			(int) (req->numSector <<
   2113 			    queue->raidPtr->logBytesPerSector),
   2114 			(int) queue->raidPtr->logBytesPerSector));
   2115 		VOP_STRATEGY(bp->b_vp, bp);
   2116 
   2117 		break;
   2118 
   2119 	default:
   2120 		panic("bad req->type in rf_DispatchKernelIO");
   2121 	}
   2122 	db1_printf(("Exiting from DispatchKernelIO\n"));
   2123 
   2124 	return (0);
   2125 }
    2126 /* this is the callback function associated with an I/O invoked from
   2127    kernel code.
   2128  */
   2129 static void
   2130 KernelWakeupFunc(struct buf *bp)
   2131 {
   2132 	RF_DiskQueueData_t *req = NULL;
   2133 	RF_DiskQueue_t *queue;
   2134 	int s;
   2135 
   2136 	s = splbio();
   2137 	db1_printf(("recovering the request queue:\n"));
   2138 	req = bp->b_private;
   2139 
   2140 	queue = (RF_DiskQueue_t *) req->queue;
   2141 
   2142 #if RF_ACC_TRACE > 0
   2143 	if (req->tracerec) {
   2144 		RF_ETIMER_STOP(req->tracerec->timer);
   2145 		RF_ETIMER_EVAL(req->tracerec->timer);
   2146 		RF_LOCK_MUTEX(rf_tracing_mutex);
   2147 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2148 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2149 		req->tracerec->num_phys_ios++;
   2150 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   2151 	}
   2152 #endif
   2153 
   2154 	/* XXX Ok, let's get aggressive... If b_error is set, let's go
   2155 	 * ballistic, and mark the component as hosed... */
   2156 
   2157 	if (bp->b_error != 0) {
   2158 		/* Mark the disk as dead */
   2159 		/* but only mark it once... */
   2160 		/* and only if it wouldn't leave this RAID set
   2161 		   completely broken */
   2162 		if (((queue->raidPtr->Disks[queue->col].status ==
   2163 		      rf_ds_optimal) ||
   2164 		     (queue->raidPtr->Disks[queue->col].status ==
   2165 		      rf_ds_used_spare)) &&
   2166 		     (queue->raidPtr->numFailures <
   2167 		      queue->raidPtr->Layout.map->faultsTolerated)) {
   2168 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   2169 			       queue->raidPtr->raidid,
   2170 			       queue->raidPtr->Disks[queue->col].devname);
   2171 			queue->raidPtr->Disks[queue->col].status =
   2172 			    rf_ds_failed;
   2173 			queue->raidPtr->status = rf_rs_degraded;
   2174 			queue->raidPtr->numFailures++;
   2175 			queue->raidPtr->numNewFailures++;
   2176 		} else {	/* Disk is already dead... */
   2177 			/* printf("Disk already marked as dead!\n"); */
   2178 		}
   2179 
   2180 	}
   2181 
   2182 	/* Fill in the error value */
   2183 
   2184 	req->error = bp->b_error;
   2185 
   2186 	simple_lock(&queue->raidPtr->iodone_lock);
   2187 
   2188 	/* Drop this one on the "finished" queue... */
   2189 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   2190 
   2191 	/* Let the raidio thread know there is work to be done. */
   2192 	wakeup(&(queue->raidPtr->iodone));
   2193 
   2194 	simple_unlock(&queue->raidPtr->iodone_lock);
   2195 
   2196 	splx(s);
   2197 }
   2198 
   2199 
   2200 
   2201 /*
   2202  * initialize a buf structure for doing an I/O in the kernel.
   2203  */
   2204 static void
   2205 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2206        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
   2207        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
   2208        struct proc *b_proc)
   2209 {
   2210 	/* bp->b_flags       = B_PHYS | rw_flag; */
   2211 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   2212 	bp->b_bcount = numSect << logBytesPerSector;
   2213 	bp->b_bufsize = bp->b_bcount;
   2214 	bp->b_error = 0;
   2215 	bp->b_dev = dev;
   2216 	bp->b_data = bf;
   2217 	bp->b_blkno = startSect;
   2218 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2219 	if (bp->b_bcount == 0) {
   2220 		panic("bp->b_bcount is zero in InitBP!!");
   2221 	}
   2222 	bp->b_proc = b_proc;
   2223 	bp->b_iodone = cbFunc;
   2224 	bp->b_private = cbArg;
   2225 	bp->b_vp = b_vp;
   2226 	if ((bp->b_flags & B_READ) == 0) {
   2227 		bp->b_vp->v_numoutput++;
   2228 	}
   2229 
   2230 }
   2231 
   2232 static void
   2233 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
   2234 		    struct disklabel *lp)
   2235 {
   2236 	memset(lp, 0, sizeof(*lp));
   2237 
   2238 	/* fabricate a label... */
   2239 	lp->d_secperunit = raidPtr->totalSectors;
   2240 	lp->d_secsize = raidPtr->bytesPerSector;
   2241 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2242 	lp->d_ntracks = 4 * raidPtr->numCol;
   2243 	lp->d_ncylinders = raidPtr->totalSectors /
   2244 		(lp->d_nsectors * lp->d_ntracks);
   2245 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2246 
   2247 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2248 	lp->d_type = DTYPE_RAID;
   2249 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2250 	lp->d_rpm = 3600;
   2251 	lp->d_interleave = 1;
   2252 	lp->d_flags = 0;
   2253 
   2254 	lp->d_partitions[RAW_PART].p_offset = 0;
   2255 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2256 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2257 	lp->d_npartitions = RAW_PART + 1;
   2258 
   2259 	lp->d_magic = DISKMAGIC;
   2260 	lp->d_magic2 = DISKMAGIC;
    2261 	lp->d_checksum = dkcksum(lp);
   2262 
   2263 }
   2264 /*
   2265  * Read the disklabel from the raid device.  If one is not present, fake one
   2266  * up.
   2267  */
   2268 static void
   2269 raidgetdisklabel(dev_t dev)
   2270 {
   2271 	int     unit = raidunit(dev);
   2272 	struct raid_softc *rs = &raid_softc[unit];
   2273 	const char   *errstring;
   2274 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2275 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2276 	RF_Raid_t *raidPtr;
   2277 
   2278 	db1_printf(("Getting the disklabel...\n"));
   2279 
   2280 	memset(clp, 0, sizeof(*clp));
   2281 
   2282 	raidPtr = raidPtrs[unit];
   2283 
   2284 	raidgetdefaultlabel(raidPtr, rs, lp);
   2285 
   2286 	/*
   2287 	 * Call the generic disklabel extraction routine.
   2288 	 */
   2289 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2290 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2291 	if (errstring)
   2292 		raidmakedisklabel(rs);
   2293 	else {
   2294 		int     i;
   2295 		struct partition *pp;
   2296 
   2297 		/*
   2298 		 * Sanity check whether the found disklabel is valid.
   2299 		 *
    2300 		 * This is necessary since the total size of the raid device
    2301 		 * may vary when the interleave is changed even though exactly
    2302 		 * the same components are used, and an old disklabel may be
    2303 		 * used if one is found.
   2304 		 */
   2305 		if (lp->d_secperunit != rs->sc_size)
   2306 			printf("raid%d: WARNING: %s: "
   2307 			    "total sector size in disklabel (%d) != "
   2308 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2309 			    lp->d_secperunit, (long) rs->sc_size);
   2310 		for (i = 0; i < lp->d_npartitions; i++) {
   2311 			pp = &lp->d_partitions[i];
   2312 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2313 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2314 				       "exceeds the size of raid (%ld)\n",
   2315 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2316 		}
   2317 	}
   2318 
   2319 }
   2320 /*
   2321  * Take care of things one might want to take care of in the event
   2322  * that a disklabel isn't present.
   2323  */
   2324 static void
   2325 raidmakedisklabel(struct raid_softc *rs)
   2326 {
   2327 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2328 	db1_printf(("Making a label..\n"));
   2329 
   2330 	/*
   2331 	 * For historical reasons, if there's no disklabel present
   2332 	 * the raw partition must be marked FS_BSDFFS.
   2333 	 */
   2334 
   2335 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2336 
   2337 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2338 
   2339 	lp->d_checksum = dkcksum(lp);
   2340 }
   2341 /*
   2342  * Wait interruptibly for an exclusive lock.
   2343  *
   2344  * XXX
   2345  * Several drivers do this; it should be abstracted and made MP-safe.
   2346  * (Hmm... where have we seen this warning before :->  GO )
   2347  */
   2348 static int
   2349 raidlock(struct raid_softc *rs)
   2350 {
   2351 	int     error;
   2352 
   2353 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2354 		rs->sc_flags |= RAIDF_WANTED;
   2355 		if ((error =
   2356 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2357 			return (error);
   2358 	}
   2359 	rs->sc_flags |= RAIDF_LOCKED;
   2360 	return (0);
   2361 }
   2362 /*
   2363  * Unlock and wake up any waiters.
   2364  */
   2365 static void
   2366 raidunlock(struct raid_softc *rs)
   2367 {
   2368 
   2369 	rs->sc_flags &= ~RAIDF_LOCKED;
   2370 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2371 		rs->sc_flags &= ~RAIDF_WANTED;
   2372 		wakeup(rs);
   2373 	}
   2374 }
   2375 
   2376 
   2377 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2378 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
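         /* Each component label lives RF_COMPONENT_INFO_OFFSET bytes into the
          * component, inside the region RAIDframe keeps out of the data area
          * (see rf_protected_sectors). */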
   2379 
   2380 int
   2381 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2382 {
   2383 	RF_ComponentLabel_t clabel;
   2384 	raidread_component_label(dev, b_vp, &clabel);
   2385 	clabel.mod_counter = mod_counter;
   2386 	clabel.clean = RF_RAID_CLEAN;
   2387 	raidwrite_component_label(dev, b_vp, &clabel);
   2388 	return(0);
   2389 }
   2390 
   2391 
   2392 int
   2393 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2394 {
   2395 	RF_ComponentLabel_t clabel;
   2396 	raidread_component_label(dev, b_vp, &clabel);
   2397 	clabel.mod_counter = mod_counter;
   2398 	clabel.clean = RF_RAID_DIRTY;
   2399 	raidwrite_component_label(dev, b_vp, &clabel);
   2400 	return(0);
   2401 }
   2402 
   2403 /* ARGSUSED */
   2404 int
   2405 raidread_component_label(dev_t dev, struct vnode *b_vp,
   2406 			 RF_ComponentLabel_t *clabel)
   2407 {
   2408 	struct buf *bp;
   2409 	const struct bdevsw *bdev;
   2410 	int error;
   2411 
   2412 	/* XXX should probably ensure that we don't try to do this if
   2413 	   someone has changed rf_protected_sectors. */
   2414 
   2415 	if (b_vp == NULL) {
   2416 		/* For whatever reason, this component is not valid.
   2417 		   Don't try to read a component label from it. */
   2418 		return(EINVAL);
   2419 	}
   2420 
   2421 	/* get a block of the appropriate size... */
   2422 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2423 	bp->b_dev = dev;
   2424 
   2425 	/* get our ducks in a row for the read */
   2426 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2427 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2428 	bp->b_flags |= B_READ;
   2429  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2430 
   2431 	bdev = bdevsw_lookup(bp->b_dev);
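         	/* XXX if the lookup fails we return without releasing the buf
         	   obtained from geteblk() above. */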
   2432 	if (bdev == NULL)
   2433 		return (ENXIO);
   2434 	(*bdev->d_strategy)(bp);
   2435 
   2436 	error = biowait(bp);
   2437 
   2438 	if (!error) {
   2439 		memcpy(clabel, bp->b_data,
   2440 		       sizeof(RF_ComponentLabel_t));
   2441 	}
   2442 
   2443 	brelse(bp, 0);
   2444 	return(error);
   2445 }
   2446 /* ARGSUSED */
   2447 int
   2448 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
   2449 			  RF_ComponentLabel_t *clabel)
   2450 {
   2451 	struct buf *bp;
   2452 	const struct bdevsw *bdev;
   2453 	int error;
   2454 
   2455 	/* get a block of the appropriate size... */
   2456 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2457 	bp->b_dev = dev;
   2458 
   2459 	/* get our ducks in a row for the write */
   2460 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2461 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2462 	bp->b_flags |= B_WRITE;
   2463  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2464 
   2465 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2466 
   2467 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2468 
   2469 	bdev = bdevsw_lookup(bp->b_dev);
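         	/* XXX as in raidread_component_label(), a failed lookup here
         	   leaks the buf obtained from geteblk(). */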
   2470 	if (bdev == NULL)
   2471 		return (ENXIO);
   2472 	(*bdev->d_strategy)(bp);
   2473 	error = biowait(bp);
   2474 	brelse(bp, 0);
   2475 	if (error) {
   2476 #if 1
   2477 		printf("Failed to write RAID component info!\n");
   2478 #endif
   2479 	}
   2480 
   2481 	return(error);
   2482 }
   2483 
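         /* Bump the modification counter and mark the component labels of all
          * live components (and any in-use spares) as dirty.  Failed and spared
          * components are left untouched. */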
   2484 void
   2485 rf_markalldirty(RF_Raid_t *raidPtr)
   2486 {
   2487 	RF_ComponentLabel_t clabel;
   2488 	int sparecol;
   2489 	int c;
   2490 	int j;
   2491 	int scol = -1;
   2492 
   2493 	raidPtr->mod_counter++;
   2494 	for (c = 0; c < raidPtr->numCol; c++) {
   2495 		/* we don't want to touch (at all) a disk that has
   2496 		   failed */
   2497 		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
   2498 			raidread_component_label(
   2499 						 raidPtr->Disks[c].dev,
   2500 						 raidPtr->raid_cinfo[c].ci_vp,
   2501 						 &clabel);
   2502 			if (clabel.status == rf_ds_spared) {
   2503 				/* XXX do something special...
   2504 				   but whatever you do, don't
   2505 				   try to access it!! */
   2506 			} else {
   2507 				raidmarkdirty(
   2508 					      raidPtr->Disks[c].dev,
   2509 					      raidPtr->raid_cinfo[c].ci_vp,
   2510 					      raidPtr->mod_counter);
   2511 			}
   2512 		}
   2513 	}
   2514 
   2515 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2516 		sparecol = raidPtr->numCol + c;
   2517 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2518 			/*
   2519 
   2520 			   we claim this disk is "optimal" if it's
   2521 			   rf_ds_used_spare, as that means it should be
   2522 			   directly substitutable for the disk it replaced.
   2523 			   We note that too...
   2524 
   2525 			 */
   2526 
   2527 			for(j=0;j<raidPtr->numCol;j++) {
   2528 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2529 					scol = j;
   2530 					break;
   2531 				}
   2532 			}
   2533 
   2534 			raidread_component_label(
   2535 				 raidPtr->Disks[sparecol].dev,
   2536 				 raidPtr->raid_cinfo[sparecol].ci_vp,
   2537 				 &clabel);
   2538 			/* make sure status is noted */
   2539 
   2540 			raid_init_component_label(raidPtr, &clabel);
   2541 
   2542 			clabel.row = 0;
   2543 			clabel.column = scol;
   2544 			/* Note: we *don't* change status from rf_ds_used_spare
   2545 			   to rf_ds_optimal */
   2546 			/* clabel.status = rf_ds_optimal; */
   2547 
   2548 			raidmarkdirty(raidPtr->Disks[sparecol].dev,
   2549 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2550 				      raidPtr->mod_counter);
   2551 		}
   2552 	}
   2553 }
   2554 
   2555 
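         /* Rewrite the component labels on all optimal components (and any
          * in-use spares), recording the new mod counter and unit number; on a
          * final update with good parity the components are also marked clean. */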
   2556 void
   2557 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
   2558 {
   2559 	RF_ComponentLabel_t clabel;
   2560 	int sparecol;
   2561 	int c;
   2562 	int j;
   2563 	int scol;
   2564 
   2565 	scol = -1;
   2566 
   2567 	/* XXX should do extra checks to make sure things really are clean,
   2568 	   rather than blindly setting the clean bit... */
   2569 
   2570 	raidPtr->mod_counter++;
   2571 
   2572 	for (c = 0; c < raidPtr->numCol; c++) {
   2573 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   2574 			raidread_component_label(
   2575 						 raidPtr->Disks[c].dev,
   2576 						 raidPtr->raid_cinfo[c].ci_vp,
   2577 						 &clabel);
   2578 			/* make sure status is noted */
   2579 			clabel.status = rf_ds_optimal;
   2580 
   2581 			/* bump the counter */
   2582 			clabel.mod_counter = raidPtr->mod_counter;
   2583 
   2584 			/* note what unit we are configured as */
   2585 			clabel.last_unit = raidPtr->raidid;
   2586 
   2587 			raidwrite_component_label(
   2588 						  raidPtr->Disks[c].dev,
   2589 						  raidPtr->raid_cinfo[c].ci_vp,
   2590 						  &clabel);
   2591 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2592 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2593 					raidmarkclean(
   2594 						      raidPtr->Disks[c].dev,
   2595 						      raidPtr->raid_cinfo[c].ci_vp,
   2596 						      raidPtr->mod_counter);
   2597 				}
   2598 			}
   2599 		}
   2600 		/* else we don't touch it.. */
   2601 	}
   2602 
   2603 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2604 		sparecol = raidPtr->numCol + c;
   2605 		/* Need to ensure that the reconstruct actually completed! */
   2606 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2607 			/*
   2608 
   2609 			   we claim this disk is "optimal" if it's
   2610 			   rf_ds_used_spare, as that means it should be
   2611 			   directly substitutable for the disk it replaced.
   2612 			   We note that too...
   2613 
   2614 			 */
   2615 
   2616 			for(j=0;j<raidPtr->numCol;j++) {
   2617 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2618 					scol = j;
   2619 					break;
   2620 				}
   2621 			}
   2622 
   2623 			/* XXX shouldn't *really* need this... */
   2624 			raidread_component_label(
   2625 				      raidPtr->Disks[sparecol].dev,
   2626 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2627 				      &clabel);
   2628 			/* make sure status is noted */
   2629 
   2630 			raid_init_component_label(raidPtr, &clabel);
   2631 
   2632 			clabel.mod_counter = raidPtr->mod_counter;
   2633 			clabel.column = scol;
   2634 			clabel.status = rf_ds_optimal;
   2635 			clabel.last_unit = raidPtr->raidid;
   2636 
   2637 			raidwrite_component_label(
   2638 				      raidPtr->Disks[sparecol].dev,
   2639 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2640 				      &clabel);
   2641 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2642 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2643 					raidmarkclean( raidPtr->Disks[sparecol].dev,
   2644 						       raidPtr->raid_cinfo[sparecol].ci_vp,
   2645 						       raidPtr->mod_counter);
   2646 				}
   2647 			}
   2648 		}
   2649 	}
   2650 }
   2651 
   2652 void
   2653 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2654 {
   2655 
   2656 	if (vp != NULL) {
   2657 		if (auto_configured == 1) {
   2658 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2659 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2660 			vput(vp);
   2661 
   2662 		} else {
   2663 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred, curlwp);
   2664 		}
   2665 	}
   2666 }
   2667 
   2668 
   2669 void
   2670 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2671 {
   2672 	int r,c;
   2673 	struct vnode *vp;
   2674 	int acd;
   2675 
   2676 
   2677 	/* We take this opportunity to close the vnodes like we should.. */
   2678 
   2679 	for (c = 0; c < raidPtr->numCol; c++) {
   2680 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2681 		acd = raidPtr->Disks[c].auto_configured;
   2682 		rf_close_component(raidPtr, vp, acd);
   2683 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2684 		raidPtr->Disks[c].auto_configured = 0;
   2685 	}
   2686 
   2687 	for (r = 0; r < raidPtr->numSpare; r++) {
   2688 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2689 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2690 		rf_close_component(raidPtr, vp, acd);
   2691 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2692 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2693 	}
   2694 }
   2695 
   2696 
   2697 void
   2698 rf_ReconThread(struct rf_recon_req *req)
   2699 {
   2700 	int     s;
   2701 	RF_Raid_t *raidPtr;
   2702 
   2703 	s = splbio();
   2704 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2705 	raidPtr->recon_in_progress = 1;
   2706 
   2707 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2708 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2709 
   2710 	RF_Free(req, sizeof(*req));
   2711 
   2712 	raidPtr->recon_in_progress = 0;
   2713 	splx(s);
   2714 
   2715 	/* That's all... */
   2716 	kthread_exit(0);	/* does not return */
   2717 }
   2718 
   2719 void
   2720 rf_RewriteParityThread(RF_Raid_t *raidPtr)
   2721 {
   2722 	int retcode;
   2723 	int s;
   2724 
   2725 	raidPtr->parity_rewrite_stripes_done = 0;
   2726 	raidPtr->parity_rewrite_in_progress = 1;
   2727 	s = splbio();
   2728 	retcode = rf_RewriteParity(raidPtr);
   2729 	splx(s);
   2730 	if (retcode) {
   2731 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2732 	} else {
   2733 		/* set the clean bit!  If we shutdown correctly,
   2734 		   the clean bit on each component label will get
   2735 		   set */
   2736 		raidPtr->parity_good = RF_RAID_CLEAN;
   2737 	}
   2738 	raidPtr->parity_rewrite_in_progress = 0;
   2739 
   2740 	/* Anyone waiting for us to stop?  If so, inform them... */
   2741 	if (raidPtr->waitShutdown) {
   2742 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2743 	}
   2744 
   2745 	/* That's all... */
   2746 	kthread_exit(0);	/* does not return */
   2747 }
   2748 
   2749 
   2750 void
   2751 rf_CopybackThread(RF_Raid_t *raidPtr)
   2752 {
   2753 	int s;
   2754 
   2755 	raidPtr->copyback_in_progress = 1;
   2756 	s = splbio();
   2757 	rf_CopybackReconstructedData(raidPtr);
   2758 	splx(s);
   2759 	raidPtr->copyback_in_progress = 0;
   2760 
   2761 	/* That's all... */
   2762 	kthread_exit(0);	/* does not return */
   2763 }
   2764 
   2765 
   2766 void
   2767 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
   2768 {
   2769 	int s;
   2770 	RF_Raid_t *raidPtr;
   2771 
   2772 	s = splbio();
   2773 	raidPtr = req->raidPtr;
   2774 	raidPtr->recon_in_progress = 1;
   2775 	rf_ReconstructInPlace(raidPtr, req->col);
   2776 	RF_Free(req, sizeof(*req));
   2777 	raidPtr->recon_in_progress = 0;
   2778 	splx(s);
   2779 
   2780 	/* That's all... */
   2781 	kthread_exit(0);	/* does not return */
   2782 }
   2783 
   2784 static RF_AutoConfig_t *
   2785 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
   2786     const char *cname, RF_SectorCount_t size)
   2787 {
   2788 	int good_one = 0;
   2789 	RF_ComponentLabel_t *clabel;
   2790 	RF_AutoConfig_t *ac;
   2791 
   2792 	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
   2793 	if (clabel == NULL) {
   2794 oomem:
   2795 		    while(ac_list) {
   2796 			    ac = ac_list;
   2797 			    if (ac->clabel)
   2798 				    free(ac->clabel, M_RAIDFRAME);
   2799 			    ac_list = ac_list->next;
   2800 			    free(ac, M_RAIDFRAME);
   2801 		    }
   2802 		    printf("RAID auto config: out of memory!\n");
   2803 		    return NULL; /* XXX probably should panic? */
   2804 	}
   2805 
   2806 	if (!raidread_component_label(dev, vp, clabel)) {
   2807 		    /* Got the label.  Does it look reasonable? */
   2808 		    if (rf_reasonable_label(clabel) &&
   2809 			(clabel->partitionSize <= size)) {
   2810 #ifdef DEBUG
   2811 			    printf("Component on: %s: %llu\n",
   2812 				cname, (unsigned long long)size);
   2813 			    rf_print_component_label(clabel);
   2814 #endif
   2815 			    /* if it's reasonable, add it, else ignore it. */
   2816 			    ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
   2817 				M_NOWAIT);
   2818 			    if (ac == NULL) {
   2819 				    free(clabel, M_RAIDFRAME);
   2820 				    goto oomem;
   2821 			    }
   2822 			    strlcpy(ac->devname, cname, sizeof(ac->devname));
   2823 			    ac->dev = dev;
   2824 			    ac->vp = vp;
   2825 			    ac->clabel = clabel;
   2826 			    ac->next = ac_list;
   2827 			    ac_list = ac;
   2828 			    good_one = 1;
   2829 		    }
   2830 	}
   2831 	if (!good_one) {
   2832 		/* cleanup */
   2833 		free(clabel, M_RAIDFRAME);
   2834 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2835 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2836 		vput(vp);
   2837 	}
   2838 	return ac_list;
   2839 }
   2840 
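         /* Troll through every disk-class device in the system, open its raw
          * partition (or wedge), and collect any component carrying a reasonable
          * RAIDframe label onto the RF_AutoConfig_t list that is returned. */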
   2841 RF_AutoConfig_t *
   2842 rf_find_raid_components()
   2843 {
   2844 	struct vnode *vp;
   2845 	struct disklabel label;
   2846 	struct device *dv;
   2847 	dev_t dev;
   2848 	int bmajor, bminor, wedge;
   2849 	int error;
   2850 	int i;
   2851 	RF_AutoConfig_t *ac_list;
   2852 
   2853 
   2854 	/* initialize the AutoConfig list */
   2855 	ac_list = NULL;
   2856 
   2857 	/* we begin by trolling through *all* the devices on the system */
   2858 
   2859 	for (dv = alldevs.tqh_first; dv != NULL;
   2860 	     dv = dv->dv_list.tqe_next) {
   2861 
   2862 		/* we are only interested in disks... */
   2863 		if (device_class(dv) != DV_DISK)
   2864 			continue;
   2865 
   2866 		/* we don't care about floppies... */
   2867 		if (device_is_a(dv, "fd")) {
   2868 			continue;
   2869 		}
   2870 
   2871 		/* we don't care about CD's... */
   2872 		if (device_is_a(dv, "cd")) {
   2873 			continue;
   2874 		}
   2875 
   2876 		/* hdfd is the Atari/Hades floppy driver */
   2877 		if (device_is_a(dv, "hdfd")) {
   2878 			continue;
   2879 		}
   2880 
   2881 		/* fdisa is the Atari/Milan floppy driver */
   2882 		if (device_is_a(dv, "fdisa")) {
   2883 			continue;
   2884 		}
   2885 
   2886 		/* need to find the device_name_to_block_device_major stuff */
   2887 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2888 
   2889 		/* get a vnode for the raw partition of this disk */
   2890 
   2891 		wedge = device_is_a(dv, "dk");
   2892 		bminor = minor(device_unit(dv));
   2893 		dev = wedge ? makedev(bmajor, bminor) :
   2894 		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
   2895 		if (bdevvp(dev, &vp))
   2896 			panic("RAID can't alloc vnode");
   2897 
   2898 		error = VOP_OPEN(vp, FREAD, NOCRED);
   2899 
   2900 		if (error) {
   2901 			/* "Who cares."  Continue looking
    2902 			   for something that exists */
   2903 			vput(vp);
   2904 			continue;
   2905 		}
   2906 
   2907 		if (wedge) {
   2908 			struct dkwedge_info dkw;
   2909 			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
   2910 			    NOCRED);
   2911 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2912 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2913 			vput(vp);
   2914 			if (error) {
   2915 				printf("RAIDframe: can't get wedge info for "
   2916 				    "dev %s (%d)\n", dv->dv_xname, error);
   2917 				continue;
   2918 			}
   2919 
   2920 			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0)
   2921 				continue;
   2922 
   2923 			ac_list = rf_get_component(ac_list, dev, vp,
   2924 			    dv->dv_xname, dkw.dkw_size);
   2925 			continue;
   2926 		}
   2927 
   2928 		/* Ok, the disk exists.  Go get the disklabel. */
   2929 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
   2930 		if (error) {
   2931 			/*
   2932 			 * XXX can't happen - open() would
   2933 			 * have errored out (or faked up one)
   2934 			 */
   2935 			if (error != ENOTTY)
   2936 				printf("RAIDframe: can't get label for dev "
   2937 				    "%s (%d)\n", dv->dv_xname, error);
   2938 		}
   2939 
   2940 		/* don't need this any more.  We'll allocate it again
   2941 		   a little later if we really do... */
   2942 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2943 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2944 		vput(vp);
   2945 
   2946 		if (error)
   2947 			continue;
   2948 
   2949 		for (i = 0; i < label.d_npartitions; i++) {
   2950 			char cname[sizeof(ac_list->devname)];
   2951 
   2952 			/* We only support partitions marked as RAID */
   2953 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2954 				continue;
   2955 
   2956 			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
   2957 			if (bdevvp(dev, &vp))
   2958 				panic("RAID can't alloc vnode");
   2959 
   2960 			error = VOP_OPEN(vp, FREAD, NOCRED);
   2961 			if (error) {
   2962 				/* Whatever... */
   2963 				vput(vp);
   2964 				continue;
   2965 			}
   2966 			snprintf(cname, sizeof(cname), "%s%c",
   2967 			    dv->dv_xname, 'a' + i);
   2968 			ac_list = rf_get_component(ac_list, dev, vp, cname,
   2969 				label.d_partitions[i].p_size);
   2970 		}
   2971 	}
   2972 	return ac_list;
   2973 }
   2974 
   2975 
   2976 static int
   2977 rf_reasonable_label(RF_ComponentLabel_t *clabel)
   2978 {
   2979 
   2980 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2981 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2982 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2983 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2984 	    clabel->row >=0 &&
   2985 	    clabel->column >= 0 &&
   2986 	    clabel->num_rows > 0 &&
   2987 	    clabel->num_columns > 0 &&
   2988 	    clabel->row < clabel->num_rows &&
   2989 	    clabel->column < clabel->num_columns &&
   2990 	    clabel->blockSize > 0 &&
   2991 	    clabel->numBlocks > 0) {
   2992 		/* label looks reasonable enough... */
   2993 		return(1);
   2994 	}
   2995 	return(0);
   2996 }
   2997 
   2998 
   2999 #ifdef DEBUG
   3000 void
   3001 rf_print_component_label(RF_ComponentLabel_t *clabel)
   3002 {
   3003 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   3004 	       clabel->row, clabel->column,
   3005 	       clabel->num_rows, clabel->num_columns);
   3006 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   3007 	       clabel->version, clabel->serial_number,
   3008 	       clabel->mod_counter);
   3009 	printf("   Clean: %s Status: %d\n",
   3010 	       clabel->clean ? "Yes" : "No", clabel->status );
   3011 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   3012 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   3013 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   3014 	       (char) clabel->parityConfig, clabel->blockSize,
   3015 	       clabel->numBlocks);
   3016 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   3017 	printf("   Contains root partition: %s\n",
   3018 	       clabel->root_partition ? "Yes" : "No" );
   3019 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   3020 #if 0
   3021 	   printf("   Config order: %d\n", clabel->config_order);
   3022 #endif
   3023 
   3024 }
   3025 #endif
   3026 
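         /* Partition the autoconfig list into configuration sets: components
          * whose labels agree (as judged by rf_does_it_fit()) end up in the
          * same RF_ConfigSet_t. */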
   3027 RF_ConfigSet_t *
   3028 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   3029 {
   3030 	RF_AutoConfig_t *ac;
   3031 	RF_ConfigSet_t *config_sets;
   3032 	RF_ConfigSet_t *cset;
   3033 	RF_AutoConfig_t *ac_next;
   3034 
   3035 
   3036 	config_sets = NULL;
   3037 
   3038 	/* Go through the AutoConfig list, and figure out which components
   3039 	   belong to what sets.  */
   3040 	ac = ac_list;
   3041 	while(ac!=NULL) {
   3042 		/* we're going to putz with ac->next, so save it here
   3043 		   for use at the end of the loop */
   3044 		ac_next = ac->next;
   3045 
   3046 		if (config_sets == NULL) {
   3047 			/* will need at least this one... */
   3048 			config_sets = (RF_ConfigSet_t *)
   3049 				malloc(sizeof(RF_ConfigSet_t),
   3050 				       M_RAIDFRAME, M_NOWAIT);
   3051 			if (config_sets == NULL) {
   3052 				panic("rf_create_auto_sets: No memory!");
   3053 			}
   3054 			/* this one is easy :) */
   3055 			config_sets->ac = ac;
   3056 			config_sets->next = NULL;
   3057 			config_sets->rootable = 0;
   3058 			ac->next = NULL;
   3059 		} else {
   3060 			/* which set does this component fit into? */
   3061 			cset = config_sets;
   3062 			while(cset!=NULL) {
   3063 				if (rf_does_it_fit(cset, ac)) {
   3064 					/* looks like it matches... */
   3065 					ac->next = cset->ac;
   3066 					cset->ac = ac;
   3067 					break;
   3068 				}
   3069 				cset = cset->next;
   3070 			}
   3071 			if (cset==NULL) {
    3072 				/* didn't find a match above... new set.. */
   3073 				cset = (RF_ConfigSet_t *)
   3074 					malloc(sizeof(RF_ConfigSet_t),
   3075 					       M_RAIDFRAME, M_NOWAIT);
   3076 				if (cset == NULL) {
   3077 					panic("rf_create_auto_sets: No memory!");
   3078 				}
   3079 				cset->ac = ac;
   3080 				ac->next = NULL;
   3081 				cset->next = config_sets;
   3082 				cset->rootable = 0;
   3083 				config_sets = cset;
   3084 			}
   3085 		}
   3086 		ac = ac_next;
   3087 	}
   3088 
   3089 
   3090 	return(config_sets);
   3091 }
   3092 
   3093 static int
   3094 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3095 {
   3096 	RF_ComponentLabel_t *clabel1, *clabel2;
   3097 
   3098 	/* If this one matches the *first* one in the set, that's good
   3099 	   enough, since the other members of the set would have been
   3100 	   through here too... */
   3101 	/* note that we are not checking partitionSize here..
   3102 
   3103 	   Note that we are also not checking the mod_counters here.
    3104 	   If everything else matches except the mod_counter, that's
   3105 	   good enough for this test.  We will deal with the mod_counters
   3106 	   a little later in the autoconfiguration process.
   3107 
   3108 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3109 
   3110 	   The reason we don't check for this is that failed disks
   3111 	   will have lower modification counts.  If those disks are
   3112 	   not added to the set they used to belong to, then they will
   3113 	   form their own set, which may result in 2 different sets,
   3114 	   for example, competing to be configured at raid0, and
   3115 	   perhaps competing to be the root filesystem set.  If the
   3116 	   wrong ones get configured, or both attempt to become /,
    3117 	   weird behaviour and/or serious lossage will occur.  Thus we
   3118 	   need to bring them into the fold here, and kick them out at
   3119 	   a later point.
   3120 
   3121 	*/
   3122 
   3123 	clabel1 = cset->ac->clabel;
   3124 	clabel2 = ac->clabel;
   3125 	if ((clabel1->version == clabel2->version) &&
   3126 	    (clabel1->serial_number == clabel2->serial_number) &&
   3127 	    (clabel1->num_rows == clabel2->num_rows) &&
   3128 	    (clabel1->num_columns == clabel2->num_columns) &&
   3129 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3130 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3131 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3132 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3133 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3134 	    (clabel1->blockSize == clabel2->blockSize) &&
   3135 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   3136 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3137 	    (clabel1->root_partition == clabel2->root_partition) &&
   3138 	    (clabel1->last_unit == clabel2->last_unit) &&
   3139 	    (clabel1->config_order == clabel2->config_order)) {
    3140 		/* if it gets here, it almost *has* to be a match */
   3141 	} else {
   3142 		/* it's not consistent with somebody in the set..
   3143 		   punt */
   3144 		return(0);
   3145 	}
   3146 	/* all was fine.. it must fit... */
   3147 	return(1);
   3148 }
   3149 
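         /* Decide whether a configuration set has enough live components (those
          * carrying the newest mod_counter) to be brought up, given its parity
          * type.  RAID 1 is special-cased: only losing both halves of a mirror
          * pair is fatal. */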
   3150 int
   3151 rf_have_enough_components(RF_ConfigSet_t *cset)
   3152 {
   3153 	RF_AutoConfig_t *ac;
   3154 	RF_AutoConfig_t *auto_config;
   3155 	RF_ComponentLabel_t *clabel;
   3156 	int c;
   3157 	int num_cols;
   3158 	int num_missing;
   3159 	int mod_counter;
   3160 	int mod_counter_found;
   3161 	int even_pair_failed;
   3162 	char parity_type;
   3163 
   3164 
   3165 	/* check to see that we have enough 'live' components
   3166 	   of this set.  If so, we can configure it if necessary */
   3167 
   3168 	num_cols = cset->ac->clabel->num_columns;
   3169 	parity_type = cset->ac->clabel->parityConfig;
   3170 
   3171 	/* XXX Check for duplicate components!?!?!? */
   3172 
   3173 	/* Determine what the mod_counter is supposed to be for this set. */
   3174 
   3175 	mod_counter_found = 0;
   3176 	mod_counter = 0;
   3177 	ac = cset->ac;
   3178 	while(ac!=NULL) {
   3179 		if (mod_counter_found==0) {
   3180 			mod_counter = ac->clabel->mod_counter;
   3181 			mod_counter_found = 1;
   3182 		} else {
   3183 			if (ac->clabel->mod_counter > mod_counter) {
   3184 				mod_counter = ac->clabel->mod_counter;
   3185 			}
   3186 		}
   3187 		ac = ac->next;
   3188 	}
   3189 
   3190 	num_missing = 0;
   3191 	auto_config = cset->ac;
   3192 
   3193 	even_pair_failed = 0;
   3194 	for(c=0; c<num_cols; c++) {
   3195 		ac = auto_config;
   3196 		while(ac!=NULL) {
   3197 			if ((ac->clabel->column == c) &&
   3198 			    (ac->clabel->mod_counter == mod_counter)) {
   3199 				/* it's this one... */
   3200 #ifdef DEBUG
   3201 				printf("Found: %s at %d\n",
   3202 				       ac->devname,c);
   3203 #endif
   3204 				break;
   3205 			}
   3206 			ac=ac->next;
   3207 		}
   3208 		if (ac==NULL) {
   3209 				/* Didn't find one here! */
   3210 				/* special case for RAID 1, especially
   3211 				   where there are more than 2
   3212 				   components (where RAIDframe treats
   3213 				   things a little differently :( ) */
   3214 			if (parity_type == '1') {
   3215 				if (c%2 == 0) { /* even component */
   3216 					even_pair_failed = 1;
   3217 				} else { /* odd component.  If
   3218 					    we're failed, and
   3219 					    so is the even
   3220 					    component, it's
   3221 					    "Good Night, Charlie" */
   3222 					if (even_pair_failed == 1) {
   3223 						return(0);
   3224 					}
   3225 				}
   3226 			} else {
   3227 				/* normal accounting */
   3228 				num_missing++;
   3229 			}
   3230 		}
   3231 		if ((parity_type == '1') && (c%2 == 1)) {
   3232 				/* Just did an even component, and we didn't
   3233 				   bail.. reset the even_pair_failed flag,
   3234 				   and go on to the next component.... */
   3235 			even_pair_failed = 0;
   3236 		}
   3237 	}
   3238 
   3239 	clabel = cset->ac->clabel;
   3240 
   3241 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3242 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3243 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3244 		/* XXX this needs to be made *much* more general */
   3245 		/* Too many failures */
   3246 		return(0);
   3247 	}
   3248 	/* otherwise, all is well, and we've got enough to take a kick
   3249 	   at autoconfiguring this set */
   3250 	return(1);
   3251 }
   3252 
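         /* Build an RF_Config_t for an autoconfigured set, taking the geometry
          * and layout parameters from the first component label and the device
          * names from each component on the list. */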
   3253 void
   3254 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
   3255 			RF_Raid_t *raidPtr)
   3256 {
   3257 	RF_ComponentLabel_t *clabel;
   3258 	int i;
   3259 
   3260 	clabel = ac->clabel;
   3261 
   3262 	/* 1. Fill in the common stuff */
   3263 	config->numRow = clabel->num_rows = 1;
   3264 	config->numCol = clabel->num_columns;
   3265 	config->numSpare = 0; /* XXX should this be set here? */
   3266 	config->sectPerSU = clabel->sectPerSU;
   3267 	config->SUsPerPU = clabel->SUsPerPU;
   3268 	config->SUsPerRU = clabel->SUsPerRU;
   3269 	config->parityConfig = clabel->parityConfig;
    3270 	/* XXX queue type is not stored in the label; default to fifo */
   3271 	strcpy(config->diskQueueType,"fifo");
   3272 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3273 	config->layoutSpecificSize = 0; /* XXX ?? */
   3274 
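	/* Record each component's device name, indexed by column.  Row 0 is
	   used throughout since autoconfigured sets always have numRow == 1
	   (forced above). */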
   3275 	while(ac!=NULL) {
   3276 		/* row/col values will be in range due to the checks
   3277 		   in reasonable_label() */
   3278 		strcpy(config->devnames[0][ac->clabel->column],
   3279 		       ac->devname);
   3280 		ac = ac->next;
   3281 	}
   3282 
   3283 	for(i=0;i<RF_MAXDBGV;i++) {
   3284 		config->debugVars[i][0] = 0;
   3285 	}
   3286 }
   3287 
   3288 int
   3289 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3290 {
   3291 	RF_ComponentLabel_t clabel;
   3292 	struct vnode *vp;
   3293 	dev_t dev;
   3294 	int column;
   3295 	int sparecol;
   3296 
   3297 	raidPtr->autoconfigure = new_value;
   3298 
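	/* Push the new autoconfigure setting out to the component label of
	   every healthy data component and every in-use spare so that the
	   setting persists across reboots. */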
   3299 	for(column=0; column<raidPtr->numCol; column++) {
   3300 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3301 			dev = raidPtr->Disks[column].dev;
   3302 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3303 			raidread_component_label(dev, vp, &clabel);
   3304 			clabel.autoconfigure = new_value;
   3305 			raidwrite_component_label(dev, vp, &clabel);
   3306 		}
   3307 	}
   3308 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3309 		sparecol = raidPtr->numCol + column;
   3310 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3311 			dev = raidPtr->Disks[sparecol].dev;
   3312 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3313 			raidread_component_label(dev, vp, &clabel);
   3314 			clabel.autoconfigure = new_value;
   3315 			raidwrite_component_label(dev, vp, &clabel);
   3316 		}
   3317 	}
   3318 	return(new_value);
   3319 }
   3320 
   3321 int
   3322 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3323 {
   3324 	RF_ComponentLabel_t clabel;
   3325 	struct vnode *vp;
   3326 	dev_t dev;
   3327 	int column;
   3328 	int sparecol;
   3329 
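	/* As in rf_set_autoconfig(), write the new root_partition flag into
	   the component labels of all healthy components and in-use spares. */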
   3330 	raidPtr->root_partition = new_value;
   3331 	for(column=0; column<raidPtr->numCol; column++) {
   3332 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3333 			dev = raidPtr->Disks[column].dev;
   3334 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3335 			raidread_component_label(dev, vp, &clabel);
   3336 			clabel.root_partition = new_value;
   3337 			raidwrite_component_label(dev, vp, &clabel);
   3338 		}
   3339 	}
   3340 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3341 		sparecol = raidPtr->numCol + column;
   3342 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3343 			dev = raidPtr->Disks[sparecol].dev;
   3344 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3345 			raidread_component_label(dev, vp, &clabel);
   3346 			clabel.root_partition = new_value;
   3347 			raidwrite_component_label(dev, vp, &clabel);
   3348 		}
   3349 	}
   3350 	return(new_value);
   3351 }
   3352 
   3353 void
   3354 rf_release_all_vps(RF_ConfigSet_t *cset)
   3355 {
   3356 	RF_AutoConfig_t *ac;
   3357 
   3358 	ac = cset->ac;
   3359 	while(ac!=NULL) {
   3360 		/* Close the vp, and give it back */
   3361 		if (ac->vp) {
   3362 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3363 			VOP_CLOSE(ac->vp, FREAD, NOCRED);
   3364 			vput(ac->vp);
   3365 			ac->vp = NULL;
   3366 		}
   3367 		ac = ac->next;
   3368 	}
   3369 }
   3370 
   3371 
   3372 void
   3373 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3374 {
   3375 	RF_AutoConfig_t *ac;
   3376 	RF_AutoConfig_t *next_ac;
   3377 
   3378 	ac = cset->ac;
   3379 	while(ac!=NULL) {
   3380 		next_ac = ac->next;
   3381 		/* nuke the label */
   3382 		free(ac->clabel, M_RAIDFRAME);
   3383 		/* cleanup the config structure */
   3384 		free(ac, M_RAIDFRAME);
   3385 		/* "next.." */
   3386 		ac = next_ac;
   3387 	}
   3388 	/* and, finally, nuke the config set */
   3389 	free(cset, M_RAIDFRAME);
   3390 }
   3391 
   3392 
   3393 void
   3394 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
   3395 {
   3396 	/* current version number */
   3397 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3398 	clabel->serial_number = raidPtr->serial_number;
   3399 	clabel->mod_counter = raidPtr->mod_counter;
   3400 	clabel->num_rows = 1;
   3401 	clabel->num_columns = raidPtr->numCol;
   3402 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3403 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3404 
   3405 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3406 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3407 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3408 
   3409 	clabel->blockSize = raidPtr->bytesPerSector;
   3410 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3411 
   3412 	/* XXX not portable */
   3413 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3414 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3415 	clabel->autoconfigure = raidPtr->autoconfigure;
   3416 	clabel->root_partition = raidPtr->root_partition;
   3417 	clabel->last_unit = raidPtr->raidid;
   3418 	clabel->config_order = raidPtr->config_order;
   3419 }
   3420 
   3421 int
   3422 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
   3423 {
   3424 	RF_Raid_t *raidPtr;
   3425 	RF_Config_t *config;
   3426 	int raidID;
   3427 	int retcode;
   3428 
   3429 #ifdef DEBUG
   3430 	printf("RAID autoconfigure\n");
   3431 #endif
   3432 
   3433 	retcode = 0;
   3434 	*unit = -1;
   3435 
   3436 	/* 1. Create a config structure */
   3437 
   3438 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3439 				       M_RAIDFRAME,
   3440 				       M_NOWAIT);
   3441 	if (config==NULL) {
   3442 		printf("Out of mem!?!?\n");
   3443 				/* XXX do something more intelligent here. */
   3444 		return(1);
   3445 	}
   3446 
   3447 	memset(config, 0, sizeof(RF_Config_t));
   3448 
   3449 	/*
   3450 	   2. Figure out what RAID ID this one is supposed to live at
   3451 	   See if we can get the same RAID dev that it was configured
   3452 	   on last time..
   3453 	*/
   3454 
   3455 	raidID = cset->ac->clabel->last_unit;
   3456 	if ((raidID < 0) || (raidID >= numraid)) {
   3457 		/* let's not wander off into lala land. */
   3458 		raidID = numraid - 1;
   3459 	}
   3460 	if (raidPtrs[raidID]->valid != 0) {
   3461 
   3462 		/*
   3463 		   Nope... Go looking for an alternative...
   3464 		   Start high so we don't immediately use raid0 if that's
   3465 		   not taken.
   3466 		*/
   3467 
   3468 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3469 			if (raidPtrs[raidID]->valid == 0) {
   3470 				/* can use this one! */
   3471 				break;
   3472 			}
   3473 		}
   3474 	}
   3475 
   3476 	if (raidID < 0) {
   3477 		/* punt... */
   3478 		printf("Unable to auto configure this set!\n");
   3479 		printf("(Out of RAID devs!)\n");
   3480 		free(config, M_RAIDFRAME);
   3481 		return(1);
   3482 	}
   3483 
   3484 #ifdef DEBUG
   3485 	printf("Configuring raid%d:\n",raidID);
   3486 #endif
   3487 
   3488 	raidPtr = raidPtrs[raidID];
   3489 
   3490 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3491 	raidPtr->raidid = raidID;
   3492 	raidPtr->openings = RAIDOUTSTANDING;
   3493 
   3494 	/* 3. Build the configuration structure */
   3495 	rf_create_configuration(cset->ac, config, raidPtr);
   3496 
   3497 	/* 4. Do the configuration */
   3498 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3499 
   3500 	if (retcode == 0) {
   3501 
   3502 		raidinit(raidPtrs[raidID]);
   3503 
   3504 		rf_markalldirty(raidPtrs[raidID]);
   3505 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3506 		if (cset->ac->clabel->root_partition==1) {
   3507 			/* everything configured just fine.  Make a note
   3508 			   that this set is eligible to be root. */
   3509 			cset->rootable = 1;
   3510 			/* XXX do this here? */
   3511 			raidPtrs[raidID]->root_partition = 1;
   3512 		}
   3513 	}
   3514 
   3515 	/* 5. Cleanup */
   3516 	free(config, M_RAIDFRAME);
   3517 
   3518 	*unit = raidID;
   3519 	return(retcode);
   3520 }
   3521 
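/*
 * Tell the disk statistics layer that the I/O described by this access
 * descriptor has completed, reporting the number of bytes actually
 * transferred (b_bcount - b_resid) and whether it was a read.
 */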
   3522 void
   3523 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
   3524 {
   3525 	struct buf *bp;
   3526 
   3527 	bp = (struct buf *)desc->bp;
   3528 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3529 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
   3530 }
   3531 
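/*
 * Initialize one of RAIDframe's private pools: pre-allocate xmin items,
 * try to keep at least xmin available (low water mark), and let at most
 * xmax idle items accumulate (high water mark).
 */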
   3532 void
   3533 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
   3534 	     size_t xmin, size_t xmax)
   3535 {
   3536 	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
   3537 	pool_sethiwat(p, xmax);
   3538 	pool_prime(p, xmin);
   3539 	pool_setlowat(p, xmin);
   3540 }
   3541 
    3542 /*
    3543  * rf_buf_queue_check(int raidid) -- looks at the buf_queue of the given
    3544  * RAID set to see whether there is I/O pending and whether the set has
    3545  * openings available to service it.  Returns 0 if there is I/O waiting
    3546  * that can be done now, 1 otherwise.
    3547  *
    3548  */
   3549 
   3550 int
   3551 rf_buf_queue_check(int raidid)
   3552 {
   3553 	if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
   3554 	    raidPtrs[raidid]->openings > 0) {
   3555 		/* there is work to do */
   3556 		return 0;
   3557 	}
   3558 	/* default is nothing to do */
   3559 	return 1;
   3560 }
   3561 
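/*
 * Figure out how big a component is: ask for its disklabel partition info
 * first, and fall back to wedge info if that fails.  In either case the
 * sectors RAIDframe reserves for its own use (rf_protectedSectors) are
 * subtracted from the usable size.
 */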
   3562 int
   3563 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
   3564 {
   3565 	struct partinfo dpart;
   3566 	struct dkwedge_info dkw;
   3567 	int error;
   3568 
   3569 	error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred);
   3570 	if (error == 0) {
   3571 		diskPtr->blockSize = dpart.disklab->d_secsize;
   3572 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
   3573 		diskPtr->partitionSize = dpart.part->p_size;
   3574 		return 0;
   3575 	}
   3576 
   3577 	error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred);
   3578 	if (error == 0) {
   3579 		diskPtr->blockSize = 512;	/* XXX */
   3580 		diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
   3581 		diskPtr->partitionSize = dkw.dkw_size;
   3582 		return 0;
   3583 	}
   3584 	return error;
   3585 }
   3586 
   3587 static int
   3588 raid_match(struct device *self, struct cfdata *cfdata,
   3589     void *aux)
   3590 {
   3591 	return 1;
   3592 }
   3593 
   3594 static void
   3595 raid_attach(struct device *parent, struct device *self,
   3596     void *aux)
   3597 {
   3598 
   3599 }
   3600 
   3601 
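/*
 * Refuse to detach while the RAID set is still configured.
 */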
   3602 static int
   3603 raid_detach(struct device *self, int flags)
   3604 {
   3605 	struct raid_softc *rs = (struct raid_softc *)self;
   3606 
   3607 	if (rs->sc_flags & RAIDF_INITED)
   3608 		return EBUSY;
   3609 
   3610 	return 0;
   3611 }
   3612 
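/*
 * Attach a synthetic geometry to the RAID device's property dictionary.
 * The geometry is made up: one "track" is a full data stripe, each
 * "cylinder" is 4 * numCol tracks, and the cylinder count is whatever that
 * works out to for the set's total size.  For example (purely hypothetical
 * numbers), a 5-column set with 128 data sectors per stripe would report
 * 128 sectors per track and 20 tracks per cylinder.
 */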
   3613 static void
   3614 rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
   3615 {
   3616 	prop_dictionary_t disk_info, odisk_info, geom;
   3617 	disk_info = prop_dictionary_create();
   3618 	geom = prop_dictionary_create();
   3619 	prop_dictionary_set_uint64(geom, "sectors-per-unit",
   3620 				   raidPtr->totalSectors);
   3621 	prop_dictionary_set_uint32(geom, "sector-size",
   3622 				   raidPtr->bytesPerSector);
   3623 
   3624 	prop_dictionary_set_uint16(geom, "sectors-per-track",
   3625 				   raidPtr->Layout.dataSectorsPerStripe);
   3626 	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
   3627 				   4 * raidPtr->numCol);
   3628 
   3629 	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
   3630 	   raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
   3631 	   (4 * raidPtr->numCol)));
   3632 
   3633 	prop_dictionary_set(disk_info, "geometry", geom);
   3634 	prop_object_release(geom);
   3635 	prop_dictionary_set(device_properties(rs->sc_dev),
   3636 			    "disk-info", disk_info);
   3637 	odisk_info = rs->sc_dkdev.dk_info;
   3638 	rs->sc_dkdev.dk_info = disk_info;
   3639 	if (odisk_info)
   3640 		prop_object_release(odisk_info);
   3641 }
   3642