      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.250.2.1 2009/01/19 13:19:02 skrll Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * Copyright (c) 1990, 1993
     33  *      The Regents of the University of California.  All rights reserved.
     34  *
     35  * This code is derived from software contributed to Berkeley by
     36  * the Systems Programming Group of the University of Utah Computer
     37  * Science Department.
     38  *
     39  * Redistribution and use in source and binary forms, with or without
     40  * modification, are permitted provided that the following conditions
     41  * are met:
     42  * 1. Redistributions of source code must retain the above copyright
     43  *    notice, this list of conditions and the following disclaimer.
     44  * 2. Redistributions in binary form must reproduce the above copyright
     45  *    notice, this list of conditions and the following disclaimer in the
     46  *    documentation and/or other materials provided with the distribution.
     47  * 3. Neither the name of the University nor the names of its contributors
     48  *    may be used to endorse or promote products derived from this software
     49  *    without specific prior written permission.
     50  *
     51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     61  * SUCH DAMAGE.
     62  *
     63  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     64  *
     65  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     66  */
     67 
     68 /*
     69  * Copyright (c) 1988 University of Utah.
     70  *
     71  * This code is derived from software contributed to Berkeley by
     72  * the Systems Programming Group of the University of Utah Computer
     73  * Science Department.
     74  *
     75  * Redistribution and use in source and binary forms, with or without
     76  * modification, are permitted provided that the following conditions
     77  * are met:
     78  * 1. Redistributions of source code must retain the above copyright
     79  *    notice, this list of conditions and the following disclaimer.
     80  * 2. Redistributions in binary form must reproduce the above copyright
     81  *    notice, this list of conditions and the following disclaimer in the
     82  *    documentation and/or other materials provided with the distribution.
     83  * 3. All advertising materials mentioning features or use of this software
     84  *    must display the following acknowledgement:
     85  *      This product includes software developed by the University of
     86  *      California, Berkeley and its contributors.
     87  * 4. Neither the name of the University nor the names of its contributors
     88  *    may be used to endorse or promote products derived from this software
     89  *    without specific prior written permission.
     90  *
     91  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     92  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     93  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     94  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     95  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     96  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     97  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     98  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     99  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    100  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    101  * SUCH DAMAGE.
    102  *
    103  * from: Utah $Hdr: cd.c 1.6 90/11/28$
    104  *
    105  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
    106  */
    107 
    108 /*
    109  * Copyright (c) 1995 Carnegie-Mellon University.
    110  * All rights reserved.
    111  *
    112  * Authors: Mark Holland, Jim Zelenka
    113  *
    114  * Permission to use, copy, modify and distribute this software and
    115  * its documentation is hereby granted, provided that both the copyright
    116  * notice and this permission notice appear in all copies of the
    117  * software, derivative works or modified versions, and any portions
    118  * thereof, and that both notices appear in supporting documentation.
    119  *
    120  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
    121  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
    122  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
    123  *
    124  * Carnegie Mellon requests users of this software to return to
    125  *
    126  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    127  *  School of Computer Science
    128  *  Carnegie Mellon University
    129  *  Pittsburgh PA 15213-3890
    130  *
    131  * any improvements or extensions that they make and grant Carnegie the
    132  * rights to redistribute these changes.
    133  */
    134 
    135 /***********************************************************
    136  *
    137  * rf_kintf.c -- the kernel interface routines for RAIDframe
    138  *
    139  ***********************************************************/
    140 
    141 #include <sys/cdefs.h>
    142 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.250.2.1 2009/01/19 13:19:02 skrll Exp $");
    143 
    144 #ifdef _KERNEL_OPT
    145 #include "opt_compat_netbsd.h"
    146 #include "opt_raid_autoconfig.h"
    147 #include "raid.h"
    148 #endif
    149 
    150 #include <sys/param.h>
    151 #include <sys/errno.h>
    152 #include <sys/pool.h>
    153 #include <sys/proc.h>
    154 #include <sys/queue.h>
    155 #include <sys/disk.h>
    156 #include <sys/device.h>
    157 #include <sys/stat.h>
    158 #include <sys/ioctl.h>
    159 #include <sys/fcntl.h>
    160 #include <sys/systm.h>
    161 #include <sys/vnode.h>
    162 #include <sys/disklabel.h>
    163 #include <sys/conf.h>
    164 #include <sys/buf.h>
    165 #include <sys/bufq.h>
    166 #include <sys/user.h>
    167 #include <sys/reboot.h>
    168 #include <sys/kauth.h>
    169 
    170 #include <prop/proplib.h>
    171 
    172 #include <dev/raidframe/raidframevar.h>
    173 #include <dev/raidframe/raidframeio.h>
    174 
    175 #include "rf_raid.h"
    176 #include "rf_copyback.h"
    177 #include "rf_dag.h"
    178 #include "rf_dagflags.h"
    179 #include "rf_desc.h"
    180 #include "rf_diskqueue.h"
    181 #include "rf_etimer.h"
    182 #include "rf_general.h"
    183 #include "rf_kintf.h"
    184 #include "rf_options.h"
    185 #include "rf_driver.h"
    186 #include "rf_parityscan.h"
    187 #include "rf_threadstuff.h"
    188 
    189 #ifdef COMPAT_50
    190 #include "rf_compat50.h"
    191 #endif
    192 
    193 #ifdef DEBUG
    194 int     rf_kdebug_level = 0;
    195 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    196 #else				/* DEBUG */
    197 #define db1_printf(a) { }
    198 #endif				/* DEBUG */
    199 
    200 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    201 
    202 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
    203 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    204 
    205 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    206 						 * spare table */
    207 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    208 						 * installation process */
    209 #endif
    210 
    211 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    212 
    213 /* prototypes */
    214 static void KernelWakeupFunc(struct buf *);
    215 static void InitBP(struct buf *, struct vnode *, unsigned,
    216     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    217     void *, int, struct proc *);
    218 static void raidinit(RF_Raid_t *);
    219 
    220 void raidattach(int);
    221 static int raid_match(struct device *, struct cfdata *, void *);
    222 static void raid_attach(struct device *, struct device *, void *);
    223 static int raid_detach(struct device *, int);
    224 
    225 dev_type_open(raidopen);
    226 dev_type_close(raidclose);
    227 dev_type_read(raidread);
    228 dev_type_write(raidwrite);
    229 dev_type_ioctl(raidioctl);
    230 dev_type_strategy(raidstrategy);
    231 dev_type_dump(raiddump);
    232 dev_type_size(raidsize);
    233 
    234 const struct bdevsw raid_bdevsw = {
    235 	raidopen, raidclose, raidstrategy, raidioctl,
    236 	raiddump, raidsize, D_DISK
    237 };
    238 
    239 const struct cdevsw raid_cdevsw = {
    240 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    241 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
    242 };
    243 
    244 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
    245 
    246 /* XXX Not sure if the following should be replacing the raidPtrs above,
    247    or if it should be used in conjunction with that...
    248 */
    249 
    250 struct raid_softc {
    251 	struct device *sc_dev;
    252 	int     sc_flags;	/* flags */
    253 	int     sc_cflags;	/* configuration flags */
    254 	uint64_t sc_size;	/* size of the raid device */
    255 	char    sc_xname[20];	/* XXX external name */
    256 	struct disk sc_dkdev;	/* generic disk device info */
    257 	struct bufq_state *buf_queue;	/* used for the device queue */
    258 };
    259 /* sc_flags */
    260 #define RAIDF_INITED	0x01	/* unit has been initialized */
    261 #define RAIDF_WLABEL	0x02	/* label area is writable */
    262 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    263 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    264 #define RAIDF_LOCKED	0x80	/* unit is locked */
    265 
    266 #define	raidunit(x)	DISKUNIT(x)
    267 int numraid = 0;
    268 
    269 extern struct cfdriver raid_cd;
    270 CFATTACH_DECL_NEW(raid, sizeof(struct raid_softc),
    271     raid_match, raid_attach, raid_detach, NULL);
    272 
    273 /*
    274  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    275  * Be aware that large numbers can allow the driver to consume a lot of
    276  * kernel memory, especially on writes, and in degraded mode reads.
    277  *
    278  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    279  * a single 64K write will typically require 64K for the old data,
    280  * 64K for the old parity, and 64K for the new parity, for a total
    281  * of 192K (if the parity buffer is not re-used immediately).
     282  * Even if it is used immediately, that's still 128K, which when multiplied
    283  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    284  *
    285  * Now in degraded mode, for example, a 64K read on the above setup may
    286  * require data reconstruction, which will require *all* of the 4 remaining
    287  * disks to participate -- 4 * 32K/disk == 128K again.
    288  */
    289 
    290 #ifndef RAIDOUTSTANDING
    291 #define RAIDOUTSTANDING   6
    292 #endif
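
         /*
          * Illustrative arithmetic only, following the example above: with
          * the default of 6 outstanding I/Os and roughly 192K of buffers per
          * 64K write in the worst case, the driver may pin on the order of
          * 6 * 192K = 1152K of kernel memory for buffers, on top of the
          * incoming data itself.  Actual usage depends on the RAID layout.
          */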
    293 
    294 #define RAIDLABELDEV(dev)	\
    295 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
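
         /*
          * The RAIDLABELDEV() macro above expands to the dev_t of the raw
          * partition (RAW_PART) on the same raid unit and major; as the name
          * suggests, it identifies the device used when the unit's disklabel
          * is accessed.
          */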
    296 
    297 /* declared here, and made public, for the benefit of KVM stuff.. */
    298 struct raid_softc *raid_softc;
    299 
    300 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    301 				     struct disklabel *);
    302 static void raidgetdisklabel(dev_t);
    303 static void raidmakedisklabel(struct raid_softc *);
    304 
    305 static int raidlock(struct raid_softc *);
    306 static void raidunlock(struct raid_softc *);
    307 
    308 static void rf_markalldirty(RF_Raid_t *);
    309 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
    310 
    311 void rf_ReconThread(struct rf_recon_req *);
    312 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    313 void rf_CopybackThread(RF_Raid_t *raidPtr);
    314 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    315 int rf_autoconfig(struct device *self);
    316 void rf_buildroothack(RF_ConfigSet_t *);
    317 
    318 RF_AutoConfig_t *rf_find_raid_components(void);
    319 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    320 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    321 static int rf_reasonable_label(RF_ComponentLabel_t *);
    322 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    323 int rf_set_autoconfig(RF_Raid_t *, int);
    324 int rf_set_rootpartition(RF_Raid_t *, int);
    325 void rf_release_all_vps(RF_ConfigSet_t *);
    326 void rf_cleanup_config_set(RF_ConfigSet_t *);
    327 int rf_have_enough_components(RF_ConfigSet_t *);
    328 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    329 static int rf_sync_component_caches(RF_Raid_t *raidPtr);
    330 
    331 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    332 				  allow autoconfig to take place.
    333 				  Note that this is overridden by having
    334 				  RAID_AUTOCONFIG as an option in the
    335 				  kernel config file.  */
    336 
    337 struct RF_Pools_s rf_pools;
    338 
    339 void
    340 raidattach(int num)
    341 {
    342 	int raidID;
    343 	int i, rc;
    344 
    345 #ifdef DEBUG
    346 	printf("raidattach: Asked for %d units\n", num);
    347 #endif
    348 
    349 	if (num <= 0) {
    350 #ifdef DIAGNOSTIC
    351 		panic("raidattach: count <= 0");
    352 #endif
    353 		return;
    354 	}
    355 	/* This is where all the initialization stuff gets done. */
    356 
    357 	numraid = num;
    358 
    359 	/* Make some space for requested number of units... */
    360 
    361 	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
    362 	if (raidPtrs == NULL) {
    363 		panic("raidPtrs is NULL!!");
    364 	}
    365 
    366 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
    367 	rf_mutex_init(&rf_sparet_wait_mutex);
    368 
    369 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    370 #endif
    371 
    372 	for (i = 0; i < num; i++)
    373 		raidPtrs[i] = NULL;
    374 	rc = rf_BootRaidframe();
    375 	if (rc == 0)
    376 		aprint_normal("Kernelized RAIDframe activated\n");
    377 	else
    378 		panic("Serious error booting RAID!!");
    379 
    380 	/* put together some datastructures like the CCD device does.. This
    381 	 * lets us lock the device and what-not when it gets opened. */
    382 
    383 	raid_softc = (struct raid_softc *)
    384 		malloc(num * sizeof(struct raid_softc),
    385 		       M_RAIDFRAME, M_NOWAIT);
    386 	if (raid_softc == NULL) {
    387 		aprint_error("WARNING: no memory for RAIDframe driver\n");
    388 		return;
    389 	}
    390 
    391 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    392 
    393 	for (raidID = 0; raidID < num; raidID++) {
    394 		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
    395 
    396 		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
    397 			  (RF_Raid_t *));
    398 		if (raidPtrs[raidID] == NULL) {
    399 			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
    400 			numraid = raidID;
    401 			return;
    402 		}
    403 	}
    404 
    405 	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
    406 		aprint_error("raidattach: config_cfattach_attach failed?\n");
    407 	}
    408 
    409 #ifdef RAID_AUTOCONFIG
    410 	raidautoconfig = 1;
    411 #endif
    412 
    413 	/*
    414 	 * Register a finalizer which will be used to auto-config RAID
    415 	 * sets once all real hardware devices have been found.
    416 	 */
    417 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
    418 		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
    419 }
    420 
    421 int
    422 rf_autoconfig(struct device *self)
    423 {
    424 	RF_AutoConfig_t *ac_list;
    425 	RF_ConfigSet_t *config_sets;
    426 
    427 	if (raidautoconfig == 0)
    428 		return (0);
    429 
    430 	/* XXX This code can only be run once. */
    431 	raidautoconfig = 0;
    432 
    433 	/* 1. locate all RAID components on the system */
    434 #ifdef DEBUG
    435 	printf("Searching for RAID components...\n");
    436 #endif
    437 	ac_list = rf_find_raid_components();
    438 
    439 	/* 2. Sort them into their respective sets. */
    440 	config_sets = rf_create_auto_sets(ac_list);
    441 
    442 	/*
     443 	 * 3. Evaluate each set and configure the valid ones.
    444 	 * This gets done in rf_buildroothack().
    445 	 */
    446 	rf_buildroothack(config_sets);
    447 
    448 	return 1;
    449 }
    450 
    451 void
    452 rf_buildroothack(RF_ConfigSet_t *config_sets)
    453 {
    454 	RF_ConfigSet_t *cset;
    455 	RF_ConfigSet_t *next_cset;
    456 	int retcode;
    457 	int raidID;
    458 	int rootID;
    459 	int col;
    460 	int num_root;
    461 	char *devname;
    462 
    463 	rootID = 0;
    464 	num_root = 0;
    465 	cset = config_sets;
    466 	while(cset != NULL ) {
    467 		next_cset = cset->next;
    468 		if (rf_have_enough_components(cset) &&
    469 		    cset->ac->clabel->autoconfigure==1) {
    470 			retcode = rf_auto_config_set(cset,&raidID);
    471 			if (!retcode) {
    472 #ifdef DEBUG
    473 				printf("raid%d: configured ok\n", raidID);
    474 #endif
    475 				if (cset->rootable) {
    476 					rootID = raidID;
    477 					num_root++;
    478 				}
    479 			} else {
    480 				/* The autoconfig didn't work :( */
    481 #ifdef DEBUG
    482 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    483 #endif
    484 				rf_release_all_vps(cset);
    485 			}
    486 		} else {
    487 			/* we're not autoconfiguring this set...
    488 			   release the associated resources */
    489 			rf_release_all_vps(cset);
    490 		}
    491 		/* cleanup */
    492 		rf_cleanup_config_set(cset);
    493 		cset = next_cset;
    494 	}
    495 
    496 	/* if the user has specified what the root device should be
    497 	   then we don't touch booted_device or boothowto... */
    498 
    499 	if (rootspec != NULL)
    500 		return;
    501 
    502 	/* we found something bootable... */
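         	/*
         	 * If exactly one auto-configured set claimed to be root, it
         	 * becomes booted_device.  If more than one did, the code below
         	 * tries to narrow the choice by matching the device we actually
         	 * booted from against each set's component names; failing that,
         	 * it falls back to asking the user (RB_ASKNAME).
         	 */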
    503 
    504 	if (num_root == 1) {
    505 		booted_device = raid_softc[rootID].sc_dev;
    506 	} else if (num_root > 1) {
    507 
    508 		/*
    509 		 * Maybe the MD code can help. If it cannot, then
    510 		 * setroot() will discover that we have no
    511 		 * booted_device and will ask the user if nothing was
    512 		 * hardwired in the kernel config file
    513 		 */
    514 
    515 		if (booted_device == NULL)
    516 			cpu_rootconf();
    517 		if (booted_device == NULL)
    518 			return;
    519 
    520 		num_root = 0;
    521 		for (raidID = 0; raidID < numraid; raidID++) {
    522 			if (raidPtrs[raidID]->valid == 0)
    523 				continue;
    524 
    525 			if (raidPtrs[raidID]->root_partition == 0)
    526 				continue;
    527 
    528 			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
    529 				devname = raidPtrs[raidID]->Disks[col].devname;
    530 				devname += sizeof("/dev/") - 1;
    531 				if (strncmp(devname, device_xname(booted_device),
    532 					    strlen(device_xname(booted_device))) != 0)
    533 					continue;
    534 #ifdef DEBUG
    535 				printf("raid%d includes boot device %s\n",
    536 				       raidID, devname);
    537 #endif
    538 				num_root++;
    539 				rootID = raidID;
    540 			}
    541 		}
    542 
    543 		if (num_root == 1) {
    544 			booted_device = raid_softc[rootID].sc_dev;
    545 		} else {
    546 			/* we can't guess.. require the user to answer... */
    547 			boothowto |= RB_ASKNAME;
    548 		}
    549 	}
    550 }
    551 
    552 
    553 int
    554 raidsize(dev_t dev)
    555 {
    556 	struct raid_softc *rs;
    557 	struct disklabel *lp;
    558 	int     part, unit, omask, size;
    559 
    560 	unit = raidunit(dev);
    561 	if (unit >= numraid)
    562 		return (-1);
    563 	rs = &raid_softc[unit];
    564 
    565 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    566 		return (-1);
    567 
    568 	part = DISKPART(dev);
    569 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    570 	lp = rs->sc_dkdev.dk_label;
    571 
    572 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
    573 		return (-1);
    574 
    575 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    576 		size = -1;
    577 	else
    578 		size = lp->d_partitions[part].p_size *
    579 		    (lp->d_secsize / DEV_BSIZE);
    580 
    581 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
    582 		return (-1);
    583 
    584 	return (size);
    585 
    586 }
    587 
    588 int
    589 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
    590 {
    591 	int     unit = raidunit(dev);
    592 	struct raid_softc *rs;
    593 	const struct bdevsw *bdev;
    594 	struct disklabel *lp;
    595 	RF_Raid_t *raidPtr;
    596 	daddr_t offset;
    597 	int     part, c, sparecol, j, scol, dumpto;
    598 	int     error = 0;
    599 
    600 	if (unit >= numraid)
    601 		return (ENXIO);
    602 
    603 	rs = &raid_softc[unit];
    604 	raidPtr = raidPtrs[unit];
    605 
    606 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    607 		return ENXIO;
    608 
    609 	/* we only support dumping to RAID 1 sets */
    610 	if (raidPtr->Layout.numDataCol != 1 ||
    611 	    raidPtr->Layout.numParityCol != 1)
    612 		return EINVAL;
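
         	/*
         	 * (Presumably this restriction exists because in a RAID 1 set
         	 * each component holds a complete copy of the data, so the
         	 * crash dump can simply be forwarded to a single healthy
         	 * component below.)
         	 */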
    613 
    614 
    615 	if ((error = raidlock(rs)) != 0)
    616 		return error;
    617 
    618 	if (size % DEV_BSIZE != 0) {
    619 		error = EINVAL;
    620 		goto out;
    621 	}
    622 
    623 	if (blkno + size / DEV_BSIZE > rs->sc_size) {
    624 		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
    625 		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
    626 		    size / DEV_BSIZE, rs->sc_size);
    627 		error = EINVAL;
    628 		goto out;
    629 	}
    630 
    631 	part = DISKPART(dev);
    632 	lp = rs->sc_dkdev.dk_label;
    633 	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
    634 
    635 	/* figure out what device is alive.. */
    636 
    637 	/*
    638 	   Look for a component to dump to.  The preference for the
    639 	   component to dump to is as follows:
    640 	   1) the master
    641 	   2) a used_spare of the master
    642 	   3) the slave
    643 	   4) a used_spare of the slave
    644 	*/
    645 
    646 	dumpto = -1;
    647 	for (c = 0; c < raidPtr->numCol; c++) {
    648 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
    649 			/* this might be the one */
    650 			dumpto = c;
    651 			break;
    652 		}
    653 	}
    654 
    655 	/*
    656 	   At this point we have possibly selected a live master or a
    657 	   live slave.  We now check to see if there is a spared
    658 	   master (or a spared slave), if we didn't find a live master
    659 	   or a live slave.
    660 	*/
    661 
    662 	for (c = 0; c < raidPtr->numSpare; c++) {
    663 		sparecol = raidPtr->numCol + c;
    664 		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
    665 			/* How about this one? */
    666 			scol = -1;
    667 			for(j=0;j<raidPtr->numCol;j++) {
    668 				if (raidPtr->Disks[j].spareCol == sparecol) {
    669 					scol = j;
    670 					break;
    671 				}
    672 			}
    673 			if (scol == 0) {
    674 				/*
    675 				   We must have found a spared master!
    676 				   We'll take that over anything else
    677 				   found so far.  (We couldn't have
    678 				   found a real master before, since
    679 				   this is a used spare, and it's
    680 				   saying that it's replacing the
    681 				   master.)  On reboot (with
    682 				   autoconfiguration turned on)
    683 				   sparecol will become the 1st
    684 				   component (component0) of this set.
    685 				*/
    686 				dumpto = sparecol;
    687 				break;
    688 			} else if (scol != -1) {
    689 				/*
    690 				   Must be a spared slave.  We'll dump
     691 				   to that if we haven't found anything
    692 				   else so far.
    693 				*/
    694 				if (dumpto == -1)
    695 					dumpto = sparecol;
    696 			}
    697 		}
    698 	}
    699 
    700 	if (dumpto == -1) {
    701 		/* we couldn't find any live components to dump to!?!?
    702 		 */
    703 		error = EINVAL;
    704 		goto out;
    705 	}
    706 
    707 	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
    708 
    709 	/*
    710 	   Note that blkno is relative to this particular partition.
    711 	   By adding the offset of this partition in the RAID
    712 	   set, and also adding RF_PROTECTED_SECTORS, we get a
    713 	   value that is relative to the partition used for the
    714 	   underlying component.
    715 	*/
    716 
    717 	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
    718 				blkno + offset, va, size);
    719 
    720 out:
    721 	raidunlock(rs);
    722 
    723 	return error;
    724 }
    725 /* ARGSUSED */
    726 int
    727 raidopen(dev_t dev, int flags, int fmt,
    728     struct lwp *l)
    729 {
    730 	int     unit = raidunit(dev);
    731 	struct raid_softc *rs;
    732 	struct disklabel *lp;
    733 	int     part, pmask;
    734 	int     error = 0;
    735 
    736 	if (unit >= numraid)
    737 		return (ENXIO);
    738 	rs = &raid_softc[unit];
    739 
    740 	if ((error = raidlock(rs)) != 0)
    741 		return (error);
    742 	lp = rs->sc_dkdev.dk_label;
    743 
    744 	part = DISKPART(dev);
    745 
    746 	/*
    747 	 * If there are wedges, and this is not RAW_PART, then we
    748 	 * need to fail.
    749 	 */
    750 	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
    751 		error = EBUSY;
    752 		goto bad;
    753 	}
    754 	pmask = (1 << part);
    755 
    756 	if ((rs->sc_flags & RAIDF_INITED) &&
    757 	    (rs->sc_dkdev.dk_openmask == 0))
    758 		raidgetdisklabel(dev);
    759 
    760 	/* make sure that this partition exists */
    761 
    762 	if (part != RAW_PART) {
    763 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    764 		    ((part >= lp->d_npartitions) ||
    765 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    766 			error = ENXIO;
    767 			goto bad;
    768 		}
    769 	}
    770 	/* Prevent this unit from being unconfigured while open. */
    771 	switch (fmt) {
    772 	case S_IFCHR:
    773 		rs->sc_dkdev.dk_copenmask |= pmask;
    774 		break;
    775 
    776 	case S_IFBLK:
    777 		rs->sc_dkdev.dk_bopenmask |= pmask;
    778 		break;
    779 	}
    780 
    781 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    782 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    783 		/* First one... mark things as dirty... Note that we *MUST*
    784 		 have done a configure before this.  I DO NOT WANT TO BE
    785 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    786 		 THAT THEY BELONG TOGETHER!!!!! */
    787 		/* XXX should check to see if we're only open for reading
    788 		   here... If so, we needn't do this, but then need some
    789 		   other way of keeping track of what's happened.. */
    790 
    791 		rf_markalldirty( raidPtrs[unit] );
    792 	}
    793 
    794 
    795 	rs->sc_dkdev.dk_openmask =
    796 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    797 
    798 bad:
    799 	raidunlock(rs);
    800 
    801 	return (error);
    802 
    803 
    804 }
    805 /* ARGSUSED */
    806 int
    807 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
    808 {
    809 	int     unit = raidunit(dev);
    810 	struct cfdata *cf;
    811 	struct raid_softc *rs;
    812 	int     error = 0;
    813 	int     part;
    814 
    815 	if (unit >= numraid)
    816 		return (ENXIO);
    817 	rs = &raid_softc[unit];
    818 
    819 	if ((error = raidlock(rs)) != 0)
    820 		return (error);
    821 
    822 	part = DISKPART(dev);
    823 
    824 	/* ...that much closer to allowing unconfiguration... */
    825 	switch (fmt) {
    826 	case S_IFCHR:
    827 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    828 		break;
    829 
    830 	case S_IFBLK:
    831 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    832 		break;
    833 	}
    834 	rs->sc_dkdev.dk_openmask =
    835 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    836 
    837 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    838 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
     839 		/* Last one... device is not unconfigured yet.
     840 		   Mark things as clean.  (If RAIDF_INITED were not set,
     841 		   device shutdown would already have taken care of
     842 		   setting the clean bits.) */
    843 
    844 		rf_update_component_labels(raidPtrs[unit],
    845 						 RF_FINAL_COMPONENT_UPDATE);
    846 		if (doing_shutdown) {
    847 			/* last one, and we're going down, so
    848 			   lights out for this RAID set too. */
    849 			error = rf_Shutdown(raidPtrs[unit]);
    850 
    851 			/* It's no longer initialized... */
    852 			rs->sc_flags &= ~RAIDF_INITED;
    853 
    854 			/* detach the device */
    855 
    856 			cf = device_cfdata(rs->sc_dev);
    857 			error = config_detach(rs->sc_dev, DETACH_QUIET);
    858 			free(cf, M_RAIDFRAME);
    859 
    860 			/* Detach the disk. */
    861 			disk_detach(&rs->sc_dkdev);
    862 			disk_destroy(&rs->sc_dkdev);
    863 		}
    864 	}
    865 
    866 	raidunlock(rs);
    867 	return (0);
    868 
    869 }
    870 
    871 void
    872 raidstrategy(struct buf *bp)
    873 {
    874 	int s;
    875 
    876 	unsigned int raidID = raidunit(bp->b_dev);
    877 	RF_Raid_t *raidPtr;
    878 	struct raid_softc *rs = &raid_softc[raidID];
    879 	int     wlabel;
    880 
    881 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    882 		bp->b_error = ENXIO;
    883 		goto done;
    884 	}
    885 	if (raidID >= numraid || !raidPtrs[raidID]) {
    886 		bp->b_error = ENODEV;
    887 		goto done;
    888 	}
    889 	raidPtr = raidPtrs[raidID];
    890 	if (!raidPtr->valid) {
    891 		bp->b_error = ENODEV;
    892 		goto done;
    893 	}
    894 	if (bp->b_bcount == 0) {
    895 		db1_printf(("b_bcount is zero..\n"));
    896 		goto done;
    897 	}
    898 
    899 	/*
    900 	 * Do bounds checking and adjust transfer.  If there's an
    901 	 * error, the bounds check will flag that for us.
    902 	 */
    903 
    904 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    905 	if (DISKPART(bp->b_dev) == RAW_PART) {
    906 		uint64_t size; /* device size in DEV_BSIZE unit */
    907 
    908 		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
    909 			size = raidPtr->totalSectors <<
    910 			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
    911 		} else {
    912 			size = raidPtr->totalSectors >>
    913 			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
    914 		}
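
         		/*
         		 * e.g. with 512-byte sectors, logBytesPerSector equals
         		 * DEV_BSHIFT (9), the shift above is zero, and size is
         		 * simply totalSectors.
         		 */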
    915 		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
    916 			goto done;
    917 		}
    918 	} else {
    919 		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
    920 			db1_printf(("Bounds check failed!!:%d %d\n",
    921 				(int) bp->b_blkno, (int) wlabel));
    922 			goto done;
    923 		}
    924 	}
    925 	s = splbio();
    926 
    927 	bp->b_resid = 0;
    928 
    929 	/* stuff it onto our queue */
    930 	bufq_put(rs->buf_queue, bp);
    931 
     932 	/* schedule the IO to happen at the next convenient time */
    933 	wakeup(&(raidPtrs[raidID]->iodone));
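
         	/*
         	 * raidstrategy itself never performs the I/O; it only queues
         	 * the buf and pokes the RAIDframe kernel thread (which is
         	 * presumably sleeping on raidPtr->iodone) to pick it up off
         	 * buf_queue at its convenience.
         	 */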
    934 
    935 	splx(s);
    936 	return;
    937 
    938 done:
    939 	bp->b_resid = bp->b_bcount;
    940 	biodone(bp);
    941 }
    942 /* ARGSUSED */
    943 int
    944 raidread(dev_t dev, struct uio *uio, int flags)
    945 {
    946 	int     unit = raidunit(dev);
    947 	struct raid_softc *rs;
    948 
    949 	if (unit >= numraid)
    950 		return (ENXIO);
    951 	rs = &raid_softc[unit];
    952 
    953 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    954 		return (ENXIO);
    955 
    956 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    957 
    958 }
    959 /* ARGSUSED */
    960 int
    961 raidwrite(dev_t dev, struct uio *uio, int flags)
    962 {
    963 	int     unit = raidunit(dev);
    964 	struct raid_softc *rs;
    965 
    966 	if (unit >= numraid)
    967 		return (ENXIO);
    968 	rs = &raid_softc[unit];
    969 
    970 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    971 		return (ENXIO);
    972 
    973 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    974 
    975 }
    976 
    977 int
    978 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
    979 {
    980 	int     unit = raidunit(dev);
    981 	int     error = 0;
    982 	int     part, pmask;
    983 	struct cfdata *cf;
    984 	struct raid_softc *rs;
    985 	RF_Config_t *k_cfg, *u_cfg;
    986 	RF_Raid_t *raidPtr;
    987 	RF_RaidDisk_t *diskPtr;
    988 	RF_AccTotals_t *totals;
    989 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    990 	u_char *specific_buf;
    991 	int retcode = 0;
    992 	int column;
    993 	int raidid;
    994 	struct rf_recon_req *rrcopy, *rr;
    995 	RF_ComponentLabel_t *clabel;
    996 	RF_ComponentLabel_t *ci_label;
    997 	RF_ComponentLabel_t **clabel_ptr;
    998 	RF_SingleComponent_t *sparePtr,*componentPtr;
    999 	RF_SingleComponent_t component;
   1000 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
   1001 	int i, j, d;
   1002 #ifdef __HAVE_OLD_DISKLABEL
   1003 	struct disklabel newlabel;
   1004 #endif
   1005 	struct dkwedge_info *dkw;
   1006 
   1007 	if (unit >= numraid)
   1008 		return (ENXIO);
   1009 	rs = &raid_softc[unit];
   1010 	raidPtr = raidPtrs[unit];
   1011 
   1012 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
   1013 		(int) DISKPART(dev), (int) unit, (int) cmd));
   1014 
   1015 	/* Must be open for writes for these commands... */
   1016 	switch (cmd) {
   1017 #ifdef DIOCGSECTORSIZE
   1018 	case DIOCGSECTORSIZE:
   1019 		*(u_int *)data = raidPtr->bytesPerSector;
   1020 		return 0;
   1021 	case DIOCGMEDIASIZE:
   1022 		*(off_t *)data =
   1023 		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
   1024 		return 0;
   1025 #endif
   1026 	case DIOCSDINFO:
   1027 	case DIOCWDINFO:
   1028 #ifdef __HAVE_OLD_DISKLABEL
   1029 	case ODIOCWDINFO:
   1030 	case ODIOCSDINFO:
   1031 #endif
   1032 	case DIOCWLABEL:
   1033 	case DIOCAWEDGE:
   1034 	case DIOCDWEDGE:
   1035 		if ((flag & FWRITE) == 0)
   1036 			return (EBADF);
   1037 	}
   1038 
   1039 	/* Must be initialized for these... */
   1040 	switch (cmd) {
   1041 	case DIOCGDINFO:
   1042 	case DIOCSDINFO:
   1043 	case DIOCWDINFO:
   1044 #ifdef __HAVE_OLD_DISKLABEL
   1045 	case ODIOCGDINFO:
   1046 	case ODIOCWDINFO:
   1047 	case ODIOCSDINFO:
   1048 	case ODIOCGDEFLABEL:
   1049 #endif
   1050 	case DIOCGPART:
   1051 	case DIOCWLABEL:
   1052 	case DIOCGDEFLABEL:
   1053 	case DIOCAWEDGE:
   1054 	case DIOCDWEDGE:
   1055 	case DIOCLWEDGES:
   1056 	case DIOCCACHESYNC:
   1057 	case RAIDFRAME_SHUTDOWN:
   1058 	case RAIDFRAME_REWRITEPARITY:
   1059 	case RAIDFRAME_GET_INFO:
   1060 	case RAIDFRAME_RESET_ACCTOTALS:
   1061 	case RAIDFRAME_GET_ACCTOTALS:
   1062 	case RAIDFRAME_KEEP_ACCTOTALS:
   1063 	case RAIDFRAME_GET_SIZE:
   1064 	case RAIDFRAME_FAIL_DISK:
   1065 	case RAIDFRAME_COPYBACK:
   1066 	case RAIDFRAME_CHECK_RECON_STATUS:
   1067 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1068 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1069 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1070 	case RAIDFRAME_ADD_HOT_SPARE:
   1071 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1072 	case RAIDFRAME_INIT_LABELS:
   1073 	case RAIDFRAME_REBUILD_IN_PLACE:
   1074 	case RAIDFRAME_CHECK_PARITY:
   1075 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1076 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1077 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1078 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1079 	case RAIDFRAME_SET_AUTOCONFIG:
   1080 	case RAIDFRAME_SET_ROOT:
   1081 	case RAIDFRAME_DELETE_COMPONENT:
   1082 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1083 		if ((rs->sc_flags & RAIDF_INITED) == 0)
   1084 			return (ENXIO);
   1085 	}
   1086 
   1087 	switch (cmd) {
   1088 #ifdef COMPAT_50
   1089 	case RAIDFRAME_GET_INFO50:
   1090 		return rf_get_info50(raidPtr, data);
   1091 
   1092 	case RAIDFRAME_CONFIGURE50:
   1093 		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
   1094 			return retcode;
   1095 		goto config;
   1096 #endif
   1097 		/* configure the system */
   1098 	case RAIDFRAME_CONFIGURE:
   1099 
   1100 		if (raidPtr->valid) {
   1101 			/* There is a valid RAID set running on this unit! */
   1102 			printf("raid%d: Device already configured!\n",unit);
   1103 			return(EINVAL);
   1104 		}
   1105 
   1106 		/* copy-in the configuration information */
   1107 		/* data points to a pointer to the configuration structure */
   1108 
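
         		/*
         		 * A minimal userland sketch of this interface
         		 * (illustrative only; the device path is just an example
         		 * and error handling and the filling-in of cfg are
         		 * omitted):
         		 *
         		 *	RF_Config_t cfg;
         		 *	RF_Config_t *cfgp = &cfg;
         		 *	int fd = open("/dev/rraid0d", O_RDWR);
         		 *	ioctl(fd, RAIDFRAME_CONFIGURE, &cfgp);
         		 *
         		 * i.e. the ioctl argument is a pointer to an RF_Config_t
         		 * pointer, which is why the code below dereferences
         		 * *((RF_Config_t **) data) before the copyin().
         		 */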
   1109 		u_cfg = *((RF_Config_t **) data);
   1110 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
   1111 		if (k_cfg == NULL) {
   1112 			return (ENOMEM);
   1113 		}
   1114 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
   1115 		if (retcode) {
   1116 			RF_Free(k_cfg, sizeof(RF_Config_t));
   1117 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
   1118 				retcode));
   1119 			return (retcode);
   1120 		}
   1121 		goto config;
   1122 	config:
   1123 		/* allocate a buffer for the layout-specific data, and copy it
   1124 		 * in */
   1125 		if (k_cfg->layoutSpecificSize) {
   1126 			if (k_cfg->layoutSpecificSize > 10000) {
   1127 				/* sanity check */
   1128 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1129 				return (EINVAL);
   1130 			}
   1131 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
   1132 			    (u_char *));
   1133 			if (specific_buf == NULL) {
   1134 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1135 				return (ENOMEM);
   1136 			}
   1137 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1138 			    k_cfg->layoutSpecificSize);
   1139 			if (retcode) {
   1140 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1141 				RF_Free(specific_buf,
   1142 					k_cfg->layoutSpecificSize);
   1143 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
   1144 					retcode));
   1145 				return (retcode);
   1146 			}
   1147 		} else
   1148 			specific_buf = NULL;
   1149 		k_cfg->layoutSpecific = specific_buf;
   1150 
   1151 		/* should do some kind of sanity check on the configuration.
   1152 		 * Store the sum of all the bytes in the last byte? */
   1153 
   1154 		/* configure the system */
   1155 
   1156 		/*
   1157 		 * Clear the entire RAID descriptor, just to make sure
   1158 		 *  there is no stale data left in the case of a
   1159 		 *  reconfiguration
   1160 		 */
   1161 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
   1162 		raidPtr->raidid = unit;
   1163 
   1164 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
   1165 
   1166 		if (retcode == 0) {
   1167 
   1168 			/* allow this many simultaneous IO's to
   1169 			   this RAID device */
   1170 			raidPtr->openings = RAIDOUTSTANDING;
   1171 
   1172 			raidinit(raidPtr);
   1173 			rf_markalldirty(raidPtr);
   1174 		}
   1175 		/* free the buffers.  No return code here. */
   1176 		if (k_cfg->layoutSpecificSize) {
   1177 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1178 		}
   1179 		RF_Free(k_cfg, sizeof(RF_Config_t));
   1180 
   1181 		return (retcode);
   1182 
   1183 		/* shutdown the system */
   1184 	case RAIDFRAME_SHUTDOWN:
   1185 
   1186 		if ((error = raidlock(rs)) != 0)
   1187 			return (error);
   1188 
   1189 		/*
   1190 		 * If somebody has a partition mounted, we shouldn't
   1191 		 * shutdown.
   1192 		 */
   1193 
   1194 		part = DISKPART(dev);
   1195 		pmask = (1 << part);
   1196 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
   1197 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
   1198 			(rs->sc_dkdev.dk_copenmask & pmask))) {
   1199 			raidunlock(rs);
   1200 			return (EBUSY);
   1201 		}
   1202 
   1203 		retcode = rf_Shutdown(raidPtr);
   1204 
   1205 		/* It's no longer initialized... */
   1206 		rs->sc_flags &= ~RAIDF_INITED;
   1207 
   1208 		/* free the pseudo device attach bits */
   1209 
   1210 		cf = device_cfdata(rs->sc_dev);
   1211 		/* XXX this causes us to not return any errors
   1212 		   from the above call to rf_Shutdown() */
   1213 		retcode = config_detach(rs->sc_dev, DETACH_QUIET);
   1214 		free(cf, M_RAIDFRAME);
   1215 
   1216 		/* Detach the disk. */
   1217 		disk_detach(&rs->sc_dkdev);
   1218 		disk_destroy(&rs->sc_dkdev);
   1219 
   1220 		raidunlock(rs);
   1221 
   1222 		return (retcode);
   1223 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1224 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1225 		/* need to read the component label for the disk indicated
   1226 		   by row,column in clabel */
    1227 		   by row, column in clabel */
    1228 		/* For practice, let's get it directly from disk, rather
   1229 		   than from the in-core copy */
   1230 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1231 			   (RF_ComponentLabel_t *));
   1232 		if (clabel == NULL)
   1233 			return (ENOMEM);
   1234 
   1235 		retcode = copyin( *clabel_ptr, clabel,
   1236 				  sizeof(RF_ComponentLabel_t));
   1237 
   1238 		if (retcode) {
   1239 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1240 			return(retcode);
   1241 		}
   1242 
   1243 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1244 
   1245 		column = clabel->column;
   1246 
   1247 		if ((column < 0) || (column >= raidPtr->numCol +
   1248 				     raidPtr->numSpare)) {
   1249 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1250 			return(EINVAL);
   1251 		}
   1252 
   1253 		retcode = raidread_component_label(raidPtr->Disks[column].dev,
   1254 				raidPtr->raid_cinfo[column].ci_vp,
   1255 				clabel );
   1256 
   1257 		if (retcode == 0) {
   1258 			retcode = copyout(clabel, *clabel_ptr,
   1259 					  sizeof(RF_ComponentLabel_t));
   1260 		}
   1261 		RF_Free(clabel, sizeof(RF_ComponentLabel_t));
   1262 		return (retcode);
   1263 
   1264 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1265 		clabel = (RF_ComponentLabel_t *) data;
   1266 
   1267 		/* XXX check the label for valid stuff... */
   1268 		/* Note that some things *should not* get modified --
   1269 		   the user should be re-initing the labels instead of
   1270 		   trying to patch things.
   1271 		   */
   1272 
   1273 		raidid = raidPtr->raidid;
   1274 #ifdef DEBUG
   1275 		printf("raid%d: Got component label:\n", raidid);
   1276 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1277 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1278 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1279 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1280 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1281 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1282 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1283 #endif
   1284 		clabel->row = 0;
   1285 		column = clabel->column;
   1286 
   1287 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1288 			return(EINVAL);
   1289 		}
   1290 
   1291 		/* XXX this isn't allowed to do anything for now :-) */
   1292 
   1293 		/* XXX and before it is, we need to fill in the rest
   1294 		   of the fields!?!?!?! */
   1295 #if 0
   1296 		raidwrite_component_label(
   1297 		     raidPtr->Disks[column].dev,
   1298 			    raidPtr->raid_cinfo[column].ci_vp,
   1299 			    clabel );
   1300 #endif
   1301 		return (0);
   1302 
   1303 	case RAIDFRAME_INIT_LABELS:
   1304 		clabel = (RF_ComponentLabel_t *) data;
   1305 		/*
   1306 		   we only want the serial number from
   1307 		   the above.  We get all the rest of the information
   1308 		   from the config that was used to create this RAID
   1309 		   set.
   1310 		   */
   1311 
   1312 		raidPtr->serial_number = clabel->serial_number;
   1313 
   1314 		RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
   1315 			  (RF_ComponentLabel_t *));
   1316 		if (ci_label == NULL)
   1317 			return (ENOMEM);
   1318 
   1319 		raid_init_component_label(raidPtr, ci_label);
   1320 		ci_label->serial_number = clabel->serial_number;
    1321 		ci_label->row = 0; /* we don't pretend to support more */
   1322 
   1323 		for(column=0;column<raidPtr->numCol;column++) {
   1324 			diskPtr = &raidPtr->Disks[column];
   1325 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1326 				ci_label->partitionSize = diskPtr->partitionSize;
   1327 				ci_label->column = column;
   1328 				raidwrite_component_label(
   1329 							  raidPtr->Disks[column].dev,
   1330 							  raidPtr->raid_cinfo[column].ci_vp,
   1331 							  ci_label );
   1332 			}
   1333 		}
   1334 		RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
   1335 
   1336 		return (retcode);
   1337 	case RAIDFRAME_SET_AUTOCONFIG:
   1338 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1339 		printf("raid%d: New autoconfig value is: %d\n",
   1340 		       raidPtr->raidid, d);
   1341 		*(int *) data = d;
   1342 		return (retcode);
   1343 
   1344 	case RAIDFRAME_SET_ROOT:
   1345 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1346 		printf("raid%d: New rootpartition value is: %d\n",
   1347 		       raidPtr->raidid, d);
   1348 		*(int *) data = d;
   1349 		return (retcode);
   1350 
   1351 		/* initialize all parity */
   1352 	case RAIDFRAME_REWRITEPARITY:
   1353 
   1354 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1355 			/* Parity for RAID 0 is trivially correct */
   1356 			raidPtr->parity_good = RF_RAID_CLEAN;
   1357 			return(0);
   1358 		}
   1359 
   1360 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1361 			/* Re-write is already in progress! */
   1362 			return(EINVAL);
   1363 		}
   1364 
   1365 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1366 					   rf_RewriteParityThread,
   1367 					   raidPtr,"raid_parity");
   1368 		return (retcode);
   1369 
   1370 
   1371 	case RAIDFRAME_ADD_HOT_SPARE:
   1372 		sparePtr = (RF_SingleComponent_t *) data;
   1373 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
   1374 		retcode = rf_add_hot_spare(raidPtr, &component);
   1375 		return(retcode);
   1376 
   1377 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1378 		return(retcode);
   1379 
   1380 	case RAIDFRAME_DELETE_COMPONENT:
   1381 		componentPtr = (RF_SingleComponent_t *)data;
   1382 		memcpy( &component, componentPtr,
   1383 			sizeof(RF_SingleComponent_t));
   1384 		retcode = rf_delete_component(raidPtr, &component);
   1385 		return(retcode);
   1386 
   1387 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1388 		componentPtr = (RF_SingleComponent_t *)data;
   1389 		memcpy( &component, componentPtr,
   1390 			sizeof(RF_SingleComponent_t));
   1391 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1392 		return(retcode);
   1393 
   1394 	case RAIDFRAME_REBUILD_IN_PLACE:
   1395 
   1396 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1397 			/* Can't do this on a RAID 0!! */
   1398 			return(EINVAL);
   1399 		}
   1400 
   1401 		if (raidPtr->recon_in_progress == 1) {
   1402 			/* a reconstruct is already in progress! */
   1403 			return(EINVAL);
   1404 		}
   1405 
   1406 		componentPtr = (RF_SingleComponent_t *) data;
   1407 		memcpy( &component, componentPtr,
   1408 			sizeof(RF_SingleComponent_t));
   1409 		component.row = 0; /* we don't support any more */
   1410 		column = component.column;
   1411 
   1412 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1413 			return(EINVAL);
   1414 		}
   1415 
   1416 		RF_LOCK_MUTEX(raidPtr->mutex);
   1417 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1418 		    (raidPtr->numFailures > 0)) {
   1419 			/* XXX 0 above shouldn't be constant!!! */
   1420 			/* some component other than this has failed.
   1421 			   Let's not make things worse than they already
   1422 			   are... */
   1423 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1424 			       raidPtr->raidid);
   1425 			printf("raid%d:     Col: %d   Too many failures.\n",
   1426 			       raidPtr->raidid, column);
   1427 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1428 			return (EINVAL);
   1429 		}
   1430 		if (raidPtr->Disks[column].status ==
   1431 		    rf_ds_reconstructing) {
   1432 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1433 			       raidPtr->raidid);
    1434 			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);
   1435 
   1436 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1437 			return (EINVAL);
   1438 		}
   1439 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1440 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1441 			return (EINVAL);
   1442 		}
   1443 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1444 
   1445 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1446 		if (rrcopy == NULL)
   1447 			return(ENOMEM);
   1448 
   1449 		rrcopy->raidPtr = (void *) raidPtr;
   1450 		rrcopy->col = column;
   1451 
   1452 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1453 					   rf_ReconstructInPlaceThread,
   1454 					   rrcopy,"raid_reconip");
   1455 		return(retcode);
   1456 
   1457 	case RAIDFRAME_GET_INFO:
   1458 		if (!raidPtr->valid)
   1459 			return (ENODEV);
   1460 		ucfgp = (RF_DeviceConfig_t **) data;
   1461 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1462 			  (RF_DeviceConfig_t *));
   1463 		if (d_cfg == NULL)
   1464 			return (ENOMEM);
   1465 		d_cfg->rows = 1; /* there is only 1 row now */
   1466 		d_cfg->cols = raidPtr->numCol;
   1467 		d_cfg->ndevs = raidPtr->numCol;
   1468 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1469 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1470 			return (ENOMEM);
   1471 		}
   1472 		d_cfg->nspares = raidPtr->numSpare;
   1473 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1474 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1475 			return (ENOMEM);
   1476 		}
   1477 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1478 		d = 0;
   1479 		for (j = 0; j < d_cfg->cols; j++) {
   1480 			d_cfg->devs[d] = raidPtr->Disks[j];
   1481 			d++;
   1482 		}
   1483 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1484 			d_cfg->spares[i] = raidPtr->Disks[j];
   1485 		}
   1486 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1487 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1488 
   1489 		return (retcode);
   1490 
   1491 	case RAIDFRAME_CHECK_PARITY:
   1492 		*(int *) data = raidPtr->parity_good;
   1493 		return (0);
   1494 
   1495 	case RAIDFRAME_RESET_ACCTOTALS:
   1496 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1497 		return (0);
   1498 
   1499 	case RAIDFRAME_GET_ACCTOTALS:
   1500 		totals = (RF_AccTotals_t *) data;
   1501 		*totals = raidPtr->acc_totals;
   1502 		return (0);
   1503 
   1504 	case RAIDFRAME_KEEP_ACCTOTALS:
   1505 		raidPtr->keep_acc_totals = *(int *)data;
   1506 		return (0);
   1507 
   1508 	case RAIDFRAME_GET_SIZE:
   1509 		*(int *) data = raidPtr->totalSectors;
   1510 		return (0);
   1511 
   1512 		/* fail a disk & optionally start reconstruction */
   1513 	case RAIDFRAME_FAIL_DISK:
   1514 
   1515 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1516 			/* Can't do this on a RAID 0!! */
   1517 			return(EINVAL);
   1518 		}
   1519 
   1520 		rr = (struct rf_recon_req *) data;
   1521 		rr->row = 0;
   1522 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1523 			return (EINVAL);
   1524 
   1525 
   1526 		RF_LOCK_MUTEX(raidPtr->mutex);
   1527 		if (raidPtr->status == rf_rs_reconstructing) {
   1528 			/* you can't fail a disk while we're reconstructing! */
   1529 			/* XXX wrong for RAID6 */
   1530 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1531 			return (EINVAL);
   1532 		}
   1533 		if ((raidPtr->Disks[rr->col].status ==
   1534 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1535 			/* some other component has failed.  Let's not make
   1536 			   things worse. XXX wrong for RAID6 */
   1537 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1538 			return (EINVAL);
   1539 		}
   1540 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1541 			/* Can't fail a spared disk! */
   1542 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1543 			return (EINVAL);
   1544 		}
   1545 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1546 
   1547 		/* make a copy of the recon request so that we don't rely on
   1548 		 * the user's buffer */
   1549 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1550 		if (rrcopy == NULL)
   1551 			return(ENOMEM);
   1552 		memcpy(rrcopy, rr, sizeof(*rr));
   1553 		rrcopy->raidPtr = (void *) raidPtr;
   1554 
   1555 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1556 					   rf_ReconThread,
   1557 					   rrcopy,"raid_recon");
   1558 		return (0);
   1559 
   1560 		/* invoke a copyback operation after recon on whatever disk
   1561 		 * needs it, if any */
   1562 	case RAIDFRAME_COPYBACK:
   1563 
   1564 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1565 			/* This makes no sense on a RAID 0!! */
   1566 			return(EINVAL);
   1567 		}
   1568 
   1569 		if (raidPtr->copyback_in_progress == 1) {
   1570 			/* Copyback is already in progress! */
   1571 			return(EINVAL);
   1572 		}
   1573 
   1574 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1575 					   rf_CopybackThread,
   1576 					   raidPtr,"raid_copyback");
   1577 		return (retcode);
   1578 
   1579 		/* return the percentage completion of reconstruction */
   1580 	case RAIDFRAME_CHECK_RECON_STATUS:
   1581 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1582 			/* This makes no sense on a RAID 0, so tell the
   1583 			   user it's done. */
   1584 			*(int *) data = 100;
   1585 			return(0);
   1586 		}
   1587 		if (raidPtr->status != rf_rs_reconstructing)
   1588 			*(int *) data = 100;
   1589 		else {
   1590 			if (raidPtr->reconControl->numRUsTotal > 0) {
   1591 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
   1592 			} else {
   1593 				*(int *) data = 0;
   1594 			}
   1595 		}
   1596 		return (0);
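		/*
		 * The percentage-style status ioctls here and below all share
		 * one simple userland pattern (sketch only, not driver code):
		 *
		 *	int pct;
		 *
		 *	if (ioctl(fd, RAIDFRAME_CHECK_RECON_STATUS, &pct) == 0)
		 *		printf("reconstruction %d%% complete\n", pct);
		 */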
   1597 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1598 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1599 		if (raidPtr->status != rf_rs_reconstructing) {
   1600 			progressInfo.remaining = 0;
   1601 			progressInfo.completed = 100;
   1602 			progressInfo.total = 100;
   1603 		} else {
   1604 			progressInfo.total =
   1605 				raidPtr->reconControl->numRUsTotal;
   1606 			progressInfo.completed =
   1607 				raidPtr->reconControl->numRUsComplete;
   1608 			progressInfo.remaining = progressInfo.total -
   1609 				progressInfo.completed;
   1610 		}
   1611 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1612 				  sizeof(RF_ProgressInfo_t));
   1613 		return (retcode);
   1614 
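		/*
		 * The *_STATUS_EXT variants return absolute counts rather than
		 * a percentage: the caller passes in the address of a pointer
		 * to an RF_ProgressInfo_t, and total/completed/remaining are
		 * copyout()'d to it.  The userland side, sketched with the
		 * field types elided:
		 *
		 *	RF_ProgressInfo_t info, *infop = &info;
		 *
		 *	if (ioctl(fd, RAIDFRAME_CHECK_RECON_STATUS_EXT, &infop) == 0)
		 *		(use info.total, info.completed, info.remaining)
		 */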
   1615 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1616 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1617 			/* This makes no sense on a RAID 0, so tell the
   1618 			   user it's done. */
   1619 			*(int *) data = 100;
   1620 			return(0);
   1621 		}
   1622 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1623 			*(int *) data = 100 *
   1624 				raidPtr->parity_rewrite_stripes_done /
   1625 				raidPtr->Layout.numStripe;
   1626 		} else {
   1627 			*(int *) data = 100;
   1628 		}
   1629 		return (0);
   1630 
   1631 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1632 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1633 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1634 			progressInfo.total = raidPtr->Layout.numStripe;
   1635 			progressInfo.completed =
   1636 				raidPtr->parity_rewrite_stripes_done;
   1637 			progressInfo.remaining = progressInfo.total -
   1638 				progressInfo.completed;
   1639 		} else {
   1640 			progressInfo.remaining = 0;
   1641 			progressInfo.completed = 100;
   1642 			progressInfo.total = 100;
   1643 		}
   1644 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1645 				  sizeof(RF_ProgressInfo_t));
   1646 		return (retcode);
   1647 
   1648 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1649 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1650 			/* This makes no sense on a RAID 0 */
   1651 			*(int *) data = 100;
   1652 			return(0);
   1653 		}
   1654 		if (raidPtr->copyback_in_progress == 1) {
   1655 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1656 				raidPtr->Layout.numStripe;
   1657 		} else {
   1658 			*(int *) data = 100;
   1659 		}
   1660 		return (0);
   1661 
   1662 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1663 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1664 		if (raidPtr->copyback_in_progress == 1) {
   1665 			progressInfo.total = raidPtr->Layout.numStripe;
   1666 			progressInfo.completed =
   1667 				raidPtr->copyback_stripes_done;
   1668 			progressInfo.remaining = progressInfo.total -
   1669 				progressInfo.completed;
   1670 		} else {
   1671 			progressInfo.remaining = 0;
   1672 			progressInfo.completed = 100;
   1673 			progressInfo.total = 100;
   1674 		}
   1675 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1676 				  sizeof(RF_ProgressInfo_t));
   1677 		return (retcode);
   1678 
   1679 		/* the sparetable daemon calls this to wait for the kernel to
   1680 		 * need a spare table. this ioctl does not return until a
   1681 		 * spare table is needed. XXX -- calling mpsleep here in the
   1682 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1683 		 * -- I should either compute the spare table in the kernel,
   1684 		 * or have a different -- XXX XXX -- interface (a different
   1685 		 * character device) for delivering the table     -- XXX */
   1686 #if 0
   1687 	case RAIDFRAME_SPARET_WAIT:
   1688 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1689 		while (!rf_sparet_wait_queue)
   1690 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1691 		waitreq = rf_sparet_wait_queue;
   1692 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1693 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1694 
   1695 		/* structure assignment */
   1696 		*((RF_SparetWait_t *) data) = *waitreq;
   1697 
   1698 		RF_Free(waitreq, sizeof(*waitreq));
   1699 		return (0);
   1700 
   1701 		/* wakes up a process waiting on SPARET_WAIT and puts an error
    1702 		 * code in it that will cause the daemon to exit */
   1703 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1704 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1705 		waitreq->fcol = -1;
   1706 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1707 		waitreq->next = rf_sparet_wait_queue;
   1708 		rf_sparet_wait_queue = waitreq;
   1709 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1710 		wakeup(&rf_sparet_wait_queue);
   1711 		return (0);
   1712 
   1713 		/* used by the spare table daemon to deliver a spare table
   1714 		 * into the kernel */
   1715 	case RAIDFRAME_SEND_SPARET:
   1716 
   1717 		/* install the spare table */
   1718 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1719 
   1720 		/* respond to the requestor.  the return status of the spare
   1721 		 * table installation is passed in the "fcol" field */
   1722 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1723 		waitreq->fcol = retcode;
   1724 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1725 		waitreq->next = rf_sparet_resp_queue;
   1726 		rf_sparet_resp_queue = waitreq;
   1727 		wakeup(&rf_sparet_resp_queue);
   1728 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1729 
   1730 		return (retcode);
   1731 #endif
   1732 
   1733 	default:
   1734 		break; /* fall through to the os-specific code below */
   1735 
   1736 	}
   1737 
   1738 	if (!raidPtr->valid)
   1739 		return (EINVAL);
   1740 
   1741 	/*
   1742 	 * Add support for "regular" device ioctls here.
   1743 	 */
   1744 
   1745 	switch (cmd) {
   1746 	case DIOCGDINFO:
   1747 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1748 		break;
   1749 #ifdef __HAVE_OLD_DISKLABEL
   1750 	case ODIOCGDINFO:
   1751 		newlabel = *(rs->sc_dkdev.dk_label);
   1752 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1753 			return ENOTTY;
   1754 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1755 		break;
   1756 #endif
   1757 
   1758 	case DIOCGPART:
   1759 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1760 		((struct partinfo *) data)->part =
   1761 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1762 		break;
   1763 
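		/*
		 * Setting or writing a disklabel is serialized with raidlock()
		 * and flagged with RAIDF_LABELLING while it is in progress;
		 * DIOCWDINFO (and ODIOCWDINFO) additionally write the
		 * validated label back to the RAID device via writedisklabel().
		 */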
   1764 	case DIOCWDINFO:
   1765 	case DIOCSDINFO:
   1766 #ifdef __HAVE_OLD_DISKLABEL
   1767 	case ODIOCWDINFO:
   1768 	case ODIOCSDINFO:
   1769 #endif
   1770 	{
   1771 		struct disklabel *lp;
   1772 #ifdef __HAVE_OLD_DISKLABEL
   1773 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1774 			memset(&newlabel, 0, sizeof newlabel);
   1775 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1776 			lp = &newlabel;
   1777 		} else
   1778 #endif
   1779 		lp = (struct disklabel *)data;
   1780 
   1781 		if ((error = raidlock(rs)) != 0)
   1782 			return (error);
   1783 
   1784 		rs->sc_flags |= RAIDF_LABELLING;
   1785 
   1786 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1787 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1788 		if (error == 0) {
   1789 			if (cmd == DIOCWDINFO
   1790 #ifdef __HAVE_OLD_DISKLABEL
   1791 			    || cmd == ODIOCWDINFO
   1792 #endif
   1793 			   )
   1794 				error = writedisklabel(RAIDLABELDEV(dev),
   1795 				    raidstrategy, rs->sc_dkdev.dk_label,
   1796 				    rs->sc_dkdev.dk_cpulabel);
   1797 		}
   1798 		rs->sc_flags &= ~RAIDF_LABELLING;
   1799 
   1800 		raidunlock(rs);
   1801 
   1802 		if (error)
   1803 			return (error);
   1804 		break;
   1805 	}
   1806 
   1807 	case DIOCWLABEL:
   1808 		if (*(int *) data != 0)
   1809 			rs->sc_flags |= RAIDF_WLABEL;
   1810 		else
   1811 			rs->sc_flags &= ~RAIDF_WLABEL;
   1812 		break;
   1813 
   1814 	case DIOCGDEFLABEL:
   1815 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1816 		break;
   1817 
   1818 #ifdef __HAVE_OLD_DISKLABEL
   1819 	case ODIOCGDEFLABEL:
   1820 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1821 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1822 			return ENOTTY;
   1823 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1824 		break;
   1825 #endif
   1826 
   1827 	case DIOCAWEDGE:
   1828 	case DIOCDWEDGE:
   1829 	    	dkw = (void *)data;
   1830 
   1831 		/* If the ioctl happens here, the parent is us. */
   1832 		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
   1833 		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
   1834 
   1835 	case DIOCLWEDGES:
   1836 		return dkwedge_list(&rs->sc_dkdev,
   1837 		    (struct dkwedge_list *)data, l);
   1838 	case DIOCCACHESYNC:
   1839 		return rf_sync_component_caches(raidPtr);
   1840 	default:
   1841 		retcode = ENOTTY;
   1842 	}
   1843 	return (retcode);
   1844 
   1845 }
   1846 
   1847 
   1848 /* raidinit -- complete the rest of the initialization for the
   1849    RAIDframe device.  */
   1850 
   1851 
   1852 static void
   1853 raidinit(RF_Raid_t *raidPtr)
   1854 {
   1855 	struct cfdata *cf;
   1856 	struct raid_softc *rs;
   1857 	int     unit;
   1858 
   1859 	unit = raidPtr->raidid;
   1860 
   1861 	rs = &raid_softc[unit];
   1862 
   1863 	/* XXX should check return code first... */
   1864 	rs->sc_flags |= RAIDF_INITED;
   1865 
   1866 	/* XXX doesn't check bounds. */
   1867 	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
   1868 
   1869 	/* attach the pseudo device */
   1870 	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
   1871 	cf->cf_name = raid_cd.cd_name;
   1872 	cf->cf_atname = raid_cd.cd_name;
   1873 	cf->cf_unit = unit;
   1874 	cf->cf_fstate = FSTATE_STAR;
   1875 
   1876 	rs->sc_dev = config_attach_pseudo(cf);
   1877 
   1878 	if (rs->sc_dev==NULL) {
   1879 		printf("raid%d: config_attach_pseudo failed\n",
   1880 		       raidPtr->raidid);
   1881 	}
   1882 
   1883 	/* disk_attach actually creates space for the CPU disklabel, among
   1884 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1885 	 * with disklabels. */
   1886 
   1887 	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
   1888 	disk_attach(&rs->sc_dkdev);
   1889 
   1890 	/* XXX There may be a weird interaction here between this, and
   1891 	 * protectedSectors, as used in RAIDframe.  */
   1892 
   1893 	rs->sc_size = raidPtr->totalSectors;
   1894 
   1895 	dkwedge_discover(&rs->sc_dkdev);
   1896 
   1897 	rf_set_properties(rs, raidPtr);
   1898 
   1899 }
   1900 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   1901 /* wake up the daemon & tell it to get us a spare table
   1902  * XXX
   1903  * the entries in the queues should be tagged with the raidPtr
   1904  * so that in the extremely rare case that two recons happen at once,
    1905  * we know for which device we're requesting a spare table
   1906  * XXX
   1907  *
   1908  * XXX This code is not currently used. GO
   1909  */
   1910 int
   1911 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
   1912 {
   1913 	int     retcode;
   1914 
   1915 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1916 	req->next = rf_sparet_wait_queue;
   1917 	rf_sparet_wait_queue = req;
   1918 	wakeup(&rf_sparet_wait_queue);
   1919 
   1920 	/* mpsleep unlocks the mutex */
   1921 	while (!rf_sparet_resp_queue) {
   1922 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1923 		    "raidframe getsparetable", 0);
   1924 	}
   1925 	req = rf_sparet_resp_queue;
   1926 	rf_sparet_resp_queue = req->next;
   1927 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1928 
   1929 	retcode = req->fcol;
   1930 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1931 					 * alloc'd */
   1932 	return (retcode);
   1933 }
   1934 #endif
   1935 
    1936 /* A wrapper around rf_DoAccess that extracts the appropriate info from the
    1937  * bp & passes it down.
    1938  * Any calls originating in the kernel must use non-blocking I/O.  We do
    1939  * some extra sanity checking to return "appropriate" error values for
    1940  * certain conditions (to make some standard utilities work).
    1941  *
    1942  * Formerly known as: rf_DoAccessKernel
    1943  */
   1944 void
   1945 raidstart(RF_Raid_t *raidPtr)
   1946 {
   1947 	RF_SectorCount_t num_blocks, pb, sum;
   1948 	RF_RaidAddr_t raid_addr;
   1949 	struct partition *pp;
   1950 	daddr_t blocknum;
   1951 	int     unit;
   1952 	struct raid_softc *rs;
   1953 	int     do_async;
   1954 	struct buf *bp;
   1955 	int rc;
   1956 
   1957 	unit = raidPtr->raidid;
   1958 	rs = &raid_softc[unit];
   1959 
   1960 	/* quick check to see if anything has died recently */
   1961 	RF_LOCK_MUTEX(raidPtr->mutex);
   1962 	if (raidPtr->numNewFailures > 0) {
   1963 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1964 		rf_update_component_labels(raidPtr,
   1965 					   RF_NORMAL_COMPONENT_UPDATE);
   1966 		RF_LOCK_MUTEX(raidPtr->mutex);
   1967 		raidPtr->numNewFailures--;
   1968 	}
   1969 
   1970 	/* Check to see if we're at the limit... */
   1971 	while (raidPtr->openings > 0) {
   1972 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1973 
   1974 		/* get the next item, if any, from the queue */
   1975 		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
   1976 			/* nothing more to do */
   1977 			return;
   1978 		}
   1979 
   1980 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1981 		 * partition.. Need to make it absolute to the underlying
   1982 		 * device.. */
   1983 
   1984 		blocknum = bp->b_blkno;
   1985 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1986 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1987 			blocknum += pp->p_offset;
   1988 		}
   1989 
   1990 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1991 			    (int) blocknum));
   1992 
   1993 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1994 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1995 
   1996 		/* *THIS* is where we adjust what block we're going to...
   1997 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1998 		raid_addr = blocknum;
   1999 
   2000 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   2001 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
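		/* Example with illustrative numbers only: for 512-byte
		 * sectors, logBytesPerSector is 9 and sectorMask is 511, so a
		 * 64kB request maps to 128 sectors with pb == 0; a request
		 * that is not a multiple of the sector size sets pb here and
		 * is rejected with EINVAL further below. */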
   2002 		sum = raid_addr + num_blocks + pb;
   2003 		if (1 || rf_debugKernelAccess) {
   2004 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   2005 				    (int) raid_addr, (int) sum, (int) num_blocks,
   2006 				    (int) pb, (int) bp->b_resid));
   2007 		}
   2008 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   2009 		    || (sum < num_blocks) || (sum < pb)) {
   2010 			bp->b_error = ENOSPC;
   2011 			bp->b_resid = bp->b_bcount;
   2012 			biodone(bp);
   2013 			RF_LOCK_MUTEX(raidPtr->mutex);
   2014 			continue;
   2015 		}
   2016 		/*
   2017 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   2018 		 */
   2019 
   2020 		if (bp->b_bcount & raidPtr->sectorMask) {
   2021 			bp->b_error = EINVAL;
   2022 			bp->b_resid = bp->b_bcount;
   2023 			biodone(bp);
   2024 			RF_LOCK_MUTEX(raidPtr->mutex);
   2025 			continue;
   2026 
   2027 		}
   2028 		db1_printf(("Calling DoAccess..\n"));
   2029 
   2030 
   2031 		RF_LOCK_MUTEX(raidPtr->mutex);
   2032 		raidPtr->openings--;
   2033 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   2034 
   2035 		/*
   2036 		 * Everything is async.
   2037 		 */
   2038 		do_async = 1;
   2039 
   2040 		disk_busy(&rs->sc_dkdev);
   2041 
   2042 		/* XXX we're still at splbio() here... do we *really*
   2043 		   need to be? */
   2044 
   2045 		/* don't ever condition on bp->b_flags & B_WRITE.
   2046 		 * always condition on B_READ instead */
   2047 
   2048 		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   2049 				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   2050 				 do_async, raid_addr, num_blocks,
   2051 				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   2052 
   2053 		if (rc) {
   2054 			bp->b_error = rc;
   2055 			bp->b_resid = bp->b_bcount;
   2056 			biodone(bp);
   2057 			/* continue loop */
   2058 		}
   2059 
   2060 		RF_LOCK_MUTEX(raidPtr->mutex);
   2061 	}
   2062 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   2063 }
   2064 
   2065 
   2066 
   2067 
   2068 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   2069 
   2070 int
   2071 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
   2072 {
   2073 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   2074 	struct buf *bp;
   2075 
   2076 	req->queue = queue;
   2077 
   2078 #if DIAGNOSTIC
   2079 	if (queue->raidPtr->raidid >= numraid) {
   2080 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
   2081 		    numraid);
   2082 		panic("Invalid Unit number in rf_DispatchKernelIO");
   2083 	}
   2084 #endif
   2085 
   2086 	bp = req->bp;
   2087 
   2088 	switch (req->type) {
   2089 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   2090 		/* XXX need to do something extra here.. */
   2091 		/* I'm leaving this in, as I've never actually seen it used,
   2092 		 * and I'd like folks to report it... GO */
    2093 		printf("WAKEUP CALLED\n");
   2094 		queue->numOutstanding++;
   2095 
   2096 		bp->b_flags = 0;
   2097 		bp->b_private = req;
   2098 
   2099 		KernelWakeupFunc(bp);
   2100 		break;
   2101 
   2102 	case RF_IO_TYPE_READ:
   2103 	case RF_IO_TYPE_WRITE:
   2104 #if RF_ACC_TRACE > 0
   2105 		if (req->tracerec) {
   2106 			RF_ETIMER_START(req->tracerec->timer);
   2107 		}
   2108 #endif
   2109 		InitBP(bp, queue->rf_cinfo->ci_vp,
   2110 		    op, queue->rf_cinfo->ci_dev,
   2111 		    req->sectorOffset, req->numSector,
   2112 		    req->buf, KernelWakeupFunc, (void *) req,
   2113 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   2114 
   2115 		if (rf_debugKernelAccess) {
   2116 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   2117 				(long) bp->b_blkno));
   2118 		}
   2119 		queue->numOutstanding++;
   2120 		queue->last_deq_sector = req->sectorOffset;
   2121 		/* acc wouldn't have been let in if there were any pending
   2122 		 * reqs at any other priority */
   2123 		queue->curPriority = req->priority;
   2124 
   2125 		db1_printf(("Going for %c to unit %d col %d\n",
   2126 			    req->type, queue->raidPtr->raidid,
   2127 			    queue->col));
   2128 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   2129 			(int) req->sectorOffset, (int) req->numSector,
   2130 			(int) (req->numSector <<
   2131 			    queue->raidPtr->logBytesPerSector),
   2132 			(int) queue->raidPtr->logBytesPerSector));
   2133 		bdev_strategy(bp);
   2134 
   2135 		break;
   2136 
   2137 	default:
   2138 		panic("bad req->type in rf_DispatchKernelIO");
   2139 	}
   2140 	db1_printf(("Exiting from DispatchKernelIO\n"));
   2141 
   2142 	return (0);
   2143 }
    2144 /* This is the callback function associated with an I/O invoked from
   2145    kernel code.
   2146  */
   2147 static void
   2148 KernelWakeupFunc(struct buf *bp)
   2149 {
   2150 	RF_DiskQueueData_t *req = NULL;
   2151 	RF_DiskQueue_t *queue;
   2152 	int s;
   2153 
   2154 	s = splbio();
   2155 	db1_printf(("recovering the request queue:\n"));
   2156 	req = bp->b_private;
   2157 
   2158 	queue = (RF_DiskQueue_t *) req->queue;
   2159 
   2160 #if RF_ACC_TRACE > 0
   2161 	if (req->tracerec) {
   2162 		RF_ETIMER_STOP(req->tracerec->timer);
   2163 		RF_ETIMER_EVAL(req->tracerec->timer);
   2164 		RF_LOCK_MUTEX(rf_tracing_mutex);
   2165 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2166 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2167 		req->tracerec->num_phys_ios++;
   2168 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   2169 	}
   2170 #endif
   2171 
   2172 	/* XXX Ok, let's get aggressive... If b_error is set, let's go
   2173 	 * ballistic, and mark the component as hosed... */
   2174 
   2175 	if (bp->b_error != 0) {
   2176 		/* Mark the disk as dead */
   2177 		/* but only mark it once... */
   2178 		/* and only if it wouldn't leave this RAID set
   2179 		   completely broken */
   2180 		if (((queue->raidPtr->Disks[queue->col].status ==
   2181 		      rf_ds_optimal) ||
   2182 		     (queue->raidPtr->Disks[queue->col].status ==
   2183 		      rf_ds_used_spare)) &&
   2184 		     (queue->raidPtr->numFailures <
   2185 		      queue->raidPtr->Layout.map->faultsTolerated)) {
   2186 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   2187 			       queue->raidPtr->raidid,
   2188 			       queue->raidPtr->Disks[queue->col].devname);
   2189 			queue->raidPtr->Disks[queue->col].status =
   2190 			    rf_ds_failed;
   2191 			queue->raidPtr->status = rf_rs_degraded;
   2192 			queue->raidPtr->numFailures++;
   2193 			queue->raidPtr->numNewFailures++;
   2194 		} else {	/* Disk is already dead... */
   2195 			/* printf("Disk already marked as dead!\n"); */
   2196 		}
   2197 
   2198 	}
   2199 
   2200 	/* Fill in the error value */
   2201 
   2202 	req->error = bp->b_error;
   2203 
   2204 	simple_lock(&queue->raidPtr->iodone_lock);
   2205 
   2206 	/* Drop this one on the "finished" queue... */
   2207 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   2208 
   2209 	/* Let the raidio thread know there is work to be done. */
   2210 	wakeup(&(queue->raidPtr->iodone));
   2211 
   2212 	simple_unlock(&queue->raidPtr->iodone_lock);
   2213 
   2214 	splx(s);
   2215 }
   2216 
   2217 
   2218 
   2219 /*
   2220  * initialize a buf structure for doing an I/O in the kernel.
   2221  */
   2222 static void
   2223 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2224        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
   2225        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
   2226        struct proc *b_proc)
   2227 {
   2228 	/* bp->b_flags       = B_PHYS | rw_flag; */
   2229 	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
   2230 	bp->b_oflags = 0;
   2231 	bp->b_cflags = 0;
   2232 	bp->b_bcount = numSect << logBytesPerSector;
   2233 	bp->b_bufsize = bp->b_bcount;
   2234 	bp->b_error = 0;
   2235 	bp->b_dev = dev;
   2236 	bp->b_data = bf;
   2237 	bp->b_blkno = startSect;
   2238 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2239 	if (bp->b_bcount == 0) {
   2240 		panic("bp->b_bcount is zero in InitBP!!");
   2241 	}
   2242 	bp->b_proc = b_proc;
   2243 	bp->b_iodone = cbFunc;
   2244 	bp->b_private = cbArg;
   2245 }
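/*
 * Note that cbFunc is installed as b_iodone, so for the read/write path in
 * rf_DispatchKernelIO() above it is KernelWakeupFunc() that runs when the
 * component I/O completes and hands the finished request to the raidio
 * thread via the iodone queue.
 */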
   2246 
   2247 static void
   2248 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
   2249 		    struct disklabel *lp)
   2250 {
   2251 	memset(lp, 0, sizeof(*lp));
   2252 
   2253 	/* fabricate a label... */
   2254 	lp->d_secperunit = raidPtr->totalSectors;
   2255 	lp->d_secsize = raidPtr->bytesPerSector;
   2256 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2257 	lp->d_ntracks = 4 * raidPtr->numCol;
   2258 	lp->d_ncylinders = raidPtr->totalSectors /
   2259 		(lp->d_nsectors * lp->d_ntracks);
   2260 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
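	/* The geometry is entirely fictitious; for example (illustrative
	 * only) a 4-column set advertises 16 tracks per cylinder, with
	 * "sectors per track" equal to the data sectors in one stripe. */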
   2261 
   2262 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2263 	lp->d_type = DTYPE_RAID;
   2264 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2265 	lp->d_rpm = 3600;
   2266 	lp->d_interleave = 1;
   2267 	lp->d_flags = 0;
   2268 
   2269 	lp->d_partitions[RAW_PART].p_offset = 0;
   2270 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2271 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2272 	lp->d_npartitions = RAW_PART + 1;
   2273 
   2274 	lp->d_magic = DISKMAGIC;
   2275 	lp->d_magic2 = DISKMAGIC;
   2276 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2277 
   2278 }
   2279 /*
   2280  * Read the disklabel from the raid device.  If one is not present, fake one
   2281  * up.
   2282  */
   2283 static void
   2284 raidgetdisklabel(dev_t dev)
   2285 {
   2286 	int     unit = raidunit(dev);
   2287 	struct raid_softc *rs = &raid_softc[unit];
   2288 	const char   *errstring;
   2289 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2290 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2291 	RF_Raid_t *raidPtr;
   2292 
   2293 	db1_printf(("Getting the disklabel...\n"));
   2294 
   2295 	memset(clp, 0, sizeof(*clp));
   2296 
   2297 	raidPtr = raidPtrs[unit];
   2298 
   2299 	raidgetdefaultlabel(raidPtr, rs, lp);
   2300 
   2301 	/*
   2302 	 * Call the generic disklabel extraction routine.
   2303 	 */
   2304 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2305 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2306 	if (errstring)
   2307 		raidmakedisklabel(rs);
   2308 	else {
   2309 		int     i;
   2310 		struct partition *pp;
   2311 
   2312 		/*
   2313 		 * Sanity check whether the found disklabel is valid.
   2314 		 *
    2315 		 * This is necessary since the total size of the raid device
    2316 		 * may vary when the interleave is changed even though exactly
    2317 		 * the same components are used, and an old disklabel may be
    2318 		 * used if one is found.
   2319 		 */
   2320 		if (lp->d_secperunit != rs->sc_size)
   2321 			printf("raid%d: WARNING: %s: "
   2322 			    "total sector size in disklabel (%d) != "
   2323 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2324 			    lp->d_secperunit, (long) rs->sc_size);
   2325 		for (i = 0; i < lp->d_npartitions; i++) {
   2326 			pp = &lp->d_partitions[i];
   2327 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2328 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2329 				       "exceeds the size of raid (%ld)\n",
   2330 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2331 		}
   2332 	}
   2333 
   2334 }
   2335 /*
   2336  * Take care of things one might want to take care of in the event
   2337  * that a disklabel isn't present.
   2338  */
   2339 static void
   2340 raidmakedisklabel(struct raid_softc *rs)
   2341 {
   2342 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2343 	db1_printf(("Making a label..\n"));
   2344 
   2345 	/*
   2346 	 * For historical reasons, if there's no disklabel present
   2347 	 * the raw partition must be marked FS_BSDFFS.
   2348 	 */
   2349 
   2350 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2351 
   2352 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2353 
   2354 	lp->d_checksum = dkcksum(lp);
   2355 }
   2356 /*
   2357  * Wait interruptibly for an exclusive lock.
   2358  *
   2359  * XXX
   2360  * Several drivers do this; it should be abstracted and made MP-safe.
   2361  * (Hmm... where have we seen this warning before :->  GO )
   2362  */
   2363 static int
   2364 raidlock(struct raid_softc *rs)
   2365 {
   2366 	int     error;
   2367 
   2368 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2369 		rs->sc_flags |= RAIDF_WANTED;
   2370 		if ((error =
   2371 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2372 			return (error);
   2373 	}
   2374 	rs->sc_flags |= RAIDF_LOCKED;
   2375 	return (0);
   2376 }
   2377 /*
   2378  * Unlock and wake up any waiters.
   2379  */
   2380 static void
   2381 raidunlock(struct raid_softc *rs)
   2382 {
   2383 
   2384 	rs->sc_flags &= ~RAIDF_LOCKED;
   2385 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2386 		rs->sc_flags &= ~RAIDF_WANTED;
   2387 		wakeup(rs);
   2388 	}
   2389 }
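/*
 * Typical pairing, as used by the disklabel ioctls above (a sketch of the
 * calling convention, not new code):
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return (error);
 *	(... modify rs ...)
 *	raidunlock(rs);
 */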
   2390 
   2391 
   2392 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2393 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
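/*
 * The component label lives RF_COMPONENT_INFO_OFFSET bytes into each
 * component (i.e. at block RF_COMPONENT_INFO_OFFSET / DEV_BSIZE of the raw
 * component device); the helpers below always transfer one full
 * RF_COMPONENT_INFO_SIZE block when reading or writing it.
 */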
   2394 
   2395 int
   2396 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2397 {
   2398 	RF_ComponentLabel_t clabel;
   2399 	raidread_component_label(dev, b_vp, &clabel);
   2400 	clabel.mod_counter = mod_counter;
   2401 	clabel.clean = RF_RAID_CLEAN;
   2402 	raidwrite_component_label(dev, b_vp, &clabel);
   2403 	return(0);
   2404 }
   2405 
   2406 
   2407 int
   2408 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2409 {
   2410 	RF_ComponentLabel_t clabel;
   2411 	raidread_component_label(dev, b_vp, &clabel);
   2412 	clabel.mod_counter = mod_counter;
   2413 	clabel.clean = RF_RAID_DIRTY;
   2414 	raidwrite_component_label(dev, b_vp, &clabel);
   2415 	return(0);
   2416 }
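/*
 * raidmarkclean() and raidmarkdirty() implement the clean-bit protocol:
 * read the label back, update mod_counter and the clean flag, and rewrite
 * the whole label.  rf_markalldirty() below dirties every live component,
 * and rf_update_component_labels() marks things clean again only on a
 * final update when the parity is known good.
 */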
   2417 
   2418 /* ARGSUSED */
   2419 int
   2420 raidread_component_label(dev_t dev, struct vnode *b_vp,
   2421 			 RF_ComponentLabel_t *clabel)
   2422 {
   2423 	struct buf *bp;
   2424 	const struct bdevsw *bdev;
   2425 	int error;
   2426 
   2427 	/* XXX should probably ensure that we don't try to do this if
   2428 	   someone has changed rf_protected_sectors. */
   2429 
   2430 	if (b_vp == NULL) {
   2431 		/* For whatever reason, this component is not valid.
   2432 		   Don't try to read a component label from it. */
   2433 		return(EINVAL);
   2434 	}
   2435 
   2436 	/* get a block of the appropriate size... */
   2437 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2438 	bp->b_dev = dev;
   2439 
   2440 	/* get our ducks in a row for the read */
   2441 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2442 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2443 	bp->b_flags |= B_READ;
   2444  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2445 
   2446 	bdev = bdevsw_lookup(bp->b_dev);
   2447 	if (bdev == NULL)
   2448 		return (ENXIO);
   2449 	(*bdev->d_strategy)(bp);
   2450 
   2451 	error = biowait(bp);
   2452 
   2453 	if (!error) {
   2454 		memcpy(clabel, bp->b_data,
   2455 		       sizeof(RF_ComponentLabel_t));
   2456 	}
   2457 
   2458 	brelse(bp, 0);
   2459 	return(error);
   2460 }
   2461 /* ARGSUSED */
   2462 int
   2463 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
   2464 			  RF_ComponentLabel_t *clabel)
   2465 {
   2466 	struct buf *bp;
   2467 	const struct bdevsw *bdev;
   2468 	int error;
   2469 
   2470 	/* get a block of the appropriate size... */
   2471 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2472 	bp->b_dev = dev;
   2473 
   2474 	/* get our ducks in a row for the write */
   2475 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2476 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2477 	bp->b_flags |= B_WRITE;
   2478  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2479 
   2480 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2481 
   2482 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2483 
   2484 	bdev = bdevsw_lookup(bp->b_dev);
   2485 	if (bdev == NULL)
   2486 		return (ENXIO);
   2487 	(*bdev->d_strategy)(bp);
   2488 	error = biowait(bp);
   2489 	brelse(bp, 0);
   2490 	if (error) {
   2491 #if 1
   2492 		printf("Failed to write RAID component info!\n");
   2493 #endif
   2494 	}
   2495 
   2496 	return(error);
   2497 }
   2498 
   2499 void
   2500 rf_markalldirty(RF_Raid_t *raidPtr)
   2501 {
   2502 	RF_ComponentLabel_t clabel;
   2503 	int sparecol;
   2504 	int c;
   2505 	int j;
   2506 	int scol = -1;
   2507 
   2508 	raidPtr->mod_counter++;
   2509 	for (c = 0; c < raidPtr->numCol; c++) {
   2510 		/* we don't want to touch (at all) a disk that has
   2511 		   failed */
   2512 		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
   2513 			raidread_component_label(
   2514 						 raidPtr->Disks[c].dev,
   2515 						 raidPtr->raid_cinfo[c].ci_vp,
   2516 						 &clabel);
   2517 			if (clabel.status == rf_ds_spared) {
   2518 				/* XXX do something special...
   2519 				   but whatever you do, don't
   2520 				   try to access it!! */
   2521 			} else {
   2522 				raidmarkdirty(
   2523 					      raidPtr->Disks[c].dev,
   2524 					      raidPtr->raid_cinfo[c].ci_vp,
   2525 					      raidPtr->mod_counter);
   2526 			}
   2527 		}
   2528 	}
   2529 
   2530 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2531 		sparecol = raidPtr->numCol + c;
   2532 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2533 			/*
   2534 
   2535 			   we claim this disk is "optimal" if it's
   2536 			   rf_ds_used_spare, as that means it should be
   2537 			   directly substitutable for the disk it replaced.
   2538 			   We note that too...
   2539 
   2540 			 */
   2541 
   2542 			for(j=0;j<raidPtr->numCol;j++) {
   2543 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2544 					scol = j;
   2545 					break;
   2546 				}
   2547 			}
   2548 
   2549 			raidread_component_label(
   2550 				 raidPtr->Disks[sparecol].dev,
   2551 				 raidPtr->raid_cinfo[sparecol].ci_vp,
   2552 				 &clabel);
   2553 			/* make sure status is noted */
   2554 
   2555 			raid_init_component_label(raidPtr, &clabel);
   2556 
   2557 			clabel.row = 0;
   2558 			clabel.column = scol;
   2559 			/* Note: we *don't* change status from rf_ds_used_spare
   2560 			   to rf_ds_optimal */
   2561 			/* clabel.status = rf_ds_optimal; */
   2562 
   2563 			raidmarkdirty(raidPtr->Disks[sparecol].dev,
   2564 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2565 				      raidPtr->mod_counter);
   2566 		}
   2567 	}
   2568 }
   2569 
   2570 
   2571 void
   2572 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
   2573 {
   2574 	RF_ComponentLabel_t clabel;
   2575 	int sparecol;
   2576 	int c;
   2577 	int j;
   2578 	int scol;
   2579 
   2580 	scol = -1;
   2581 
   2582 	/* XXX should do extra checks to make sure things really are clean,
   2583 	   rather than blindly setting the clean bit... */
   2584 
   2585 	raidPtr->mod_counter++;
   2586 
   2587 	for (c = 0; c < raidPtr->numCol; c++) {
   2588 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   2589 			raidread_component_label(
   2590 						 raidPtr->Disks[c].dev,
   2591 						 raidPtr->raid_cinfo[c].ci_vp,
   2592 						 &clabel);
   2593 			/* make sure status is noted */
   2594 			clabel.status = rf_ds_optimal;
   2595 
   2596 			/* bump the counter */
   2597 			clabel.mod_counter = raidPtr->mod_counter;
   2598 
   2599 			/* note what unit we are configured as */
   2600 			clabel.last_unit = raidPtr->raidid;
   2601 
   2602 			raidwrite_component_label(
   2603 						  raidPtr->Disks[c].dev,
   2604 						  raidPtr->raid_cinfo[c].ci_vp,
   2605 						  &clabel);
   2606 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2607 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2608 					raidmarkclean(
   2609 						      raidPtr->Disks[c].dev,
   2610 						      raidPtr->raid_cinfo[c].ci_vp,
   2611 						      raidPtr->mod_counter);
   2612 				}
   2613 			}
   2614 		}
   2615 		/* else we don't touch it.. */
   2616 	}
   2617 
   2618 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2619 		sparecol = raidPtr->numCol + c;
   2620 		/* Need to ensure that the reconstruct actually completed! */
   2621 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2622 			/*
   2623 
   2624 			   we claim this disk is "optimal" if it's
   2625 			   rf_ds_used_spare, as that means it should be
   2626 			   directly substitutable for the disk it replaced.
   2627 			   We note that too...
   2628 
   2629 			 */
   2630 
   2631 			for(j=0;j<raidPtr->numCol;j++) {
   2632 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2633 					scol = j;
   2634 					break;
   2635 				}
   2636 			}
   2637 
   2638 			/* XXX shouldn't *really* need this... */
   2639 			raidread_component_label(
   2640 				      raidPtr->Disks[sparecol].dev,
   2641 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2642 				      &clabel);
   2643 			/* make sure status is noted */
   2644 
   2645 			raid_init_component_label(raidPtr, &clabel);
   2646 
   2647 			clabel.mod_counter = raidPtr->mod_counter;
   2648 			clabel.column = scol;
   2649 			clabel.status = rf_ds_optimal;
   2650 			clabel.last_unit = raidPtr->raidid;
   2651 
   2652 			raidwrite_component_label(
   2653 				      raidPtr->Disks[sparecol].dev,
   2654 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2655 				      &clabel);
   2656 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2657 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2658 					raidmarkclean( raidPtr->Disks[sparecol].dev,
   2659 						       raidPtr->raid_cinfo[sparecol].ci_vp,
   2660 						       raidPtr->mod_counter);
   2661 				}
   2662 			}
   2663 		}
   2664 	}
   2665 }
   2666 
   2667 void
   2668 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2669 {
   2670 
   2671 	if (vp != NULL) {
   2672 		if (auto_configured == 1) {
   2673 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2674 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2675 			vput(vp);
   2676 
   2677 		} else {
   2678 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
   2679 		}
   2680 	}
   2681 }
   2682 
   2683 
   2684 void
   2685 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2686 {
   2687 	int r,c;
   2688 	struct vnode *vp;
   2689 	int acd;
   2690 
   2691 
   2692 	/* We take this opportunity to close the vnodes like we should.. */
   2693 
   2694 	for (c = 0; c < raidPtr->numCol; c++) {
   2695 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2696 		acd = raidPtr->Disks[c].auto_configured;
   2697 		rf_close_component(raidPtr, vp, acd);
   2698 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2699 		raidPtr->Disks[c].auto_configured = 0;
   2700 	}
   2701 
   2702 	for (r = 0; r < raidPtr->numSpare; r++) {
   2703 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2704 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2705 		rf_close_component(raidPtr, vp, acd);
   2706 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2707 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2708 	}
   2709 }
   2710 
   2711 
   2712 void
   2713 rf_ReconThread(struct rf_recon_req *req)
   2714 {
   2715 	int     s;
   2716 	RF_Raid_t *raidPtr;
   2717 
   2718 	s = splbio();
   2719 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2720 	raidPtr->recon_in_progress = 1;
   2721 
   2722 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2723 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2724 
   2725 	RF_Free(req, sizeof(*req));
   2726 
   2727 	raidPtr->recon_in_progress = 0;
   2728 	splx(s);
   2729 
   2730 	/* That's all... */
   2731 	kthread_exit(0);	/* does not return */
   2732 }
   2733 
   2734 void
   2735 rf_RewriteParityThread(RF_Raid_t *raidPtr)
   2736 {
   2737 	int retcode;
   2738 	int s;
   2739 
   2740 	raidPtr->parity_rewrite_stripes_done = 0;
   2741 	raidPtr->parity_rewrite_in_progress = 1;
   2742 	s = splbio();
   2743 	retcode = rf_RewriteParity(raidPtr);
   2744 	splx(s);
   2745 	if (retcode) {
   2746 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2747 	} else {
   2748 		/* set the clean bit!  If we shutdown correctly,
   2749 		   the clean bit on each component label will get
   2750 		   set */
   2751 		raidPtr->parity_good = RF_RAID_CLEAN;
   2752 	}
   2753 	raidPtr->parity_rewrite_in_progress = 0;
   2754 
   2755 	/* Anyone waiting for us to stop?  If so, inform them... */
   2756 	if (raidPtr->waitShutdown) {
   2757 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2758 	}
   2759 
   2760 	/* That's all... */
   2761 	kthread_exit(0);	/* does not return */
   2762 }
   2763 
   2764 
   2765 void
   2766 rf_CopybackThread(RF_Raid_t *raidPtr)
   2767 {
   2768 	int s;
   2769 
   2770 	raidPtr->copyback_in_progress = 1;
   2771 	s = splbio();
   2772 	rf_CopybackReconstructedData(raidPtr);
   2773 	splx(s);
   2774 	raidPtr->copyback_in_progress = 0;
   2775 
   2776 	/* That's all... */
   2777 	kthread_exit(0);	/* does not return */
   2778 }
   2779 
   2780 
   2781 void
   2782 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
   2783 {
   2784 	int s;
   2785 	RF_Raid_t *raidPtr;
   2786 
   2787 	s = splbio();
   2788 	raidPtr = req->raidPtr;
   2789 	raidPtr->recon_in_progress = 1;
   2790 	rf_ReconstructInPlace(raidPtr, req->col);
   2791 	RF_Free(req, sizeof(*req));
   2792 	raidPtr->recon_in_progress = 0;
   2793 	splx(s);
   2794 
   2795 	/* That's all... */
   2796 	kthread_exit(0);	/* does not return */
   2797 }
   2798 
   2799 static RF_AutoConfig_t *
   2800 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
   2801     const char *cname, RF_SectorCount_t size)
   2802 {
   2803 	int good_one = 0;
   2804 	RF_ComponentLabel_t *clabel;
   2805 	RF_AutoConfig_t *ac;
   2806 
   2807 	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
   2808 	if (clabel == NULL) {
   2809 oomem:
   2810 		    while(ac_list) {
   2811 			    ac = ac_list;
   2812 			    if (ac->clabel)
   2813 				    free(ac->clabel, M_RAIDFRAME);
   2814 			    ac_list = ac_list->next;
   2815 			    free(ac, M_RAIDFRAME);
   2816 		    }
   2817 		    printf("RAID auto config: out of memory!\n");
   2818 		    return NULL; /* XXX probably should panic? */
   2819 	}
   2820 
   2821 	if (!raidread_component_label(dev, vp, clabel)) {
   2822 		    /* Got the label.  Does it look reasonable? */
   2823 		    if (rf_reasonable_label(clabel) &&
   2824 			(clabel->partitionSize <= size)) {
   2825 #ifdef DEBUG
   2826 			    printf("Component on: %s: %llu\n",
   2827 				cname, (unsigned long long)size);
   2828 			    rf_print_component_label(clabel);
   2829 #endif
   2830 			    /* if it's reasonable, add it, else ignore it. */
   2831 			    ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
   2832 				M_NOWAIT);
   2833 			    if (ac == NULL) {
   2834 				    free(clabel, M_RAIDFRAME);
   2835 				    goto oomem;
   2836 			    }
   2837 			    strlcpy(ac->devname, cname, sizeof(ac->devname));
   2838 			    ac->dev = dev;
   2839 			    ac->vp = vp;
   2840 			    ac->clabel = clabel;
   2841 			    ac->next = ac_list;
   2842 			    ac_list = ac;
   2843 			    good_one = 1;
   2844 		    }
   2845 	}
   2846 	if (!good_one) {
   2847 		/* cleanup */
   2848 		free(clabel, M_RAIDFRAME);
   2849 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2850 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2851 		vput(vp);
   2852 	}
   2853 	return ac_list;
   2854 }
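/*
 * On success the still-open vnode and the freshly allocated label are owned
 * by the new RF_AutoConfig_t entry; if the label is missing or rejected the
 * label buffer is freed and the vnode is closed and released here, so the
 * caller need not clean up after an unusable component.
 */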
   2855 
   2856 RF_AutoConfig_t *
    2857 rf_find_raid_components(void)
   2858 {
   2859 	struct vnode *vp;
   2860 	struct disklabel label;
   2861 	struct device *dv;
   2862 	dev_t dev;
   2863 	int bmajor, bminor, wedge;
   2864 	int error;
   2865 	int i;
   2866 	RF_AutoConfig_t *ac_list;
   2867 
   2868 
   2869 	/* initialize the AutoConfig list */
   2870 	ac_list = NULL;
   2871 
   2872 	/* we begin by trolling through *all* the devices on the system */
   2873 
   2874 	for (dv = alldevs.tqh_first; dv != NULL;
   2875 	     dv = dv->dv_list.tqe_next) {
   2876 
   2877 		/* we are only interested in disks... */
   2878 		if (device_class(dv) != DV_DISK)
   2879 			continue;
   2880 
   2881 		/* we don't care about floppies... */
   2882 		if (device_is_a(dv, "fd")) {
   2883 			continue;
   2884 		}
   2885 
   2886 		/* we don't care about CD's... */
   2887 		if (device_is_a(dv, "cd")) {
   2888 			continue;
   2889 		}
   2890 
   2891 		/* we don't care about md's... */
   2892 		if (device_is_a(dv, "md")) {
   2893 			continue;
   2894 		}
   2895 
   2896 		/* hdfd is the Atari/Hades floppy driver */
   2897 		if (device_is_a(dv, "hdfd")) {
   2898 			continue;
   2899 		}
   2900 
   2901 		/* fdisa is the Atari/Milan floppy driver */
   2902 		if (device_is_a(dv, "fdisa")) {
   2903 			continue;
   2904 		}
   2905 
   2906 		/* need to find the device_name_to_block_device_major stuff */
   2907 		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
   2908 
   2909 		/* get a vnode for the raw partition of this disk */
   2910 
   2911 		wedge = device_is_a(dv, "dk");
   2912 		bminor = minor(device_unit(dv));
   2913 		dev = wedge ? makedev(bmajor, bminor) :
   2914 		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
   2915 		if (bdevvp(dev, &vp))
   2916 			panic("RAID can't alloc vnode");
   2917 
   2918 		error = VOP_OPEN(vp, FREAD, NOCRED);
   2919 
   2920 		if (error) {
   2921 			/* "Who cares."  Continue looking
    2922 			   for something that exists */
   2923 			vput(vp);
   2924 			continue;
   2925 		}
   2926 
   2927 		if (wedge) {
   2928 			struct dkwedge_info dkw;
   2929 			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
   2930 			    NOCRED);
   2931 			if (error) {
   2932 				printf("RAIDframe: can't get wedge info for "
   2933 				    "dev %s (%d)\n", device_xname(dv), error);
   2934 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2935 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2936 				vput(vp);
   2937 				continue;
   2938 			}
   2939 
   2940 			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
   2941 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2942 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2943 				vput(vp);
   2944 				continue;
   2945 			}
   2946 
   2947 			ac_list = rf_get_component(ac_list, dev, vp,
   2948 			    device_xname(dv), dkw.dkw_size);
   2949 			continue;
   2950 		}
   2951 
   2952 		/* Ok, the disk exists.  Go get the disklabel. */
   2953 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
   2954 		if (error) {
   2955 			/*
   2956 			 * XXX can't happen - open() would
   2957 			 * have errored out (or faked up one)
   2958 			 */
   2959 			if (error != ENOTTY)
   2960 				printf("RAIDframe: can't get label for dev "
   2961 				    "%s (%d)\n", device_xname(dv), error);
   2962 		}
   2963 
   2964 		/* don't need this any more.  We'll allocate it again
   2965 		   a little later if we really do... */
   2966 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2967 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2968 		vput(vp);
   2969 
   2970 		if (error)
   2971 			continue;
   2972 
   2973 		for (i = 0; i < label.d_npartitions; i++) {
   2974 			char cname[sizeof(ac_list->devname)];
   2975 
   2976 			/* We only support partitions marked as RAID */
   2977 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2978 				continue;
   2979 
   2980 			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
   2981 			if (bdevvp(dev, &vp))
   2982 				panic("RAID can't alloc vnode");
   2983 
   2984 			error = VOP_OPEN(vp, FREAD, NOCRED);
   2985 			if (error) {
   2986 				/* Whatever... */
   2987 				vput(vp);
   2988 				continue;
   2989 			}
   2990 			snprintf(cname, sizeof(cname), "%s%c",
   2991 			    device_xname(dv), 'a' + i);
   2992 			ac_list = rf_get_component(ac_list, dev, vp, cname,
   2993 				label.d_partitions[i].p_size);
   2994 		}
   2995 	}
   2996 	return ac_list;
   2997 }
   2998 
   2999 
   3000 static int
   3001 rf_reasonable_label(RF_ComponentLabel_t *clabel)
   3002 {
   3003 
   3004 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   3005 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   3006 	    ((clabel->clean == RF_RAID_CLEAN) ||
   3007 	     (clabel->clean == RF_RAID_DIRTY)) &&
   3008 	    clabel->row >=0 &&
   3009 	    clabel->column >= 0 &&
   3010 	    clabel->num_rows > 0 &&
   3011 	    clabel->num_columns > 0 &&
   3012 	    clabel->row < clabel->num_rows &&
   3013 	    clabel->column < clabel->num_columns &&
   3014 	    clabel->blockSize > 0 &&
   3015 	    clabel->numBlocks > 0) {
   3016 		/* label looks reasonable enough... */
   3017 		return(1);
   3018 	}
   3019 	return(0);
   3020 }
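/*
 * rf_reasonable_label() only sanity-checks the label's internal
 * consistency; the caller (rf_get_component()) additionally requires that
 * the label's partitionSize fits within the partition or wedge the label
 * was read from.
 */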
   3021 
   3022 
   3023 #ifdef DEBUG
   3024 void
   3025 rf_print_component_label(RF_ComponentLabel_t *clabel)
   3026 {
   3027 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   3028 	       clabel->row, clabel->column,
   3029 	       clabel->num_rows, clabel->num_columns);
   3030 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   3031 	       clabel->version, clabel->serial_number,
   3032 	       clabel->mod_counter);
   3033 	printf("   Clean: %s Status: %d\n",
   3034 	       clabel->clean ? "Yes" : "No", clabel->status );
   3035 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   3036 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   3037 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   3038 	       (char) clabel->parityConfig, clabel->blockSize,
   3039 	       clabel->numBlocks);
   3040 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   3041 	printf("   Contains root partition: %s\n",
   3042 	       clabel->root_partition ? "Yes" : "No" );
   3043 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   3044 #if 0
   3045 	   printf("   Config order: %d\n", clabel->config_order);
   3046 #endif
   3047 
   3048 }
   3049 #endif
   3050 
   3051 RF_ConfigSet_t *
   3052 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   3053 {
   3054 	RF_AutoConfig_t *ac;
   3055 	RF_ConfigSet_t *config_sets;
   3056 	RF_ConfigSet_t *cset;
   3057 	RF_AutoConfig_t *ac_next;
   3058 
   3059 
   3060 	config_sets = NULL;
   3061 
   3062 	/* Go through the AutoConfig list, and figure out which components
   3063 	   belong to what sets.  */
   3064 	ac = ac_list;
   3065 	while(ac!=NULL) {
   3066 		/* we're going to putz with ac->next, so save it here
   3067 		   for use at the end of the loop */
   3068 		ac_next = ac->next;
   3069 
   3070 		if (config_sets == NULL) {
   3071 			/* will need at least this one... */
   3072 			config_sets = (RF_ConfigSet_t *)
   3073 				malloc(sizeof(RF_ConfigSet_t),
   3074 				       M_RAIDFRAME, M_NOWAIT);
   3075 			if (config_sets == NULL) {
   3076 				panic("rf_create_auto_sets: No memory!");
   3077 			}
   3078 			/* this one is easy :) */
   3079 			config_sets->ac = ac;
   3080 			config_sets->next = NULL;
   3081 			config_sets->rootable = 0;
   3082 			ac->next = NULL;
   3083 		} else {
   3084 			/* which set does this component fit into? */
   3085 			cset = config_sets;
   3086 			while(cset!=NULL) {
   3087 				if (rf_does_it_fit(cset, ac)) {
   3088 					/* looks like it matches... */
   3089 					ac->next = cset->ac;
   3090 					cset->ac = ac;
   3091 					break;
   3092 				}
   3093 				cset = cset->next;
   3094 			}
   3095 			if (cset==NULL) {
   3096 				/* didn't find a match above... new set..*/
   3097 				cset = (RF_ConfigSet_t *)
   3098 					malloc(sizeof(RF_ConfigSet_t),
   3099 					       M_RAIDFRAME, M_NOWAIT);
   3100 				if (cset == NULL) {
   3101 					panic("rf_create_auto_sets: No memory!");
   3102 				}
   3103 				cset->ac = ac;
   3104 				ac->next = NULL;
   3105 				cset->next = config_sets;
   3106 				cset->rootable = 0;
   3107 				config_sets = cset;
   3108 			}
   3109 		}
   3110 		ac = ac_next;
   3111 	}
   3112 
   3113 
   3114 	return(config_sets);
   3115 }
   3116 
   3117 static int
   3118 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3119 {
   3120 	RF_ComponentLabel_t *clabel1, *clabel2;
   3121 
   3122 	/* If this one matches the *first* one in the set, that's good
   3123 	   enough, since the other members of the set would have been
   3124 	   through here too... */
   3125 	/* note that we are not checking partitionSize here..
   3126 
   3127 	   Note that we are also not checking the mod_counters here.
    3128 	   If everything else matches except the mod_counter, that's
   3129 	   good enough for this test.  We will deal with the mod_counters
   3130 	   a little later in the autoconfiguration process.
   3131 
   3132 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3133 
   3134 	   The reason we don't check for this is that failed disks
   3135 	   will have lower modification counts.  If those disks are
   3136 	   not added to the set they used to belong to, then they will
   3137 	   form their own set, which may result in 2 different sets,
   3138 	   for example, competing to be configured at raid0, and
   3139 	   perhaps competing to be the root filesystem set.  If the
   3140 	   wrong ones get configured, or both attempt to become /,
    3141 	   weird behaviour and/or serious lossage will occur.  Thus we
   3142 	   need to bring them into the fold here, and kick them out at
   3143 	   a later point.
   3144 
   3145 	*/
   3146 
   3147 	clabel1 = cset->ac->clabel;
   3148 	clabel2 = ac->clabel;
   3149 	if ((clabel1->version == clabel2->version) &&
   3150 	    (clabel1->serial_number == clabel2->serial_number) &&
   3151 	    (clabel1->num_rows == clabel2->num_rows) &&
   3152 	    (clabel1->num_columns == clabel2->num_columns) &&
   3153 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3154 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3155 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3156 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3157 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3158 	    (clabel1->blockSize == clabel2->blockSize) &&
   3159 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   3160 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3161 	    (clabel1->root_partition == clabel2->root_partition) &&
   3162 	    (clabel1->last_unit == clabel2->last_unit) &&
   3163 	    (clabel1->config_order == clabel2->config_order)) {
    3164 		/* if it gets here, it almost *has* to be a match */
   3165 	} else {
   3166 		/* it's not consistent with somebody in the set..
   3167 		   punt */
   3168 		return(0);
   3169 	}
   3170 	/* all was fine.. it must fit... */
   3171 	return(1);
   3172 }
   3173 
   3174 int
   3175 rf_have_enough_components(RF_ConfigSet_t *cset)
   3176 {
   3177 	RF_AutoConfig_t *ac;
   3178 	RF_AutoConfig_t *auto_config;
   3179 	RF_ComponentLabel_t *clabel;
   3180 	int c;
   3181 	int num_cols;
   3182 	int num_missing;
   3183 	int mod_counter;
   3184 	int mod_counter_found;
   3185 	int even_pair_failed;
   3186 	char parity_type;
   3187 
   3188 
   3189 	/* check to see that we have enough 'live' components
   3190 	   of this set.  If so, we can configure it if necessary */
   3191 
   3192 	num_cols = cset->ac->clabel->num_columns;
   3193 	parity_type = cset->ac->clabel->parityConfig;
   3194 
   3195 	/* XXX Check for duplicate components!?!?!? */
   3196 
   3197 	/* Determine what the mod_counter is supposed to be for this set. */
   3198 
   3199 	mod_counter_found = 0;
   3200 	mod_counter = 0;
   3201 	ac = cset->ac;
   3202 	while(ac!=NULL) {
   3203 		if (mod_counter_found==0) {
   3204 			mod_counter = ac->clabel->mod_counter;
   3205 			mod_counter_found = 1;
   3206 		} else {
   3207 			if (ac->clabel->mod_counter > mod_counter) {
   3208 				mod_counter = ac->clabel->mod_counter;
   3209 			}
   3210 		}
   3211 		ac = ac->next;
   3212 	}
   3213 
   3214 	num_missing = 0;
   3215 	auto_config = cset->ac;
   3216 
   3217 	even_pair_failed = 0;
   3218 	for(c=0; c<num_cols; c++) {
   3219 		ac = auto_config;
   3220 		while(ac!=NULL) {
   3221 			if ((ac->clabel->column == c) &&
   3222 			    (ac->clabel->mod_counter == mod_counter)) {
   3223 				/* it's this one... */
   3224 #ifdef DEBUG
   3225 				printf("Found: %s at %d\n",
   3226 				       ac->devname,c);
   3227 #endif
   3228 				break;
   3229 			}
   3230 			ac=ac->next;
   3231 		}
   3232 		if (ac==NULL) {
   3233 				/* Didn't find one here! */
   3234 				/* special case for RAID 1, especially
   3235 				   where there are more than 2
   3236 				   components (where RAIDframe treats
   3237 				   things a little differently :( ) */
   3238 			if (parity_type == '1') {
   3239 				if (c%2 == 0) { /* even component */
   3240 					even_pair_failed = 1;
   3241 				} else { /* odd component.  If
   3242 					    we're failed, and
   3243 					    so is the even
   3244 					    component, it's
   3245 					    "Good Night, Charlie" */
   3246 					if (even_pair_failed == 1) {
   3247 						return(0);
   3248 					}
   3249 				}
   3250 			} else {
   3251 				/* normal accounting */
   3252 				num_missing++;
   3253 			}
   3254 		}
   3255 		if ((parity_type == '1') && (c%2 == 1)) {
   3256 				/* Just did an even component, and we didn't
   3257 				   bail.. reset the even_pair_failed flag,
   3258 				   and go on to the next component.... */
   3259 			even_pair_failed = 0;
   3260 		}
   3261 	}
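	/* Illustrative example: in a 4-component RAID 1 the mirror pairs are
	 * (0,1) and (2,3); losing one member of each pair is survivable, but
	 * losing both members of the same pair fails the check above. */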
   3262 
   3263 	clabel = cset->ac->clabel;
   3264 
   3265 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3266 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3267 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3268 		/* XXX this needs to be made *much* more general */
   3269 		/* Too many failures */
   3270 		return(0);
   3271 	}
   3272 	/* otherwise, all is well, and we've got enough to take a kick
   3273 	   at autoconfiguring this set */
   3274 	return(1);
   3275 }
   3276 
   3277 void
   3278 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
   3279 			RF_Raid_t *raidPtr)
   3280 {
   3281 	RF_ComponentLabel_t *clabel;
   3282 	int i;
   3283 
   3284 	clabel = ac->clabel;
   3285 
   3286 	/* 1. Fill in the common stuff */
   3287 	config->numRow = clabel->num_rows = 1;
   3288 	config->numCol = clabel->num_columns;
   3289 	config->numSpare = 0; /* XXX should this be set here? */
   3290 	config->sectPerSU = clabel->sectPerSU;
   3291 	config->SUsPerPU = clabel->SUsPerPU;
   3292 	config->SUsPerRU = clabel->SUsPerRU;
   3293 	config->parityConfig = clabel->parityConfig;
   3294 	/* XXX... */
   3295 	strcpy(config->diskQueueType,"fifo");
   3296 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3297 	config->layoutSpecificSize = 0; /* XXX ?? */
   3298 
   3299 	while(ac!=NULL) {
   3300 		/* row/col values will be in range due to the checks
   3301 		   in reasonable_label() */
   3302 		strcpy(config->devnames[0][ac->clabel->column],
   3303 		       ac->devname);
   3304 		ac = ac->next;
   3305 	}
   3306 
   3307 	for(i=0;i<RF_MAXDBGV;i++) {
   3308 		config->debugVars[i][0] = 0;
   3309 	}
   3310 }
   3311 
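/*
 * rf_set_autoconfig(raidPtr, new_value) -- set the autoconfigure flag in
 * the component label of every optimal component and every used spare,
 * and record it in the RF_Raid_t.  Returns the new value.  (Normally
 * reached via the RAIDFRAME_SET_AUTOCONFIG ioctl, e.g. from raidctl(8).)
 */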
   3312 int
   3313 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3314 {
   3315 	RF_ComponentLabel_t clabel;
   3316 	struct vnode *vp;
   3317 	dev_t dev;
   3318 	int column;
   3319 	int sparecol;
   3320 
   3321 	raidPtr->autoconfigure = new_value;
   3322 
   3323 	for(column=0; column<raidPtr->numCol; column++) {
   3324 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3325 			dev = raidPtr->Disks[column].dev;
   3326 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3327 			raidread_component_label(dev, vp, &clabel);
   3328 			clabel.autoconfigure = new_value;
   3329 			raidwrite_component_label(dev, vp, &clabel);
   3330 		}
   3331 	}
    3332 	for(column = 0; column < raidPtr->numSpare; column++) {
   3333 		sparecol = raidPtr->numCol + column;
   3334 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3335 			dev = raidPtr->Disks[sparecol].dev;
   3336 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3337 			raidread_component_label(dev, vp, &clabel);
   3338 			clabel.autoconfigure = new_value;
   3339 			raidwrite_component_label(dev, vp, &clabel);
   3340 		}
   3341 	}
   3342 	return(new_value);
   3343 }
   3344 
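/*
 * rf_set_rootpartition(raidPtr, new_value) -- as rf_set_autoconfig()
 * above, but for the root_partition flag, which marks the set as a
 * candidate root device at autoconfiguration time.
 */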
   3345 int
   3346 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3347 {
   3348 	RF_ComponentLabel_t clabel;
   3349 	struct vnode *vp;
   3350 	dev_t dev;
   3351 	int column;
   3352 	int sparecol;
   3353 
   3354 	raidPtr->root_partition = new_value;
   3355 	for(column=0; column<raidPtr->numCol; column++) {
   3356 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3357 			dev = raidPtr->Disks[column].dev;
   3358 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3359 			raidread_component_label(dev, vp, &clabel);
   3360 			clabel.root_partition = new_value;
   3361 			raidwrite_component_label(dev, vp, &clabel);
   3362 		}
   3363 	}
    3364 	for(column = 0; column < raidPtr->numSpare; column++) {
   3365 		sparecol = raidPtr->numCol + column;
   3366 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3367 			dev = raidPtr->Disks[sparecol].dev;
   3368 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3369 			raidread_component_label(dev, vp, &clabel);
   3370 			clabel.root_partition = new_value;
   3371 			raidwrite_component_label(dev, vp, &clabel);
   3372 		}
   3373 	}
   3374 	return(new_value);
   3375 }
   3376 
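/*
 * rf_release_all_vps(cset) -- close and release the vnode held open for
 * each component of the config set.
 */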
   3377 void
   3378 rf_release_all_vps(RF_ConfigSet_t *cset)
   3379 {
   3380 	RF_AutoConfig_t *ac;
   3381 
   3382 	ac = cset->ac;
   3383 	while(ac!=NULL) {
   3384 		/* Close the vp, and give it back */
   3385 		if (ac->vp) {
   3386 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3387 			VOP_CLOSE(ac->vp, FREAD, NOCRED);
   3388 			vput(ac->vp);
   3389 			ac->vp = NULL;
   3390 		}
   3391 		ac = ac->next;
   3392 	}
   3393 }
   3394 
   3395 
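/*
 * rf_cleanup_config_set(cset) -- free the component labels, the
 * RF_AutoConfig_t entries, and finally the config set itself.
 */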
   3396 void
   3397 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3398 {
   3399 	RF_AutoConfig_t *ac;
   3400 	RF_AutoConfig_t *next_ac;
   3401 
   3402 	ac = cset->ac;
   3403 	while(ac!=NULL) {
   3404 		next_ac = ac->next;
   3405 		/* nuke the label */
   3406 		free(ac->clabel, M_RAIDFRAME);
   3407 		/* cleanup the config structure */
   3408 		free(ac, M_RAIDFRAME);
    3409 		/* advance to the next entry */
   3410 		ac = next_ac;
   3411 	}
   3412 	/* and, finally, nuke the config set */
   3413 	free(cset, M_RAIDFRAME);
   3414 }
   3415 
   3416 
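/*
 * raid_init_component_label(raidPtr, clabel) -- initialize a component
 * label from the current state of the RAID set: version, serial number,
 * mod counter, geometry, and the autoconfigure/root settings.
 * Per-component fields such as the column number are left for the caller.
 */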
   3417 void
   3418 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
   3419 {
   3420 	/* current version number */
   3421 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3422 	clabel->serial_number = raidPtr->serial_number;
   3423 	clabel->mod_counter = raidPtr->mod_counter;
   3424 	clabel->num_rows = 1;
   3425 	clabel->num_columns = raidPtr->numCol;
   3426 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3427 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3428 
   3429 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3430 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3431 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3432 
   3433 	clabel->blockSize = raidPtr->bytesPerSector;
   3434 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3435 
   3436 	/* XXX not portable */
   3437 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3438 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3439 	clabel->autoconfigure = raidPtr->autoconfigure;
   3440 	clabel->root_partition = raidPtr->root_partition;
   3441 	clabel->last_unit = raidPtr->raidid;
   3442 	clabel->config_order = raidPtr->config_order;
   3443 }
   3444 
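/*
 * rf_auto_config_set(cset, unit) -- attempt to configure a config set
 * that has already been judged eligible: build an RF_Config_t, pick a
 * free RAID unit (preferring the unit recorded in last_unit), and call
 * rf_Configure().  On success the unit is initialized, all components
 * are marked dirty, and the set is noted as rootable if its labels ask
 * for that.  Returns 0 on success with *unit set to the unit number used.
 */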
   3445 int
   3446 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
   3447 {
   3448 	RF_Raid_t *raidPtr;
   3449 	RF_Config_t *config;
   3450 	int raidID;
   3451 	int retcode;
   3452 
   3453 #ifdef DEBUG
   3454 	printf("RAID autoconfigure\n");
   3455 #endif
   3456 
   3457 	retcode = 0;
   3458 	*unit = -1;
   3459 
   3460 	/* 1. Create a config structure */
   3461 
   3462 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3463 				       M_RAIDFRAME,
   3464 				       M_NOWAIT);
   3465 	if (config==NULL) {
   3466 		printf("Out of mem!?!?\n");
   3467 				/* XXX do something more intelligent here. */
   3468 		return(1);
   3469 	}
   3470 
   3471 	memset(config, 0, sizeof(RF_Config_t));
   3472 
    3473 	/*
    3474 	   2. Figure out what RAID ID this one is supposed to live at.
    3475 	   See if we can get the same RAID device that it was
    3476 	   configured on last time.
    3477 	*/
   3478 
   3479 	raidID = cset->ac->clabel->last_unit;
   3480 	if ((raidID < 0) || (raidID >= numraid)) {
   3481 		/* let's not wander off into lala land. */
   3482 		raidID = numraid - 1;
   3483 	}
   3484 	if (raidPtrs[raidID]->valid != 0) {
   3485 
    3486 		/*
    3487 		   Nope... go looking for an alternative.
    3488 		   Search from the highest unit downwards so that raid0
    3489 		   is only claimed if nothing else is free.
    3490 		*/
   3491 
   3492 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3493 			if (raidPtrs[raidID]->valid == 0) {
   3494 				/* can use this one! */
   3495 				break;
   3496 			}
   3497 		}
   3498 	}
   3499 
   3500 	if (raidID < 0) {
   3501 		/* punt... */
   3502 		printf("Unable to auto configure this set!\n");
   3503 		printf("(Out of RAID devs!)\n");
   3504 		free(config, M_RAIDFRAME);
   3505 		return(1);
   3506 	}
   3507 
   3508 #ifdef DEBUG
   3509 	printf("Configuring raid%d:\n",raidID);
   3510 #endif
   3511 
   3512 	raidPtr = raidPtrs[raidID];
   3513 
   3514 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3515 	raidPtr->raidid = raidID;
   3516 	raidPtr->openings = RAIDOUTSTANDING;
   3517 
   3518 	/* 3. Build the configuration structure */
   3519 	rf_create_configuration(cset->ac, config, raidPtr);
   3520 
   3521 	/* 4. Do the configuration */
   3522 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3523 
   3524 	if (retcode == 0) {
   3525 
   3526 		raidinit(raidPtrs[raidID]);
   3527 
   3528 		rf_markalldirty(raidPtrs[raidID]);
   3529 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3530 		if (cset->ac->clabel->root_partition==1) {
   3531 			/* everything configured just fine.  Make a note
   3532 			   that this set is eligible to be root. */
   3533 			cset->rootable = 1;
   3534 			/* XXX do this here? */
   3535 			raidPtrs[raidID]->root_partition = 1;
   3536 		}
   3537 	}
   3538 
   3539 	/* 5. Cleanup */
   3540 	free(config, M_RAIDFRAME);
   3541 
   3542 	*unit = raidID;
   3543 	return(retcode);
   3544 }
   3545 
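/*
 * rf_disk_unbusy(desc) -- credit the completed transfer described by
 * desc to the disk statistics of its RAID unit.
 */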
   3546 void
   3547 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
   3548 {
   3549 	struct buf *bp;
   3550 
   3551 	bp = (struct buf *)desc->bp;
   3552 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3553 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
   3554 }
   3555 
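/*
 * rf_pool_init(p, size, w_chan, xmin, xmax) -- convenience wrapper
 * around pool_init(): create the pool at IPL_BIO, prime it with xmin
 * items, and set the low and high water marks to xmin and xmax.
 *
 * A typical call (hypothetical pool and sizes, shown only as an
 * illustration) might look like:
 *
 *	rf_pool_init(&rf_example_pool, sizeof(struct rf_example),
 *	    "rf_examplepl", 16, 128);
 */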
   3556 void
   3557 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
   3558 	     size_t xmin, size_t xmax)
   3559 {
   3560 	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
   3561 	pool_sethiwat(p, xmax);
   3562 	pool_prime(p, xmin);
   3563 	pool_setlowat(p, xmin);
   3564 }
   3565 
   3566 /*
   3567  * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
   3568  * if there is IO pending and if that IO could possibly be done for a
   3569  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
   3570  * otherwise.
   3571  *
   3572  */
   3573 
   3574 int
   3575 rf_buf_queue_check(int raidid)
   3576 {
   3577 	if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) &&
   3578 	    raidPtrs[raidid]->openings > 0) {
   3579 		/* there is work to do */
   3580 		return 0;
   3581 	}
   3582 	/* default is nothing to do */
   3583 	return 1;
   3584 }
   3585 
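/*
 * rf_getdisksize(vp, l, diskPtr) -- determine the size and sector size
 * of a component, first via DIOCGPART (disklabel partitions) and,
 * failing that, via DIOCGWEDGEINFO (wedges).  rf_protectedSectors
 * sectors are reserved for RAIDframe's own use and excluded from
 * numBlocks.
 */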
   3586 int
   3587 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
   3588 {
   3589 	struct partinfo dpart;
   3590 	struct dkwedge_info dkw;
   3591 	int error;
   3592 
   3593 	error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred);
   3594 	if (error == 0) {
   3595 		diskPtr->blockSize = dpart.disklab->d_secsize;
   3596 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
   3597 		diskPtr->partitionSize = dpart.part->p_size;
   3598 		return 0;
   3599 	}
   3600 
   3601 	error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred);
   3602 	if (error == 0) {
   3603 		diskPtr->blockSize = 512;	/* XXX */
   3604 		diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
   3605 		diskPtr->partitionSize = dkw.dkw_size;
   3606 		return 0;
   3607 	}
   3608 	return error;
   3609 }
   3610 
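/*
 * autoconf(9) glue for the raid pseudo-device: matching always succeeds,
 * attach does nothing (the real work happens at configure time), and
 * detach is refused while the unit is initialized.
 */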
   3611 static int
   3612 raid_match(struct device *self, struct cfdata *cfdata,
   3613     void *aux)
   3614 {
   3615 	return 1;
   3616 }
   3617 
   3618 static void
   3619 raid_attach(struct device *parent, struct device *self,
   3620     void *aux)
   3621 {
   3622 
   3623 }
   3624 
   3625 
   3626 static int
   3627 raid_detach(struct device *self, int flags)
   3628 {
   3629 	struct raid_softc *rs = (struct raid_softc *)self;
   3630 
   3631 	if (rs->sc_flags & RAIDF_INITED)
   3632 		return EBUSY;
   3633 
   3634 	return 0;
   3635 }
   3636 
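/*
 * rf_set_properties(rs, raidPtr) -- publish a synthetic disk geometry
 * for the RAID set in the device property dictionary ("disk-info");
 * sectors per track and tracks per cylinder are derived from the stripe
 * geometry and column count rather than any physical layout.
 */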
   3637 static void
   3638 rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
   3639 {
   3640 	prop_dictionary_t disk_info, odisk_info, geom;
   3641 	disk_info = prop_dictionary_create();
   3642 	geom = prop_dictionary_create();
   3643 	prop_dictionary_set_uint64(geom, "sectors-per-unit",
   3644 				   raidPtr->totalSectors);
   3645 	prop_dictionary_set_uint32(geom, "sector-size",
   3646 				   raidPtr->bytesPerSector);
   3647 
   3648 	prop_dictionary_set_uint16(geom, "sectors-per-track",
   3649 				   raidPtr->Layout.dataSectorsPerStripe);
   3650 	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
   3651 				   4 * raidPtr->numCol);
   3652 
   3653 	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
   3654 	   raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
   3655 	   (4 * raidPtr->numCol)));
   3656 
   3657 	prop_dictionary_set(disk_info, "geometry", geom);
   3658 	prop_object_release(geom);
   3659 	prop_dictionary_set(device_properties(rs->sc_dev),
   3660 			    "disk-info", disk_info);
   3661 	odisk_info = rs->sc_dkdev.dk_info;
   3662 	rs->sc_dkdev.dk_info = disk_info;
   3663 	if (odisk_info)
   3664 		prop_object_release(odisk_info);
   3665 }
   3666 
   3667 /*
   3668  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
   3669  * We end up returning whatever error was returned by the first cache flush
   3670  * that fails.
   3671  */
   3672 
   3673 static int
   3674 rf_sync_component_caches(RF_Raid_t *raidPtr)
   3675 {
   3676 	int c, sparecol;
    3677 	int e, error;
   3678 	int force = 1;
   3679 
   3680 	error = 0;
   3681 	for (c = 0; c < raidPtr->numCol; c++) {
   3682 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   3683 			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
   3684 					  &force, FWRITE, NOCRED);
   3685 			if (e) {
   3686 				printf("raid%d: cache flush to component %s failed.\n",
   3687 				       raidPtr->raidid, raidPtr->Disks[c].devname);
   3688 				if (error == 0) {
   3689 					error = e;
   3690 				}
   3691 			}
   3692 		}
   3693 	}
   3694 
    3695 	for (c = 0; c < raidPtr->numSpare; c++) {
   3696 		sparecol = raidPtr->numCol + c;
   3697 		/* Need to ensure that the reconstruct actually completed! */
   3698 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3699 			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
   3700 					  DIOCCACHESYNC, &force, FWRITE, NOCRED);
   3701 			if (e) {
   3702 				printf("raid%d: cache flush to component %s failed.\n",
   3703 				       raidPtr->raidid, raidPtr->Disks[sparecol].devname);
   3704 				if (error == 0) {
   3705 					error = e;
   3706 				}
   3707 			}
   3708 		}
   3709 	}
   3710 	return error;
   3711 }
   3712