Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.247
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.247 2008/06/07 17:50:34 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * Copyright (c) 1990, 1993
     33  *      The Regents of the University of California.  All rights reserved.
     34  *
     35  * This code is derived from software contributed to Berkeley by
     36  * the Systems Programming Group of the University of Utah Computer
     37  * Science Department.
     38  *
     39  * Redistribution and use in source and binary forms, with or without
     40  * modification, are permitted provided that the following conditions
     41  * are met:
     42  * 1. Redistributions of source code must retain the above copyright
     43  *    notice, this list of conditions and the following disclaimer.
     44  * 2. Redistributions in binary form must reproduce the above copyright
     45  *    notice, this list of conditions and the following disclaimer in the
     46  *    documentation and/or other materials provided with the distribution.
     47  * 3. Neither the name of the University nor the names of its contributors
     48  *    may be used to endorse or promote products derived from this software
     49  *    without specific prior written permission.
     50  *
     51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     61  * SUCH DAMAGE.
     62  *
     63  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     64  *
     65  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     66  */
     67 
     68 /*
     69  * Copyright (c) 1988 University of Utah.
     70  *
     71  * This code is derived from software contributed to Berkeley by
     72  * the Systems Programming Group of the University of Utah Computer
     73  * Science Department.
     74  *
     75  * Redistribution and use in source and binary forms, with or without
     76  * modification, are permitted provided that the following conditions
     77  * are met:
     78  * 1. Redistributions of source code must retain the above copyright
     79  *    notice, this list of conditions and the following disclaimer.
     80  * 2. Redistributions in binary form must reproduce the above copyright
     81  *    notice, this list of conditions and the following disclaimer in the
     82  *    documentation and/or other materials provided with the distribution.
     83  * 3. All advertising materials mentioning features or use of this software
     84  *    must display the following acknowledgement:
     85  *      This product includes software developed by the University of
     86  *      California, Berkeley and its contributors.
     87  * 4. Neither the name of the University nor the names of its contributors
     88  *    may be used to endorse or promote products derived from this software
     89  *    without specific prior written permission.
     90  *
     91  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     92  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     93  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     94  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     95  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     96  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     97  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     98  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     99  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    100  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    101  * SUCH DAMAGE.
    102  *
    103  * from: Utah $Hdr: cd.c 1.6 90/11/28$
    104  *
    105  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
    106  */
    107 
    108 /*
    109  * Copyright (c) 1995 Carnegie-Mellon University.
    110  * All rights reserved.
    111  *
    112  * Authors: Mark Holland, Jim Zelenka
    113  *
    114  * Permission to use, copy, modify and distribute this software and
    115  * its documentation is hereby granted, provided that both the copyright
    116  * notice and this permission notice appear in all copies of the
    117  * software, derivative works or modified versions, and any portions
    118  * thereof, and that both notices appear in supporting documentation.
    119  *
    120  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
    121  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
    122  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
    123  *
    124  * Carnegie Mellon requests users of this software to return to
    125  *
    126  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    127  *  School of Computer Science
    128  *  Carnegie Mellon University
    129  *  Pittsburgh PA 15213-3890
    130  *
    131  * any improvements or extensions that they make and grant Carnegie the
    132  * rights to redistribute these changes.
    133  */
    134 
    135 /***********************************************************
    136  *
    137  * rf_kintf.c -- the kernel interface routines for RAIDframe
    138  *
    139  ***********************************************************/
    140 
    141 #include <sys/cdefs.h>
    142 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.247 2008/06/07 17:50:34 oster Exp $");
    143 
    144 #include <sys/param.h>
    145 #include <sys/errno.h>
    146 #include <sys/pool.h>
    147 #include <sys/proc.h>
    148 #include <sys/queue.h>
    149 #include <sys/disk.h>
    150 #include <sys/device.h>
    151 #include <sys/stat.h>
    152 #include <sys/ioctl.h>
    153 #include <sys/fcntl.h>
    154 #include <sys/systm.h>
    155 #include <sys/vnode.h>
    156 #include <sys/disklabel.h>
    157 #include <sys/conf.h>
    158 #include <sys/buf.h>
    159 #include <sys/bufq.h>
    160 #include <sys/user.h>
    161 #include <sys/reboot.h>
    162 #include <sys/kauth.h>
    163 
    164 #include <prop/proplib.h>
    165 
    166 #include <dev/raidframe/raidframevar.h>
    167 #include <dev/raidframe/raidframeio.h>
    168 #include "raid.h"
    169 #include "opt_raid_autoconfig.h"
    170 #include "rf_raid.h"
    171 #include "rf_copyback.h"
    172 #include "rf_dag.h"
    173 #include "rf_dagflags.h"
    174 #include "rf_desc.h"
    175 #include "rf_diskqueue.h"
    176 #include "rf_etimer.h"
    177 #include "rf_general.h"
    178 #include "rf_kintf.h"
    179 #include "rf_options.h"
    180 #include "rf_driver.h"
    181 #include "rf_parityscan.h"
    182 #include "rf_threadstuff.h"
    183 
    184 #ifdef DEBUG
    185 int     rf_kdebug_level = 0;
    186 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    187 #else				/* DEBUG */
    188 #define db1_printf(a) { }
    189 #endif				/* DEBUG */
    190 
    191 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    192 
    193 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    194 
    195 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    196 						 * spare table */
    197 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    198 						 * installation process */
    199 
    200 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    201 
    202 /* prototypes */
    203 static void KernelWakeupFunc(struct buf *);
    204 static void InitBP(struct buf *, struct vnode *, unsigned,
    205     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    206     void *, int, struct proc *);
    207 static void raidinit(RF_Raid_t *);
    208 
    209 void raidattach(int);
    210 static int raid_match(struct device *, struct cfdata *, void *);
    211 static void raid_attach(struct device *, struct device *, void *);
    212 static int raid_detach(struct device *, int);
    213 
    214 dev_type_open(raidopen);
    215 dev_type_close(raidclose);
    216 dev_type_read(raidread);
    217 dev_type_write(raidwrite);
    218 dev_type_ioctl(raidioctl);
    219 dev_type_strategy(raidstrategy);
    220 dev_type_dump(raiddump);
    221 dev_type_size(raidsize);
    222 
    223 const struct bdevsw raid_bdevsw = {
    224 	raidopen, raidclose, raidstrategy, raidioctl,
    225 	raiddump, raidsize, D_DISK
    226 };
    227 
    228 const struct cdevsw raid_cdevsw = {
    229 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    230 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
    231 };
    232 
    233 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
    234 
    235 /* XXX Not sure if the following should be replacing the raidPtrs above,
    236    or if it should be used in conjunction with that...
    237 */
    238 
    239 struct raid_softc {
    240 	struct device *sc_dev;
    241 	int     sc_flags;	/* flags */
    242 	int     sc_cflags;	/* configuration flags */
    243 	uint64_t sc_size;	/* size of the raid device */
    244 	char    sc_xname[20];	/* XXX external name */
    245 	struct disk sc_dkdev;	/* generic disk device info */
    246 	struct bufq_state *buf_queue;	/* used for the device queue */
    247 };
    248 /* sc_flags */
    249 #define RAIDF_INITED	0x01	/* unit has been initialized */
    250 #define RAIDF_WLABEL	0x02	/* label area is writable */
    251 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    252 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    253 #define RAIDF_LOCKED	0x80	/* unit is locked */
    254 
    255 #define	raidunit(x)	DISKUNIT(x)
    256 int numraid = 0;
    257 
    258 extern struct cfdriver raid_cd;
    259 CFATTACH_DECL_NEW(raid, sizeof(struct raid_softc),
    260     raid_match, raid_attach, raid_detach, NULL);
    261 
    262 /*
    263  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    264  * Be aware that large numbers can allow the driver to consume a lot of
    265  * kernel memory, especially on writes, and in degraded mode reads.
    266  *
    267  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    268  * a single 64K write will typically require 64K for the old data,
    269  * 64K for the old parity, and 64K for the new parity, for a total
    270  * of 192K (if the parity buffer is not re-used immediately).
    271  * Even if it is used immediately, that's still 128K, which when multiplied
    272  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    273  *
    274  * Now in degraded mode, for example, a 64K read on the above setup may
    275  * require data reconstruction, which will require *all* of the 4 remaining
    276  * disks to participate -- 4 * 32K/disk == 128K again.
    277  */
    278 
    279 #ifndef RAIDOUTSTANDING
    280 #define RAIDOUTSTANDING   6
    281 #endif
    282 
    283 #define RAIDLABELDEV(dev)	\
    284 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    285 
    286 /* declared here, and made public, for the benefit of KVM stuff.. */
    287 struct raid_softc *raid_softc;
    288 
    289 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    290 				     struct disklabel *);
    291 static void raidgetdisklabel(dev_t);
    292 static void raidmakedisklabel(struct raid_softc *);
    293 
    294 static int raidlock(struct raid_softc *);
    295 static void raidunlock(struct raid_softc *);
    296 
    297 static void rf_markalldirty(RF_Raid_t *);
    298 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
    299 
    300 void rf_ReconThread(struct rf_recon_req *);
    301 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    302 void rf_CopybackThread(RF_Raid_t *raidPtr);
    303 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    304 int rf_autoconfig(struct device *self);
    305 void rf_buildroothack(RF_ConfigSet_t *);
    306 
    307 RF_AutoConfig_t *rf_find_raid_components(void);
    308 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    309 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    310 static int rf_reasonable_label(RF_ComponentLabel_t *);
    311 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    312 int rf_set_autoconfig(RF_Raid_t *, int);
    313 int rf_set_rootpartition(RF_Raid_t *, int);
    314 void rf_release_all_vps(RF_ConfigSet_t *);
    315 void rf_cleanup_config_set(RF_ConfigSet_t *);
    316 int rf_have_enough_components(RF_ConfigSet_t *);
    317 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    318 
    319 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    320 				  allow autoconfig to take place.
    321 				  Note that this is overridden by having
    322 				  RAID_AUTOCONFIG as an option in the
    323 				  kernel config file.  */
    324 
    325 struct RF_Pools_s rf_pools;
    326 
    327 void
    328 raidattach(int num)
    329 {
    330 	int raidID;
    331 	int i, rc;
    332 
    333 #ifdef DEBUG
    334 	printf("raidattach: Asked for %d units\n", num);
    335 #endif
    336 
    337 	if (num <= 0) {
    338 #ifdef DIAGNOSTIC
    339 		panic("raidattach: count <= 0");
    340 #endif
    341 		return;
    342 	}
    343 	/* This is where all the initialization stuff gets done. */
    344 
    345 	numraid = num;
    346 
    347 	/* Make some space for requested number of units... */
    348 
    349 	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
    350 	if (raidPtrs == NULL) {
    351 		panic("raidPtrs is NULL!!");
    352 	}
    353 
    354 	rf_mutex_init(&rf_sparet_wait_mutex);
    355 
    356 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    357 
    358 	for (i = 0; i < num; i++)
    359 		raidPtrs[i] = NULL;
    360 	rc = rf_BootRaidframe();
    361 	if (rc == 0)
    362 		aprint_normal("Kernelized RAIDframe activated\n");
    363 	else
    364 		panic("Serious error booting RAID!!");
    365 
    366 	/* put together some datastructures like the CCD device does.. This
    367 	 * lets us lock the device and what-not when it gets opened. */
    368 
    369 	raid_softc = (struct raid_softc *)
    370 		malloc(num * sizeof(struct raid_softc),
    371 		       M_RAIDFRAME, M_NOWAIT);
    372 	if (raid_softc == NULL) {
    373 		aprint_error("WARNING: no memory for RAIDframe driver\n");
    374 		return;
    375 	}
    376 
    377 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    378 
    379 	for (raidID = 0; raidID < num; raidID++) {
    380 		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
    381 
    382 		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
    383 			  (RF_Raid_t *));
    384 		if (raidPtrs[raidID] == NULL) {
    385 			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
    386 			numraid = raidID;
    387 			return;
    388 		}
    389 	}
    390 
    391 	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
    392 		aprint_error("raidattach: config_cfattach_attach failed?\n");
    393 	}
    394 
    395 #ifdef RAID_AUTOCONFIG
    396 	raidautoconfig = 1;
    397 #endif
    398 
    399 	/*
    400 	 * Register a finalizer which will be used to auto-config RAID
    401 	 * sets once all real hardware devices have been found.
    402 	 */
    403 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
    404 		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
    405 }
    406 
    407 int
    408 rf_autoconfig(struct device *self)
    409 {
    410 	RF_AutoConfig_t *ac_list;
    411 	RF_ConfigSet_t *config_sets;
    412 
    413 	if (raidautoconfig == 0)
    414 		return (0);
    415 
    416 	/* XXX This code can only be run once. */
    417 	raidautoconfig = 0;
    418 
    419 	/* 1. locate all RAID components on the system */
    420 #ifdef DEBUG
    421 	printf("Searching for RAID components...\n");
    422 #endif
    423 	ac_list = rf_find_raid_components();
    424 
    425 	/* 2. Sort them into their respective sets. */
    426 	config_sets = rf_create_auto_sets(ac_list);
    427 
    428 	/*
    429 	 * 3. Evaluate each set andconfigure the valid ones.
    430 	 * This gets done in rf_buildroothack().
    431 	 */
    432 	rf_buildroothack(config_sets);
    433 
    434 	return 1;
    435 }
    436 
    437 void
    438 rf_buildroothack(RF_ConfigSet_t *config_sets)
    439 {
    440 	RF_ConfigSet_t *cset;
    441 	RF_ConfigSet_t *next_cset;
    442 	int retcode;
    443 	int raidID;
    444 	int rootID;
    445 	int col;
    446 	int num_root;
    447 	char *devname;
    448 
    449 	rootID = 0;
    450 	num_root = 0;
    451 	cset = config_sets;
    452 	while(cset != NULL ) {
    453 		next_cset = cset->next;
    454 		if (rf_have_enough_components(cset) &&
    455 		    cset->ac->clabel->autoconfigure==1) {
    456 			retcode = rf_auto_config_set(cset,&raidID);
    457 			if (!retcode) {
    458 #ifdef DEBUG
    459 				printf("raid%d: configured ok\n", raidID);
    460 #endif
    461 				if (cset->rootable) {
    462 					rootID = raidID;
    463 					num_root++;
    464 				}
    465 			} else {
    466 				/* The autoconfig didn't work :( */
    467 #ifdef DEBUG
    468 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    469 #endif
    470 				rf_release_all_vps(cset);
    471 			}
    472 		} else {
    473 			/* we're not autoconfiguring this set...
    474 			   release the associated resources */
    475 			rf_release_all_vps(cset);
    476 		}
    477 		/* cleanup */
    478 		rf_cleanup_config_set(cset);
    479 		cset = next_cset;
    480 	}
    481 
    482 	/* if the user has specified what the root device should be
    483 	   then we don't touch booted_device or boothowto... */
    484 
    485 	if (rootspec != NULL)
    486 		return;
    487 
    488 	/* we found something bootable... */
    489 
    490 	if (num_root == 1) {
    491 		booted_device = raid_softc[rootID].sc_dev;
    492 	} else if (num_root > 1) {
    493 
    494 		/*
    495 		 * Maybe the MD code can help. If it cannot, then
    496 		 * setroot() will discover that we have no
    497 		 * booted_device and will ask the user if nothing was
    498 		 * hardwired in the kernel config file
    499 		 */
    500 
    501 		if (booted_device == NULL)
    502 			cpu_rootconf();
    503 		if (booted_device == NULL)
    504 			return;
    505 
    506 		num_root = 0;
    507 		for (raidID = 0; raidID < numraid; raidID++) {
    508 			if (raidPtrs[raidID]->valid == 0)
    509 				continue;
    510 
    511 			if (raidPtrs[raidID]->root_partition == 0)
    512 				continue;
    513 
    514 			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
    515 				devname = raidPtrs[raidID]->Disks[col].devname;
    516 				devname += sizeof("/dev/") - 1;
    517 				if (strncmp(devname, device_xname(booted_device),
    518 					    strlen(device_xname(booted_device))) != 0)
    519 					continue;
    520 #ifdef DEBUG
    521 				printf("raid%d includes boot device %s\n",
    522 				       raidID, devname);
    523 #endif
    524 				num_root++;
    525 				rootID = raidID;
    526 			}
    527 		}
    528 
    529 		if (num_root == 1) {
    530 			booted_device = raid_softc[rootID].sc_dev;
    531 		} else {
    532 			/* we can't guess.. require the user to answer... */
    533 			boothowto |= RB_ASKNAME;
    534 		}
    535 	}
    536 }
    537 
    538 
    539 int
    540 raidsize(dev_t dev)
    541 {
    542 	struct raid_softc *rs;
    543 	struct disklabel *lp;
    544 	int     part, unit, omask, size;
    545 
    546 	unit = raidunit(dev);
    547 	if (unit >= numraid)
    548 		return (-1);
    549 	rs = &raid_softc[unit];
    550 
    551 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    552 		return (-1);
    553 
    554 	part = DISKPART(dev);
    555 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    556 	lp = rs->sc_dkdev.dk_label;
    557 
    558 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
    559 		return (-1);
    560 
    561 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    562 		size = -1;
    563 	else
    564 		size = lp->d_partitions[part].p_size *
    565 		    (lp->d_secsize / DEV_BSIZE);
    566 
    567 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
    568 		return (-1);
    569 
    570 	return (size);
    571 
    572 }
    573 
    574 int
    575 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
    576 {
    577 	int     unit = raidunit(dev);
    578 	struct raid_softc *rs;
    579 	const struct bdevsw *bdev;
    580 	struct disklabel *lp;
    581 	RF_Raid_t *raidPtr;
    582 	daddr_t offset;
    583 	int     part, c, sparecol, j, scol, dumpto;
    584 	int     error = 0;
    585 
    586 	if (unit >= numraid)
    587 		return (ENXIO);
    588 
    589 	rs = &raid_softc[unit];
    590 	raidPtr = raidPtrs[unit];
    591 
    592 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    593 		return ENXIO;
    594 
    595 	/* we only support dumping to RAID 1 sets */
    596 	if (raidPtr->Layout.numDataCol != 1 ||
    597 	    raidPtr->Layout.numParityCol != 1)
    598 		return EINVAL;
    599 
    600 
    601 	if ((error = raidlock(rs)) != 0)
    602 		return error;
    603 
    604 	if (size % DEV_BSIZE != 0) {
    605 		error = EINVAL;
    606 		goto out;
    607 	}
    608 
    609 	if (blkno + size / DEV_BSIZE > rs->sc_size) {
    610 		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
    611 		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
    612 		    size / DEV_BSIZE, rs->sc_size);
    613 		error = EINVAL;
    614 		goto out;
    615 	}
    616 
    617 	part = DISKPART(dev);
    618 	lp = rs->sc_dkdev.dk_label;
    619 	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
    620 
    621 	/* figure out what device is alive.. */
    622 
    623 	/*
    624 	   Look for a component to dump to.  The preference for the
    625 	   component to dump to is as follows:
    626 	   1) the master
    627 	   2) a used_spare of the master
    628 	   3) the slave
    629 	   4) a used_spare of the slave
    630 	*/
    631 
    632 	dumpto = -1;
    633 	for (c = 0; c < raidPtr->numCol; c++) {
    634 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
    635 			/* this might be the one */
    636 			dumpto = c;
    637 			break;
    638 		}
    639 	}
    640 
    641 	/*
    642 	   At this point we have possibly selected a live master or a
    643 	   live slave.  We now check to see if there is a spared
    644 	   master (or a spared slave), if we didn't find a live master
    645 	   or a live slave.
    646 	*/
    647 
    648 	for (c = 0; c < raidPtr->numSpare; c++) {
    649 		sparecol = raidPtr->numCol + c;
    650 		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
    651 			/* How about this one? */
    652 			scol = -1;
    653 			for(j=0;j<raidPtr->numCol;j++) {
    654 				if (raidPtr->Disks[j].spareCol == sparecol) {
    655 					scol = j;
    656 					break;
    657 				}
    658 			}
    659 			if (scol == 0) {
    660 				/*
    661 				   We must have found a spared master!
    662 				   We'll take that over anything else
    663 				   found so far.  (We couldn't have
    664 				   found a real master before, since
    665 				   this is a used spare, and it's
    666 				   saying that it's replacing the
    667 				   master.)  On reboot (with
    668 				   autoconfiguration turned on)
    669 				   sparecol will become the 1st
    670 				   component (component0) of this set.
    671 				*/
    672 				dumpto = sparecol;
    673 				break;
    674 			} else if (scol != -1) {
    675 				/*
    676 				   Must be a spared slave.  We'll dump
    677 				   to that if we havn't found anything
    678 				   else so far.
    679 				*/
    680 				if (dumpto == -1)
    681 					dumpto = sparecol;
    682 			}
    683 		}
    684 	}
    685 
    686 	if (dumpto == -1) {
    687 		/* we couldn't find any live components to dump to!?!?
    688 		 */
    689 		error = EINVAL;
    690 		goto out;
    691 	}
    692 
    693 	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
    694 
    695 	/*
    696 	   Note that blkno is relative to this particular partition.
    697 	   By adding the offset of this partition in the RAID
    698 	   set, and also adding RF_PROTECTED_SECTORS, we get a
    699 	   value that is relative to the partition used for the
    700 	   underlying component.
    701 	*/
    702 
    703 	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
    704 				blkno + offset, va, size);
    705 
    706 out:
    707 	raidunlock(rs);
    708 
    709 	return error;
    710 }
    711 /* ARGSUSED */
    712 int
    713 raidopen(dev_t dev, int flags, int fmt,
    714     struct lwp *l)
    715 {
    716 	int     unit = raidunit(dev);
    717 	struct raid_softc *rs;
    718 	struct disklabel *lp;
    719 	int     part, pmask;
    720 	int     error = 0;
    721 
    722 	if (unit >= numraid)
    723 		return (ENXIO);
    724 	rs = &raid_softc[unit];
    725 
    726 	if ((error = raidlock(rs)) != 0)
    727 		return (error);
    728 	lp = rs->sc_dkdev.dk_label;
    729 
    730 	part = DISKPART(dev);
    731 
    732 	/*
    733 	 * If there are wedges, and this is not RAW_PART, then we
    734 	 * need to fail.
    735 	 */
    736 	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
    737 		error = EBUSY;
    738 		goto bad;
    739 	}
    740 	pmask = (1 << part);
    741 
    742 	if ((rs->sc_flags & RAIDF_INITED) &&
    743 	    (rs->sc_dkdev.dk_openmask == 0))
    744 		raidgetdisklabel(dev);
    745 
    746 	/* make sure that this partition exists */
    747 
    748 	if (part != RAW_PART) {
    749 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    750 		    ((part >= lp->d_npartitions) ||
    751 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    752 			error = ENXIO;
    753 			goto bad;
    754 		}
    755 	}
    756 	/* Prevent this unit from being unconfigured while open. */
    757 	switch (fmt) {
    758 	case S_IFCHR:
    759 		rs->sc_dkdev.dk_copenmask |= pmask;
    760 		break;
    761 
    762 	case S_IFBLK:
    763 		rs->sc_dkdev.dk_bopenmask |= pmask;
    764 		break;
    765 	}
    766 
    767 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    768 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    769 		/* First one... mark things as dirty... Note that we *MUST*
    770 		 have done a configure before this.  I DO NOT WANT TO BE
    771 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    772 		 THAT THEY BELONG TOGETHER!!!!! */
    773 		/* XXX should check to see if we're only open for reading
    774 		   here... If so, we needn't do this, but then need some
    775 		   other way of keeping track of what's happened.. */
    776 
    777 		rf_markalldirty( raidPtrs[unit] );
    778 	}
    779 
    780 
    781 	rs->sc_dkdev.dk_openmask =
    782 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    783 
    784 bad:
    785 	raidunlock(rs);
    786 
    787 	return (error);
    788 
    789 
    790 }
    791 /* ARGSUSED */
    792 int
    793 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
    794 {
    795 	int     unit = raidunit(dev);
    796 	struct cfdata *cf;
    797 	struct raid_softc *rs;
    798 	int     error = 0;
    799 	int     part;
    800 
    801 	if (unit >= numraid)
    802 		return (ENXIO);
    803 	rs = &raid_softc[unit];
    804 
    805 	if ((error = raidlock(rs)) != 0)
    806 		return (error);
    807 
    808 	part = DISKPART(dev);
    809 
    810 	/* ...that much closer to allowing unconfiguration... */
    811 	switch (fmt) {
    812 	case S_IFCHR:
    813 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    814 		break;
    815 
    816 	case S_IFBLK:
    817 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    818 		break;
    819 	}
    820 	rs->sc_dkdev.dk_openmask =
    821 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    822 
    823 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    824 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    825 		/* Last one... device is not unconfigured yet.
    826 		   Device shutdown has taken care of setting the
    827 		   clean bits if RAIDF_INITED is not set
    828 		   mark things as clean... */
    829 
    830 		rf_update_component_labels(raidPtrs[unit],
    831 						 RF_FINAL_COMPONENT_UPDATE);
    832 		if (doing_shutdown) {
    833 			/* last one, and we're going down, so
    834 			   lights out for this RAID set too. */
    835 			error = rf_Shutdown(raidPtrs[unit]);
    836 
    837 			/* It's no longer initialized... */
    838 			rs->sc_flags &= ~RAIDF_INITED;
    839 
    840 			/* detach the device */
    841 
    842 			cf = device_cfdata(rs->sc_dev);
    843 			error = config_detach(rs->sc_dev, DETACH_QUIET);
    844 			free(cf, M_RAIDFRAME);
    845 
    846 			/* Detach the disk. */
    847 			disk_detach(&rs->sc_dkdev);
    848 			disk_destroy(&rs->sc_dkdev);
    849 		}
    850 	}
    851 
    852 	raidunlock(rs);
    853 	return (0);
    854 
    855 }
    856 
    857 void
    858 raidstrategy(struct buf *bp)
    859 {
    860 	int s;
    861 
    862 	unsigned int raidID = raidunit(bp->b_dev);
    863 	RF_Raid_t *raidPtr;
    864 	struct raid_softc *rs = &raid_softc[raidID];
    865 	int     wlabel;
    866 
    867 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    868 		bp->b_error = ENXIO;
    869 		goto done;
    870 	}
    871 	if (raidID >= numraid || !raidPtrs[raidID]) {
    872 		bp->b_error = ENODEV;
    873 		goto done;
    874 	}
    875 	raidPtr = raidPtrs[raidID];
    876 	if (!raidPtr->valid) {
    877 		bp->b_error = ENODEV;
    878 		goto done;
    879 	}
    880 	if (bp->b_bcount == 0) {
    881 		db1_printf(("b_bcount is zero..\n"));
    882 		goto done;
    883 	}
    884 
    885 	/*
    886 	 * Do bounds checking and adjust transfer.  If there's an
    887 	 * error, the bounds check will flag that for us.
    888 	 */
    889 
    890 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    891 	if (DISKPART(bp->b_dev) == RAW_PART) {
    892 		uint64_t size; /* device size in DEV_BSIZE unit */
    893 
    894 		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
    895 			size = raidPtr->totalSectors <<
    896 			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
    897 		} else {
    898 			size = raidPtr->totalSectors >>
    899 			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
    900 		}
    901 		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
    902 			goto done;
    903 		}
    904 	} else {
    905 		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
    906 			db1_printf(("Bounds check failed!!:%d %d\n",
    907 				(int) bp->b_blkno, (int) wlabel));
    908 			goto done;
    909 		}
    910 	}
    911 	s = splbio();
    912 
    913 	bp->b_resid = 0;
    914 
    915 	/* stuff it onto our queue */
    916 	BUFQ_PUT(rs->buf_queue, bp);
    917 
    918 	/* scheduled the IO to happen at the next convenient time */
    919 	wakeup(&(raidPtrs[raidID]->iodone));
    920 
    921 	splx(s);
    922 	return;
    923 
    924 done:
    925 	bp->b_resid = bp->b_bcount;
    926 	biodone(bp);
    927 }
    928 /* ARGSUSED */
    929 int
    930 raidread(dev_t dev, struct uio *uio, int flags)
    931 {
    932 	int     unit = raidunit(dev);
    933 	struct raid_softc *rs;
    934 
    935 	if (unit >= numraid)
    936 		return (ENXIO);
    937 	rs = &raid_softc[unit];
    938 
    939 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    940 		return (ENXIO);
    941 
    942 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    943 
    944 }
    945 /* ARGSUSED */
    946 int
    947 raidwrite(dev_t dev, struct uio *uio, int flags)
    948 {
    949 	int     unit = raidunit(dev);
    950 	struct raid_softc *rs;
    951 
    952 	if (unit >= numraid)
    953 		return (ENXIO);
    954 	rs = &raid_softc[unit];
    955 
    956 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    957 		return (ENXIO);
    958 
    959 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    960 
    961 }
    962 
    963 int
    964 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
    965 {
    966 	int     unit = raidunit(dev);
    967 	int     error = 0;
    968 	int     part, pmask;
    969 	struct cfdata *cf;
    970 	struct raid_softc *rs;
    971 	RF_Config_t *k_cfg, *u_cfg;
    972 	RF_Raid_t *raidPtr;
    973 	RF_RaidDisk_t *diskPtr;
    974 	RF_AccTotals_t *totals;
    975 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    976 	u_char *specific_buf;
    977 	int retcode = 0;
    978 	int column;
    979 	int raidid;
    980 	struct rf_recon_req *rrcopy, *rr;
    981 	RF_ComponentLabel_t *clabel;
    982 	RF_ComponentLabel_t *ci_label;
    983 	RF_ComponentLabel_t **clabel_ptr;
    984 	RF_SingleComponent_t *sparePtr,*componentPtr;
    985 	RF_SingleComponent_t component;
    986 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    987 	int i, j, d;
    988 #ifdef __HAVE_OLD_DISKLABEL
    989 	struct disklabel newlabel;
    990 #endif
    991 	struct dkwedge_info *dkw;
    992 
    993 	if (unit >= numraid)
    994 		return (ENXIO);
    995 	rs = &raid_softc[unit];
    996 	raidPtr = raidPtrs[unit];
    997 
    998 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    999 		(int) DISKPART(dev), (int) unit, (int) cmd));
   1000 
   1001 	/* Must be open for writes for these commands... */
   1002 	switch (cmd) {
   1003 #ifdef DIOCGSECTORSIZE
   1004 	case DIOCGSECTORSIZE:
   1005 		*(u_int *)data = raidPtr->bytesPerSector;
   1006 		return 0;
   1007 	case DIOCGMEDIASIZE:
   1008 		*(off_t *)data =
   1009 		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
   1010 		return 0;
   1011 #endif
   1012 	case DIOCSDINFO:
   1013 	case DIOCWDINFO:
   1014 #ifdef __HAVE_OLD_DISKLABEL
   1015 	case ODIOCWDINFO:
   1016 	case ODIOCSDINFO:
   1017 #endif
   1018 	case DIOCWLABEL:
   1019 	case DIOCAWEDGE:
   1020 	case DIOCDWEDGE:
   1021 		if ((flag & FWRITE) == 0)
   1022 			return (EBADF);
   1023 	}
   1024 
   1025 	/* Must be initialized for these... */
   1026 	switch (cmd) {
   1027 	case DIOCGDINFO:
   1028 	case DIOCSDINFO:
   1029 	case DIOCWDINFO:
   1030 #ifdef __HAVE_OLD_DISKLABEL
   1031 	case ODIOCGDINFO:
   1032 	case ODIOCWDINFO:
   1033 	case ODIOCSDINFO:
   1034 	case ODIOCGDEFLABEL:
   1035 #endif
   1036 	case DIOCGPART:
   1037 	case DIOCWLABEL:
   1038 	case DIOCGDEFLABEL:
   1039 	case DIOCAWEDGE:
   1040 	case DIOCDWEDGE:
   1041 	case DIOCLWEDGES:
   1042 	case RAIDFRAME_SHUTDOWN:
   1043 	case RAIDFRAME_REWRITEPARITY:
   1044 	case RAIDFRAME_GET_INFO:
   1045 	case RAIDFRAME_RESET_ACCTOTALS:
   1046 	case RAIDFRAME_GET_ACCTOTALS:
   1047 	case RAIDFRAME_KEEP_ACCTOTALS:
   1048 	case RAIDFRAME_GET_SIZE:
   1049 	case RAIDFRAME_FAIL_DISK:
   1050 	case RAIDFRAME_COPYBACK:
   1051 	case RAIDFRAME_CHECK_RECON_STATUS:
   1052 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1053 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1054 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1055 	case RAIDFRAME_ADD_HOT_SPARE:
   1056 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1057 	case RAIDFRAME_INIT_LABELS:
   1058 	case RAIDFRAME_REBUILD_IN_PLACE:
   1059 	case RAIDFRAME_CHECK_PARITY:
   1060 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1061 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1062 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1063 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1064 	case RAIDFRAME_SET_AUTOCONFIG:
   1065 	case RAIDFRAME_SET_ROOT:
   1066 	case RAIDFRAME_DELETE_COMPONENT:
   1067 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1068 		if ((rs->sc_flags & RAIDF_INITED) == 0)
   1069 			return (ENXIO);
   1070 	}
   1071 
   1072 	switch (cmd) {
   1073 
   1074 		/* configure the system */
   1075 	case RAIDFRAME_CONFIGURE:
   1076 
   1077 		if (raidPtr->valid) {
   1078 			/* There is a valid RAID set running on this unit! */
   1079 			printf("raid%d: Device already configured!\n",unit);
   1080 			return(EINVAL);
   1081 		}
   1082 
   1083 		/* copy-in the configuration information */
   1084 		/* data points to a pointer to the configuration structure */
   1085 
   1086 		u_cfg = *((RF_Config_t **) data);
   1087 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
   1088 		if (k_cfg == NULL) {
   1089 			return (ENOMEM);
   1090 		}
   1091 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
   1092 		if (retcode) {
   1093 			RF_Free(k_cfg, sizeof(RF_Config_t));
   1094 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
   1095 				retcode));
   1096 			return (retcode);
   1097 		}
   1098 		/* allocate a buffer for the layout-specific data, and copy it
   1099 		 * in */
   1100 		if (k_cfg->layoutSpecificSize) {
   1101 			if (k_cfg->layoutSpecificSize > 10000) {
   1102 				/* sanity check */
   1103 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1104 				return (EINVAL);
   1105 			}
   1106 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
   1107 			    (u_char *));
   1108 			if (specific_buf == NULL) {
   1109 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1110 				return (ENOMEM);
   1111 			}
   1112 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1113 			    k_cfg->layoutSpecificSize);
   1114 			if (retcode) {
   1115 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1116 				RF_Free(specific_buf,
   1117 					k_cfg->layoutSpecificSize);
   1118 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
   1119 					retcode));
   1120 				return (retcode);
   1121 			}
   1122 		} else
   1123 			specific_buf = NULL;
   1124 		k_cfg->layoutSpecific = specific_buf;
   1125 
   1126 		/* should do some kind of sanity check on the configuration.
   1127 		 * Store the sum of all the bytes in the last byte? */
   1128 
   1129 		/* configure the system */
   1130 
   1131 		/*
   1132 		 * Clear the entire RAID descriptor, just to make sure
   1133 		 *  there is no stale data left in the case of a
   1134 		 *  reconfiguration
   1135 		 */
   1136 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
   1137 		raidPtr->raidid = unit;
   1138 
   1139 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
   1140 
   1141 		if (retcode == 0) {
   1142 
   1143 			/* allow this many simultaneous IO's to
   1144 			   this RAID device */
   1145 			raidPtr->openings = RAIDOUTSTANDING;
   1146 
   1147 			raidinit(raidPtr);
   1148 			rf_markalldirty(raidPtr);
   1149 		}
   1150 		/* free the buffers.  No return code here. */
   1151 		if (k_cfg->layoutSpecificSize) {
   1152 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1153 		}
   1154 		RF_Free(k_cfg, sizeof(RF_Config_t));
   1155 
   1156 		return (retcode);
   1157 
   1158 		/* shutdown the system */
   1159 	case RAIDFRAME_SHUTDOWN:
   1160 
   1161 		if ((error = raidlock(rs)) != 0)
   1162 			return (error);
   1163 
   1164 		/*
   1165 		 * If somebody has a partition mounted, we shouldn't
   1166 		 * shutdown.
   1167 		 */
   1168 
   1169 		part = DISKPART(dev);
   1170 		pmask = (1 << part);
   1171 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
   1172 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
   1173 			(rs->sc_dkdev.dk_copenmask & pmask))) {
   1174 			raidunlock(rs);
   1175 			return (EBUSY);
   1176 		}
   1177 
   1178 		retcode = rf_Shutdown(raidPtr);
   1179 
   1180 		/* It's no longer initialized... */
   1181 		rs->sc_flags &= ~RAIDF_INITED;
   1182 
   1183 		/* free the pseudo device attach bits */
   1184 
   1185 		cf = device_cfdata(rs->sc_dev);
   1186 		/* XXX this causes us to not return any errors
   1187 		   from the above call to rf_Shutdown() */
   1188 		retcode = config_detach(rs->sc_dev, DETACH_QUIET);
   1189 		free(cf, M_RAIDFRAME);
   1190 
   1191 		/* Detach the disk. */
   1192 		disk_detach(&rs->sc_dkdev);
   1193 		disk_destroy(&rs->sc_dkdev);
   1194 
   1195 		raidunlock(rs);
   1196 
   1197 		return (retcode);
   1198 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1199 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1200 		/* need to read the component label for the disk indicated
   1201 		   by row,column in clabel */
   1202 
   1203 		/* For practice, let's get it directly fromdisk, rather
   1204 		   than from the in-core copy */
   1205 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1206 			   (RF_ComponentLabel_t *));
   1207 		if (clabel == NULL)
   1208 			return (ENOMEM);
   1209 
   1210 		retcode = copyin( *clabel_ptr, clabel,
   1211 				  sizeof(RF_ComponentLabel_t));
   1212 
   1213 		if (retcode) {
   1214 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1215 			return(retcode);
   1216 		}
   1217 
   1218 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1219 
   1220 		column = clabel->column;
   1221 
   1222 		if ((column < 0) || (column >= raidPtr->numCol +
   1223 				     raidPtr->numSpare)) {
   1224 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1225 			return(EINVAL);
   1226 		}
   1227 
   1228 		retcode = raidread_component_label(raidPtr->Disks[column].dev,
   1229 				raidPtr->raid_cinfo[column].ci_vp,
   1230 				clabel );
   1231 
   1232 		if (retcode == 0) {
   1233 			retcode = copyout(clabel, *clabel_ptr,
   1234 					  sizeof(RF_ComponentLabel_t));
   1235 		}
   1236 		RF_Free(clabel, sizeof(RF_ComponentLabel_t));
   1237 		return (retcode);
   1238 
   1239 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1240 		clabel = (RF_ComponentLabel_t *) data;
   1241 
   1242 		/* XXX check the label for valid stuff... */
   1243 		/* Note that some things *should not* get modified --
   1244 		   the user should be re-initing the labels instead of
   1245 		   trying to patch things.
   1246 		   */
   1247 
   1248 		raidid = raidPtr->raidid;
   1249 #ifdef DEBUG
   1250 		printf("raid%d: Got component label:\n", raidid);
   1251 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1252 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1253 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1254 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1255 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1256 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1257 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1258 #endif
   1259 		clabel->row = 0;
   1260 		column = clabel->column;
   1261 
   1262 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1263 			return(EINVAL);
   1264 		}
   1265 
   1266 		/* XXX this isn't allowed to do anything for now :-) */
   1267 
   1268 		/* XXX and before it is, we need to fill in the rest
   1269 		   of the fields!?!?!?! */
   1270 #if 0
   1271 		raidwrite_component_label(
   1272 		     raidPtr->Disks[column].dev,
   1273 			    raidPtr->raid_cinfo[column].ci_vp,
   1274 			    clabel );
   1275 #endif
   1276 		return (0);
   1277 
   1278 	case RAIDFRAME_INIT_LABELS:
   1279 		clabel = (RF_ComponentLabel_t *) data;
   1280 		/*
   1281 		   we only want the serial number from
   1282 		   the above.  We get all the rest of the information
   1283 		   from the config that was used to create this RAID
   1284 		   set.
   1285 		   */
   1286 
   1287 		raidPtr->serial_number = clabel->serial_number;
   1288 
   1289 		RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
   1290 			  (RF_ComponentLabel_t *));
   1291 		if (ci_label == NULL)
   1292 			return (ENOMEM);
   1293 
   1294 		raid_init_component_label(raidPtr, ci_label);
   1295 		ci_label->serial_number = clabel->serial_number;
   1296 		ci_label->row = 0; /* we dont' pretend to support more */
   1297 
   1298 		for(column=0;column<raidPtr->numCol;column++) {
   1299 			diskPtr = &raidPtr->Disks[column];
   1300 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1301 				ci_label->partitionSize = diskPtr->partitionSize;
   1302 				ci_label->column = column;
   1303 				raidwrite_component_label(
   1304 							  raidPtr->Disks[column].dev,
   1305 							  raidPtr->raid_cinfo[column].ci_vp,
   1306 							  ci_label );
   1307 			}
   1308 		}
   1309 		RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
   1310 
   1311 		return (retcode);
   1312 	case RAIDFRAME_SET_AUTOCONFIG:
   1313 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1314 		printf("raid%d: New autoconfig value is: %d\n",
   1315 		       raidPtr->raidid, d);
   1316 		*(int *) data = d;
   1317 		return (retcode);
   1318 
   1319 	case RAIDFRAME_SET_ROOT:
   1320 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1321 		printf("raid%d: New rootpartition value is: %d\n",
   1322 		       raidPtr->raidid, d);
   1323 		*(int *) data = d;
   1324 		return (retcode);
   1325 
   1326 		/* initialize all parity */
   1327 	case RAIDFRAME_REWRITEPARITY:
   1328 
   1329 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1330 			/* Parity for RAID 0 is trivially correct */
   1331 			raidPtr->parity_good = RF_RAID_CLEAN;
   1332 			return(0);
   1333 		}
   1334 
   1335 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1336 			/* Re-write is already in progress! */
   1337 			return(EINVAL);
   1338 		}
   1339 
   1340 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1341 					   rf_RewriteParityThread,
   1342 					   raidPtr,"raid_parity");
   1343 		return (retcode);
   1344 
   1345 
   1346 	case RAIDFRAME_ADD_HOT_SPARE:
   1347 		sparePtr = (RF_SingleComponent_t *) data;
   1348 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
   1349 		retcode = rf_add_hot_spare(raidPtr, &component);
   1350 		return(retcode);
   1351 
   1352 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1353 		return(retcode);
   1354 
   1355 	case RAIDFRAME_DELETE_COMPONENT:
   1356 		componentPtr = (RF_SingleComponent_t *)data;
   1357 		memcpy( &component, componentPtr,
   1358 			sizeof(RF_SingleComponent_t));
   1359 		retcode = rf_delete_component(raidPtr, &component);
   1360 		return(retcode);
   1361 
   1362 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1363 		componentPtr = (RF_SingleComponent_t *)data;
   1364 		memcpy( &component, componentPtr,
   1365 			sizeof(RF_SingleComponent_t));
   1366 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1367 		return(retcode);
   1368 
   1369 	case RAIDFRAME_REBUILD_IN_PLACE:
   1370 
   1371 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1372 			/* Can't do this on a RAID 0!! */
   1373 			return(EINVAL);
   1374 		}
   1375 
   1376 		if (raidPtr->recon_in_progress == 1) {
   1377 			/* a reconstruct is already in progress! */
   1378 			return(EINVAL);
   1379 		}
   1380 
   1381 		componentPtr = (RF_SingleComponent_t *) data;
   1382 		memcpy( &component, componentPtr,
   1383 			sizeof(RF_SingleComponent_t));
   1384 		component.row = 0; /* we don't support any more */
   1385 		column = component.column;
   1386 
   1387 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1388 			return(EINVAL);
   1389 		}
   1390 
   1391 		RF_LOCK_MUTEX(raidPtr->mutex);
   1392 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1393 		    (raidPtr->numFailures > 0)) {
   1394 			/* XXX 0 above shouldn't be constant!!! */
   1395 			/* some component other than this has failed.
   1396 			   Let's not make things worse than they already
   1397 			   are... */
   1398 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1399 			       raidPtr->raidid);
   1400 			printf("raid%d:     Col: %d   Too many failures.\n",
   1401 			       raidPtr->raidid, column);
   1402 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1403 			return (EINVAL);
   1404 		}
   1405 		if (raidPtr->Disks[column].status ==
   1406 		    rf_ds_reconstructing) {
   1407 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1408 			       raidPtr->raidid);
   1409 			printf("raid%d:    Col: %d   Reconstruction already occuring!\n", raidPtr->raidid, column);
   1410 
   1411 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1412 			return (EINVAL);
   1413 		}
   1414 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1415 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1416 			return (EINVAL);
   1417 		}
   1418 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1419 
   1420 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1421 		if (rrcopy == NULL)
   1422 			return(ENOMEM);
   1423 
   1424 		rrcopy->raidPtr = (void *) raidPtr;
   1425 		rrcopy->col = column;
   1426 
   1427 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1428 					   rf_ReconstructInPlaceThread,
   1429 					   rrcopy,"raid_reconip");
   1430 		return(retcode);
   1431 
   1432 	case RAIDFRAME_GET_INFO:
   1433 		if (!raidPtr->valid)
   1434 			return (ENODEV);
   1435 		ucfgp = (RF_DeviceConfig_t **) data;
   1436 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1437 			  (RF_DeviceConfig_t *));
   1438 		if (d_cfg == NULL)
   1439 			return (ENOMEM);
   1440 		d_cfg->rows = 1; /* there is only 1 row now */
   1441 		d_cfg->cols = raidPtr->numCol;
   1442 		d_cfg->ndevs = raidPtr->numCol;
   1443 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1444 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1445 			return (ENOMEM);
   1446 		}
   1447 		d_cfg->nspares = raidPtr->numSpare;
   1448 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1449 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1450 			return (ENOMEM);
   1451 		}
   1452 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1453 		d = 0;
   1454 		for (j = 0; j < d_cfg->cols; j++) {
   1455 			d_cfg->devs[d] = raidPtr->Disks[j];
   1456 			d++;
   1457 		}
   1458 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1459 			d_cfg->spares[i] = raidPtr->Disks[j];
   1460 		}
   1461 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1462 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1463 
   1464 		return (retcode);
   1465 
   1466 	case RAIDFRAME_CHECK_PARITY:
   1467 		*(int *) data = raidPtr->parity_good;
   1468 		return (0);
   1469 
   1470 	case RAIDFRAME_RESET_ACCTOTALS:
   1471 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1472 		return (0);
   1473 
   1474 	case RAIDFRAME_GET_ACCTOTALS:
   1475 		totals = (RF_AccTotals_t *) data;
   1476 		*totals = raidPtr->acc_totals;
   1477 		return (0);
   1478 
   1479 	case RAIDFRAME_KEEP_ACCTOTALS:
   1480 		raidPtr->keep_acc_totals = *(int *)data;
   1481 		return (0);
   1482 
   1483 	case RAIDFRAME_GET_SIZE:
   1484 		*(int *) data = raidPtr->totalSectors;
   1485 		return (0);
   1486 
   1487 		/* fail a disk & optionally start reconstruction */
   1488 	case RAIDFRAME_FAIL_DISK:
   1489 
   1490 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1491 			/* Can't do this on a RAID 0!! */
   1492 			return(EINVAL);
   1493 		}
   1494 
   1495 		rr = (struct rf_recon_req *) data;
   1496 		rr->row = 0;
   1497 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1498 			return (EINVAL);
   1499 
   1500 
   1501 		RF_LOCK_MUTEX(raidPtr->mutex);
   1502 		if (raidPtr->status == rf_rs_reconstructing) {
   1503 			/* you can't fail a disk while we're reconstructing! */
   1504 			/* XXX wrong for RAID6 */
   1505 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1506 			return (EINVAL);
   1507 		}
   1508 		if ((raidPtr->Disks[rr->col].status ==
   1509 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1510 			/* some other component has failed.  Let's not make
   1511 			   things worse. XXX wrong for RAID6 */
   1512 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1513 			return (EINVAL);
   1514 		}
   1515 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1516 			/* Can't fail a spared disk! */
   1517 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1518 			return (EINVAL);
   1519 		}
   1520 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1521 
   1522 		/* make a copy of the recon request so that we don't rely on
   1523 		 * the user's buffer */
   1524 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1525 		if (rrcopy == NULL)
   1526 			return(ENOMEM);
   1527 		memcpy(rrcopy, rr, sizeof(*rr));
   1528 		rrcopy->raidPtr = (void *) raidPtr;
   1529 
   1530 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1531 					   rf_ReconThread,
   1532 					   rrcopy,"raid_recon");
   1533 		return (0);
   1534 
   1535 		/* invoke a copyback operation after recon on whatever disk
   1536 		 * needs it, if any */
   1537 	case RAIDFRAME_COPYBACK:
   1538 
   1539 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1540 			/* This makes no sense on a RAID 0!! */
   1541 			return(EINVAL);
   1542 		}
   1543 
   1544 		if (raidPtr->copyback_in_progress == 1) {
   1545 			/* Copyback is already in progress! */
   1546 			return(EINVAL);
   1547 		}
   1548 
   1549 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1550 					   rf_CopybackThread,
   1551 					   raidPtr,"raid_copyback");
   1552 		return (retcode);
   1553 
   1554 		/* return the percentage completion of reconstruction */
   1555 	case RAIDFRAME_CHECK_RECON_STATUS:
   1556 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1557 			/* This makes no sense on a RAID 0, so tell the
   1558 			   user it's done. */
   1559 			*(int *) data = 100;
   1560 			return(0);
   1561 		}
   1562 		if (raidPtr->status != rf_rs_reconstructing)
   1563 			*(int *) data = 100;
   1564 		else {
   1565 			if (raidPtr->reconControl->numRUsTotal > 0) {
   1566 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
   1567 			} else {
   1568 				*(int *) data = 0;
   1569 			}
   1570 		}
   1571 		return (0);
   1572 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1573 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1574 		if (raidPtr->status != rf_rs_reconstructing) {
   1575 			progressInfo.remaining = 0;
   1576 			progressInfo.completed = 100;
   1577 			progressInfo.total = 100;
   1578 		} else {
   1579 			progressInfo.total =
   1580 				raidPtr->reconControl->numRUsTotal;
   1581 			progressInfo.completed =
   1582 				raidPtr->reconControl->numRUsComplete;
   1583 			progressInfo.remaining = progressInfo.total -
   1584 				progressInfo.completed;
   1585 		}
   1586 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1587 				  sizeof(RF_ProgressInfo_t));
   1588 		return (retcode);
   1589 
   1590 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1591 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1592 			/* This makes no sense on a RAID 0, so tell the
   1593 			   user it's done. */
   1594 			*(int *) data = 100;
   1595 			return(0);
   1596 		}
   1597 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1598 			*(int *) data = 100 *
   1599 				raidPtr->parity_rewrite_stripes_done /
   1600 				raidPtr->Layout.numStripe;
   1601 		} else {
   1602 			*(int *) data = 100;
   1603 		}
   1604 		return (0);
   1605 
   1606 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1607 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1608 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1609 			progressInfo.total = raidPtr->Layout.numStripe;
   1610 			progressInfo.completed =
   1611 				raidPtr->parity_rewrite_stripes_done;
   1612 			progressInfo.remaining = progressInfo.total -
   1613 				progressInfo.completed;
   1614 		} else {
   1615 			progressInfo.remaining = 0;
   1616 			progressInfo.completed = 100;
   1617 			progressInfo.total = 100;
   1618 		}
   1619 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1620 				  sizeof(RF_ProgressInfo_t));
   1621 		return (retcode);
   1622 
   1623 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1624 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1625 			/* This makes no sense on a RAID 0 */
   1626 			*(int *) data = 100;
   1627 			return(0);
   1628 		}
   1629 		if (raidPtr->copyback_in_progress == 1) {
   1630 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1631 				raidPtr->Layout.numStripe;
   1632 		} else {
   1633 			*(int *) data = 100;
   1634 		}
   1635 		return (0);
   1636 
   1637 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1638 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1639 		if (raidPtr->copyback_in_progress == 1) {
   1640 			progressInfo.total = raidPtr->Layout.numStripe;
   1641 			progressInfo.completed =
   1642 				raidPtr->copyback_stripes_done;
   1643 			progressInfo.remaining = progressInfo.total -
   1644 				progressInfo.completed;
   1645 		} else {
   1646 			progressInfo.remaining = 0;
   1647 			progressInfo.completed = 100;
   1648 			progressInfo.total = 100;
   1649 		}
   1650 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1651 				  sizeof(RF_ProgressInfo_t));
   1652 		return (retcode);
   1653 
   1654 		/* the sparetable daemon calls this to wait for the kernel to
   1655 		 * need a spare table. this ioctl does not return until a
   1656 		 * spare table is needed. XXX -- calling mpsleep here in the
   1657 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1658 		 * -- I should either compute the spare table in the kernel,
   1659 		 * or have a different -- XXX XXX -- interface (a different
   1660 		 * character device) for delivering the table     -- XXX */
   1661 #if 0
   1662 	case RAIDFRAME_SPARET_WAIT:
   1663 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1664 		while (!rf_sparet_wait_queue)
   1665 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1666 		waitreq = rf_sparet_wait_queue;
   1667 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1668 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1669 
   1670 		/* structure assignment */
   1671 		*((RF_SparetWait_t *) data) = *waitreq;
   1672 
   1673 		RF_Free(waitreq, sizeof(*waitreq));
   1674 		return (0);
   1675 
   1676 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1677 		 * code in it that will cause the dameon to exit */
   1678 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1679 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1680 		waitreq->fcol = -1;
   1681 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1682 		waitreq->next = rf_sparet_wait_queue;
   1683 		rf_sparet_wait_queue = waitreq;
   1684 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1685 		wakeup(&rf_sparet_wait_queue);
   1686 		return (0);
   1687 
   1688 		/* used by the spare table daemon to deliver a spare table
   1689 		 * into the kernel */
   1690 	case RAIDFRAME_SEND_SPARET:
   1691 
   1692 		/* install the spare table */
   1693 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1694 
   1695 		/* respond to the requestor.  the return status of the spare
   1696 		 * table installation is passed in the "fcol" field */
   1697 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1698 		waitreq->fcol = retcode;
   1699 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1700 		waitreq->next = rf_sparet_resp_queue;
   1701 		rf_sparet_resp_queue = waitreq;
   1702 		wakeup(&rf_sparet_resp_queue);
   1703 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1704 
   1705 		return (retcode);
   1706 #endif
   1707 
   1708 	default:
   1709 		break; /* fall through to the os-specific code below */
   1710 
   1711 	}
   1712 
   1713 	if (!raidPtr->valid)
   1714 		return (EINVAL);
   1715 
   1716 	/*
   1717 	 * Add support for "regular" device ioctls here.
   1718 	 */
   1719 
   1720 	switch (cmd) {
   1721 	case DIOCGDINFO:
   1722 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1723 		break;
   1724 #ifdef __HAVE_OLD_DISKLABEL
   1725 	case ODIOCGDINFO:
   1726 		newlabel = *(rs->sc_dkdev.dk_label);
   1727 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1728 			return ENOTTY;
   1729 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1730 		break;
   1731 #endif
   1732 
   1733 	case DIOCGPART:
   1734 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1735 		((struct partinfo *) data)->part =
   1736 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1737 		break;
   1738 
   1739 	case DIOCWDINFO:
   1740 	case DIOCSDINFO:
   1741 #ifdef __HAVE_OLD_DISKLABEL
   1742 	case ODIOCWDINFO:
   1743 	case ODIOCSDINFO:
   1744 #endif
   1745 	{
   1746 		struct disklabel *lp;
   1747 #ifdef __HAVE_OLD_DISKLABEL
   1748 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1749 			memset(&newlabel, 0, sizeof newlabel);
   1750 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1751 			lp = &newlabel;
   1752 		} else
   1753 #endif
   1754 		lp = (struct disklabel *)data;
   1755 
   1756 		if ((error = raidlock(rs)) != 0)
   1757 			return (error);
   1758 
   1759 		rs->sc_flags |= RAIDF_LABELLING;
   1760 
   1761 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1762 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1763 		if (error == 0) {
   1764 			if (cmd == DIOCWDINFO
   1765 #ifdef __HAVE_OLD_DISKLABEL
   1766 			    || cmd == ODIOCWDINFO
   1767 #endif
   1768 			   )
   1769 				error = writedisklabel(RAIDLABELDEV(dev),
   1770 				    raidstrategy, rs->sc_dkdev.dk_label,
   1771 				    rs->sc_dkdev.dk_cpulabel);
   1772 		}
   1773 		rs->sc_flags &= ~RAIDF_LABELLING;
   1774 
   1775 		raidunlock(rs);
   1776 
   1777 		if (error)
   1778 			return (error);
   1779 		break;
   1780 	}
   1781 
   1782 	case DIOCWLABEL:
   1783 		if (*(int *) data != 0)
   1784 			rs->sc_flags |= RAIDF_WLABEL;
   1785 		else
   1786 			rs->sc_flags &= ~RAIDF_WLABEL;
   1787 		break;
   1788 
   1789 	case DIOCGDEFLABEL:
   1790 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1791 		break;
   1792 
   1793 #ifdef __HAVE_OLD_DISKLABEL
   1794 	case ODIOCGDEFLABEL:
   1795 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1796 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1797 			return ENOTTY;
   1798 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1799 		break;
   1800 #endif
   1801 
   1802 	case DIOCAWEDGE:
   1803 	case DIOCDWEDGE:
   1804 	    	dkw = (void *)data;
   1805 
   1806 		/* If the ioctl happens here, the parent is us. */
   1807 		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
   1808 		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
   1809 
   1810 	case DIOCLWEDGES:
   1811 		return dkwedge_list(&rs->sc_dkdev,
   1812 		    (struct dkwedge_list *)data, l);
   1813 
   1814 	default:
   1815 		retcode = ENOTTY;
   1816 	}
   1817 	return (retcode);
   1818 
   1819 }
   1820 
   1821 
   1822 /* raidinit -- complete the rest of the initialization for the
   1823    RAIDframe device.  */
   1824 
   1825 
   1826 static void
   1827 raidinit(RF_Raid_t *raidPtr)
   1828 {
   1829 	struct cfdata *cf;
   1830 	struct raid_softc *rs;
   1831 	int     unit;
   1832 
   1833 	unit = raidPtr->raidid;
   1834 
   1835 	rs = &raid_softc[unit];
   1836 
   1837 	/* XXX should check return code first... */
   1838 	rs->sc_flags |= RAIDF_INITED;
   1839 
   1840 	/* XXX doesn't check bounds. */
   1841 	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
   1842 
   1843 	/* attach the pseudo device */
   1844 	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
   1845 	cf->cf_name = raid_cd.cd_name;
   1846 	cf->cf_atname = raid_cd.cd_name;
   1847 	cf->cf_unit = unit;
   1848 	cf->cf_fstate = FSTATE_STAR;
   1849 
   1850 	rs->sc_dev = config_attach_pseudo(cf);
   1851 
   1852 	if (rs->sc_dev==NULL) {
   1853 		printf("raid%d: config_attach_pseudo failed\n",
   1854 		       raidPtr->raidid);
   1855 	}
   1856 
   1857 	/* disk_attach actually creates space for the CPU disklabel, among
   1858 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1859 	 * with disklabels. */
   1860 
   1861 	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
   1862 	disk_attach(&rs->sc_dkdev);
   1863 
   1864 	/* XXX There may be a weird interaction here between this, and
   1865 	 * protectedSectors, as used in RAIDframe.  */
   1866 
   1867 	rs->sc_size = raidPtr->totalSectors;
   1868 
   1869 	dkwedge_discover(&rs->sc_dkdev);
   1870 
   1871 	rf_set_properties(rs, raidPtr);
   1872 
   1873 }
   1874 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   1875 /* wake up the daemon & tell it to get us a spare table
   1876  * XXX
   1877  * the entries in the queues should be tagged with the raidPtr
   1878  * so that in the extremely rare case that two recons happen at once,
   1879  * we know for which device were requesting a spare table
   1880  * XXX
   1881  *
   1882  * XXX This code is not currently used. GO
   1883  */
   1884 int
   1885 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
   1886 {
   1887 	int     retcode;
   1888 
   1889 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1890 	req->next = rf_sparet_wait_queue;
   1891 	rf_sparet_wait_queue = req;
   1892 	wakeup(&rf_sparet_wait_queue);
   1893 
   1894 	/* mpsleep unlocks the mutex */
   1895 	while (!rf_sparet_resp_queue) {
   1896 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1897 		    "raidframe getsparetable", 0);
   1898 	}
   1899 	req = rf_sparet_resp_queue;
   1900 	rf_sparet_resp_queue = req->next;
   1901 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1902 
   1903 	retcode = req->fcol;
   1904 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1905 					 * alloc'd */
   1906 	return (retcode);
   1907 }
   1908 #endif
   1909 
   1910 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1911  * bp & passes it down.
   1912  * any calls originating in the kernel must use non-blocking I/O
   1913  * do some extra sanity checking to return "appropriate" error values for
   1914  * certain conditions (to make some standard utilities work)
   1915  *
   1916  * Formerly known as: rf_DoAccessKernel
   1917  */
   1918 void
   1919 raidstart(RF_Raid_t *raidPtr)
   1920 {
   1921 	RF_SectorCount_t num_blocks, pb, sum;
   1922 	RF_RaidAddr_t raid_addr;
   1923 	struct partition *pp;
   1924 	daddr_t blocknum;
   1925 	int     unit;
   1926 	struct raid_softc *rs;
   1927 	int     do_async;
   1928 	struct buf *bp;
   1929 	int rc;
   1930 
   1931 	unit = raidPtr->raidid;
   1932 	rs = &raid_softc[unit];
   1933 
   1934 	/* quick check to see if anything has died recently */
   1935 	RF_LOCK_MUTEX(raidPtr->mutex);
   1936 	if (raidPtr->numNewFailures > 0) {
   1937 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1938 		rf_update_component_labels(raidPtr,
   1939 					   RF_NORMAL_COMPONENT_UPDATE);
   1940 		RF_LOCK_MUTEX(raidPtr->mutex);
   1941 		raidPtr->numNewFailures--;
   1942 	}
   1943 
   1944 	/* Check to see if we're at the limit... */
   1945 	while (raidPtr->openings > 0) {
   1946 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1947 
   1948 		/* get the next item, if any, from the queue */
   1949 		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
   1950 			/* nothing more to do */
   1951 			return;
   1952 		}
   1953 
   1954 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1955 		 * partition.. Need to make it absolute to the underlying
   1956 		 * device.. */
   1957 
   1958 		blocknum = bp->b_blkno;
   1959 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1960 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1961 			blocknum += pp->p_offset;
   1962 		}
   1963 
   1964 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1965 			    (int) blocknum));
   1966 
   1967 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1968 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1969 
   1970 		/* *THIS* is where we adjust what block we're going to...
   1971 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1972 		raid_addr = blocknum;
   1973 
   1974 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1975 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1976 		sum = raid_addr + num_blocks + pb;
   1977 		if (1 || rf_debugKernelAccess) {
   1978 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1979 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1980 				    (int) pb, (int) bp->b_resid));
   1981 		}
   1982 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1983 		    || (sum < num_blocks) || (sum < pb)) {
   1984 			bp->b_error = ENOSPC;
   1985 			bp->b_resid = bp->b_bcount;
   1986 			biodone(bp);
   1987 			RF_LOCK_MUTEX(raidPtr->mutex);
   1988 			continue;
   1989 		}
   1990 		/*
   1991 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1992 		 */
   1993 
   1994 		if (bp->b_bcount & raidPtr->sectorMask) {
   1995 			bp->b_error = EINVAL;
   1996 			bp->b_resid = bp->b_bcount;
   1997 			biodone(bp);
   1998 			RF_LOCK_MUTEX(raidPtr->mutex);
   1999 			continue;
   2000 
   2001 		}
   2002 		db1_printf(("Calling DoAccess..\n"));
   2003 
   2004 
   2005 		RF_LOCK_MUTEX(raidPtr->mutex);
   2006 		raidPtr->openings--;
   2007 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   2008 
   2009 		/*
   2010 		 * Everything is async.
   2011 		 */
   2012 		do_async = 1;
   2013 
   2014 		disk_busy(&rs->sc_dkdev);
   2015 
   2016 		/* XXX we're still at splbio() here... do we *really*
   2017 		   need to be? */
   2018 
   2019 		/* don't ever condition on bp->b_flags & B_WRITE.
   2020 		 * always condition on B_READ instead */
   2021 
   2022 		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   2023 				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   2024 				 do_async, raid_addr, num_blocks,
   2025 				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   2026 
   2027 		if (rc) {
   2028 			bp->b_error = rc;
   2029 			bp->b_resid = bp->b_bcount;
   2030 			biodone(bp);
   2031 			/* continue loop */
   2032 		}
   2033 
   2034 		RF_LOCK_MUTEX(raidPtr->mutex);
   2035 	}
   2036 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   2037 }
   2038 
   2039 
   2040 
   2041 
   2042 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   2043 
   2044 int
   2045 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
   2046 {
   2047 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   2048 	struct buf *bp;
   2049 
   2050 	req->queue = queue;
   2051 
   2052 #if DIAGNOSTIC
   2053 	if (queue->raidPtr->raidid >= numraid) {
   2054 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
   2055 		    numraid);
   2056 		panic("Invalid Unit number in rf_DispatchKernelIO");
   2057 	}
   2058 #endif
   2059 
   2060 	bp = req->bp;
   2061 
   2062 	switch (req->type) {
   2063 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   2064 		/* XXX need to do something extra here.. */
   2065 		/* I'm leaving this in, as I've never actually seen it used,
   2066 		 * and I'd like folks to report it... GO */
   2067 		printf(("WAKEUP CALLED\n"));
   2068 		queue->numOutstanding++;
   2069 
   2070 		bp->b_flags = 0;
   2071 		bp->b_private = req;
   2072 
   2073 		KernelWakeupFunc(bp);
   2074 		break;
   2075 
   2076 	case RF_IO_TYPE_READ:
   2077 	case RF_IO_TYPE_WRITE:
   2078 #if RF_ACC_TRACE > 0
   2079 		if (req->tracerec) {
   2080 			RF_ETIMER_START(req->tracerec->timer);
   2081 		}
   2082 #endif
   2083 		InitBP(bp, queue->rf_cinfo->ci_vp,
   2084 		    op, queue->rf_cinfo->ci_dev,
   2085 		    req->sectorOffset, req->numSector,
   2086 		    req->buf, KernelWakeupFunc, (void *) req,
   2087 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   2088 
   2089 		if (rf_debugKernelAccess) {
   2090 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   2091 				(long) bp->b_blkno));
   2092 		}
   2093 		queue->numOutstanding++;
   2094 		queue->last_deq_sector = req->sectorOffset;
   2095 		/* acc wouldn't have been let in if there were any pending
   2096 		 * reqs at any other priority */
   2097 		queue->curPriority = req->priority;
   2098 
   2099 		db1_printf(("Going for %c to unit %d col %d\n",
   2100 			    req->type, queue->raidPtr->raidid,
   2101 			    queue->col));
   2102 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   2103 			(int) req->sectorOffset, (int) req->numSector,
   2104 			(int) (req->numSector <<
   2105 			    queue->raidPtr->logBytesPerSector),
   2106 			(int) queue->raidPtr->logBytesPerSector));
   2107 		bdev_strategy(bp);
   2108 
   2109 		break;
   2110 
   2111 	default:
   2112 		panic("bad req->type in rf_DispatchKernelIO");
   2113 	}
   2114 	db1_printf(("Exiting from DispatchKernelIO\n"));
   2115 
   2116 	return (0);
   2117 }
   2118 /* this is the callback function associated with a I/O invoked from
   2119    kernel code.
   2120  */
   2121 static void
   2122 KernelWakeupFunc(struct buf *bp)
   2123 {
   2124 	RF_DiskQueueData_t *req = NULL;
   2125 	RF_DiskQueue_t *queue;
   2126 	int s;
   2127 
   2128 	s = splbio();
   2129 	db1_printf(("recovering the request queue:\n"));
   2130 	req = bp->b_private;
   2131 
   2132 	queue = (RF_DiskQueue_t *) req->queue;
   2133 
   2134 #if RF_ACC_TRACE > 0
   2135 	if (req->tracerec) {
   2136 		RF_ETIMER_STOP(req->tracerec->timer);
   2137 		RF_ETIMER_EVAL(req->tracerec->timer);
   2138 		RF_LOCK_MUTEX(rf_tracing_mutex);
   2139 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2140 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2141 		req->tracerec->num_phys_ios++;
   2142 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   2143 	}
   2144 #endif
   2145 
   2146 	/* XXX Ok, let's get aggressive... If b_error is set, let's go
   2147 	 * ballistic, and mark the component as hosed... */
   2148 
   2149 	if (bp->b_error != 0) {
   2150 		/* Mark the disk as dead */
   2151 		/* but only mark it once... */
   2152 		/* and only if it wouldn't leave this RAID set
   2153 		   completely broken */
   2154 		if (((queue->raidPtr->Disks[queue->col].status ==
   2155 		      rf_ds_optimal) ||
   2156 		     (queue->raidPtr->Disks[queue->col].status ==
   2157 		      rf_ds_used_spare)) &&
   2158 		     (queue->raidPtr->numFailures <
   2159 		      queue->raidPtr->Layout.map->faultsTolerated)) {
   2160 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   2161 			       queue->raidPtr->raidid,
   2162 			       queue->raidPtr->Disks[queue->col].devname);
   2163 			queue->raidPtr->Disks[queue->col].status =
   2164 			    rf_ds_failed;
   2165 			queue->raidPtr->status = rf_rs_degraded;
   2166 			queue->raidPtr->numFailures++;
   2167 			queue->raidPtr->numNewFailures++;
   2168 		} else {	/* Disk is already dead... */
   2169 			/* printf("Disk already marked as dead!\n"); */
   2170 		}
   2171 
   2172 	}
   2173 
   2174 	/* Fill in the error value */
   2175 
   2176 	req->error = bp->b_error;
   2177 
   2178 	simple_lock(&queue->raidPtr->iodone_lock);
   2179 
   2180 	/* Drop this one on the "finished" queue... */
   2181 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   2182 
   2183 	/* Let the raidio thread know there is work to be done. */
   2184 	wakeup(&(queue->raidPtr->iodone));
   2185 
   2186 	simple_unlock(&queue->raidPtr->iodone_lock);
   2187 
   2188 	splx(s);
   2189 }
   2190 
   2191 
   2192 
   2193 /*
   2194  * initialize a buf structure for doing an I/O in the kernel.
   2195  */
   2196 static void
   2197 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2198        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
   2199        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
   2200        struct proc *b_proc)
   2201 {
   2202 	/* bp->b_flags       = B_PHYS | rw_flag; */
   2203 	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
   2204 	bp->b_oflags = 0;
   2205 	bp->b_cflags = 0;
   2206 	bp->b_bcount = numSect << logBytesPerSector;
   2207 	bp->b_bufsize = bp->b_bcount;
   2208 	bp->b_error = 0;
   2209 	bp->b_dev = dev;
   2210 	bp->b_data = bf;
   2211 	bp->b_blkno = startSect;
   2212 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2213 	if (bp->b_bcount == 0) {
   2214 		panic("bp->b_bcount is zero in InitBP!!");
   2215 	}
   2216 	bp->b_proc = b_proc;
   2217 	bp->b_iodone = cbFunc;
   2218 	bp->b_private = cbArg;
   2219 }
   2220 
   2221 static void
   2222 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
   2223 		    struct disklabel *lp)
   2224 {
   2225 	memset(lp, 0, sizeof(*lp));
   2226 
   2227 	/* fabricate a label... */
   2228 	lp->d_secperunit = raidPtr->totalSectors;
   2229 	lp->d_secsize = raidPtr->bytesPerSector;
   2230 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2231 	lp->d_ntracks = 4 * raidPtr->numCol;
   2232 	lp->d_ncylinders = raidPtr->totalSectors /
   2233 		(lp->d_nsectors * lp->d_ntracks);
   2234 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2235 
   2236 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2237 	lp->d_type = DTYPE_RAID;
   2238 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2239 	lp->d_rpm = 3600;
   2240 	lp->d_interleave = 1;
   2241 	lp->d_flags = 0;
   2242 
   2243 	lp->d_partitions[RAW_PART].p_offset = 0;
   2244 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2245 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2246 	lp->d_npartitions = RAW_PART + 1;
   2247 
   2248 	lp->d_magic = DISKMAGIC;
   2249 	lp->d_magic2 = DISKMAGIC;
   2250 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2251 
   2252 }
   2253 /*
   2254  * Read the disklabel from the raid device.  If one is not present, fake one
   2255  * up.
   2256  */
   2257 static void
   2258 raidgetdisklabel(dev_t dev)
   2259 {
   2260 	int     unit = raidunit(dev);
   2261 	struct raid_softc *rs = &raid_softc[unit];
   2262 	const char   *errstring;
   2263 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2264 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2265 	RF_Raid_t *raidPtr;
   2266 
   2267 	db1_printf(("Getting the disklabel...\n"));
   2268 
   2269 	memset(clp, 0, sizeof(*clp));
   2270 
   2271 	raidPtr = raidPtrs[unit];
   2272 
   2273 	raidgetdefaultlabel(raidPtr, rs, lp);
   2274 
   2275 	/*
   2276 	 * Call the generic disklabel extraction routine.
   2277 	 */
   2278 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2279 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2280 	if (errstring)
   2281 		raidmakedisklabel(rs);
   2282 	else {
   2283 		int     i;
   2284 		struct partition *pp;
   2285 
   2286 		/*
   2287 		 * Sanity check whether the found disklabel is valid.
   2288 		 *
   2289 		 * This is necessary since total size of the raid device
   2290 		 * may vary when an interleave is changed even though exactly
   2291 		 * same components are used, and old disklabel may used
   2292 		 * if that is found.
   2293 		 */
   2294 		if (lp->d_secperunit != rs->sc_size)
   2295 			printf("raid%d: WARNING: %s: "
   2296 			    "total sector size in disklabel (%d) != "
   2297 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2298 			    lp->d_secperunit, (long) rs->sc_size);
   2299 		for (i = 0; i < lp->d_npartitions; i++) {
   2300 			pp = &lp->d_partitions[i];
   2301 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2302 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2303 				       "exceeds the size of raid (%ld)\n",
   2304 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2305 		}
   2306 	}
   2307 
   2308 }
   2309 /*
   2310  * Take care of things one might want to take care of in the event
   2311  * that a disklabel isn't present.
   2312  */
   2313 static void
   2314 raidmakedisklabel(struct raid_softc *rs)
   2315 {
   2316 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2317 	db1_printf(("Making a label..\n"));
   2318 
   2319 	/*
   2320 	 * For historical reasons, if there's no disklabel present
   2321 	 * the raw partition must be marked FS_BSDFFS.
   2322 	 */
   2323 
   2324 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2325 
   2326 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2327 
   2328 	lp->d_checksum = dkcksum(lp);
   2329 }
   2330 /*
   2331  * Wait interruptibly for an exclusive lock.
   2332  *
   2333  * XXX
   2334  * Several drivers do this; it should be abstracted and made MP-safe.
   2335  * (Hmm... where have we seen this warning before :->  GO )
   2336  */
   2337 static int
   2338 raidlock(struct raid_softc *rs)
   2339 {
   2340 	int     error;
   2341 
   2342 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2343 		rs->sc_flags |= RAIDF_WANTED;
   2344 		if ((error =
   2345 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2346 			return (error);
   2347 	}
   2348 	rs->sc_flags |= RAIDF_LOCKED;
   2349 	return (0);
   2350 }
   2351 /*
   2352  * Unlock and wake up any waiters.
   2353  */
   2354 static void
   2355 raidunlock(struct raid_softc *rs)
   2356 {
   2357 
   2358 	rs->sc_flags &= ~RAIDF_LOCKED;
   2359 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2360 		rs->sc_flags &= ~RAIDF_WANTED;
   2361 		wakeup(rs);
   2362 	}
   2363 }
   2364 
   2365 
   2366 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2367 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2368 
   2369 int
   2370 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2371 {
   2372 	RF_ComponentLabel_t clabel;
   2373 	raidread_component_label(dev, b_vp, &clabel);
   2374 	clabel.mod_counter = mod_counter;
   2375 	clabel.clean = RF_RAID_CLEAN;
   2376 	raidwrite_component_label(dev, b_vp, &clabel);
   2377 	return(0);
   2378 }
   2379 
   2380 
   2381 int
   2382 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2383 {
   2384 	RF_ComponentLabel_t clabel;
   2385 	raidread_component_label(dev, b_vp, &clabel);
   2386 	clabel.mod_counter = mod_counter;
   2387 	clabel.clean = RF_RAID_DIRTY;
   2388 	raidwrite_component_label(dev, b_vp, &clabel);
   2389 	return(0);
   2390 }
   2391 
   2392 /* ARGSUSED */
   2393 int
   2394 raidread_component_label(dev_t dev, struct vnode *b_vp,
   2395 			 RF_ComponentLabel_t *clabel)
   2396 {
   2397 	struct buf *bp;
   2398 	const struct bdevsw *bdev;
   2399 	int error;
   2400 
   2401 	/* XXX should probably ensure that we don't try to do this if
   2402 	   someone has changed rf_protected_sectors. */
   2403 
   2404 	if (b_vp == NULL) {
   2405 		/* For whatever reason, this component is not valid.
   2406 		   Don't try to read a component label from it. */
   2407 		return(EINVAL);
   2408 	}
   2409 
   2410 	/* get a block of the appropriate size... */
   2411 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2412 	bp->b_dev = dev;
   2413 
   2414 	/* get our ducks in a row for the read */
   2415 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2416 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2417 	bp->b_flags |= B_READ;
   2418  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2419 
   2420 	bdev = bdevsw_lookup(bp->b_dev);
   2421 	if (bdev == NULL)
   2422 		return (ENXIO);
   2423 	(*bdev->d_strategy)(bp);
   2424 
   2425 	error = biowait(bp);
   2426 
   2427 	if (!error) {
   2428 		memcpy(clabel, bp->b_data,
   2429 		       sizeof(RF_ComponentLabel_t));
   2430 	}
   2431 
   2432 	brelse(bp, 0);
   2433 	return(error);
   2434 }
   2435 /* ARGSUSED */
   2436 int
   2437 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
   2438 			  RF_ComponentLabel_t *clabel)
   2439 {
   2440 	struct buf *bp;
   2441 	const struct bdevsw *bdev;
   2442 	int error;
   2443 
   2444 	/* get a block of the appropriate size... */
   2445 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2446 	bp->b_dev = dev;
   2447 
   2448 	/* get our ducks in a row for the write */
   2449 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2450 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2451 	bp->b_flags |= B_WRITE;
   2452  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2453 
   2454 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2455 
   2456 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2457 
   2458 	bdev = bdevsw_lookup(bp->b_dev);
   2459 	if (bdev == NULL)
   2460 		return (ENXIO);
   2461 	(*bdev->d_strategy)(bp);
   2462 	error = biowait(bp);
   2463 	brelse(bp, 0);
   2464 	if (error) {
   2465 #if 1
   2466 		printf("Failed to write RAID component info!\n");
   2467 #endif
   2468 	}
   2469 
   2470 	return(error);
   2471 }
   2472 
   2473 void
   2474 rf_markalldirty(RF_Raid_t *raidPtr)
   2475 {
   2476 	RF_ComponentLabel_t clabel;
   2477 	int sparecol;
   2478 	int c;
   2479 	int j;
   2480 	int scol = -1;
   2481 
   2482 	raidPtr->mod_counter++;
   2483 	for (c = 0; c < raidPtr->numCol; c++) {
   2484 		/* we don't want to touch (at all) a disk that has
   2485 		   failed */
   2486 		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
   2487 			raidread_component_label(
   2488 						 raidPtr->Disks[c].dev,
   2489 						 raidPtr->raid_cinfo[c].ci_vp,
   2490 						 &clabel);
   2491 			if (clabel.status == rf_ds_spared) {
   2492 				/* XXX do something special...
   2493 				   but whatever you do, don't
   2494 				   try to access it!! */
   2495 			} else {
   2496 				raidmarkdirty(
   2497 					      raidPtr->Disks[c].dev,
   2498 					      raidPtr->raid_cinfo[c].ci_vp,
   2499 					      raidPtr->mod_counter);
   2500 			}
   2501 		}
   2502 	}
   2503 
   2504 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2505 		sparecol = raidPtr->numCol + c;
   2506 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2507 			/*
   2508 
   2509 			   we claim this disk is "optimal" if it's
   2510 			   rf_ds_used_spare, as that means it should be
   2511 			   directly substitutable for the disk it replaced.
   2512 			   We note that too...
   2513 
   2514 			 */
   2515 
   2516 			for(j=0;j<raidPtr->numCol;j++) {
   2517 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2518 					scol = j;
   2519 					break;
   2520 				}
   2521 			}
   2522 
   2523 			raidread_component_label(
   2524 				 raidPtr->Disks[sparecol].dev,
   2525 				 raidPtr->raid_cinfo[sparecol].ci_vp,
   2526 				 &clabel);
   2527 			/* make sure status is noted */
   2528 
   2529 			raid_init_component_label(raidPtr, &clabel);
   2530 
   2531 			clabel.row = 0;
   2532 			clabel.column = scol;
   2533 			/* Note: we *don't* change status from rf_ds_used_spare
   2534 			   to rf_ds_optimal */
   2535 			/* clabel.status = rf_ds_optimal; */
   2536 
   2537 			raidmarkdirty(raidPtr->Disks[sparecol].dev,
   2538 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2539 				      raidPtr->mod_counter);
   2540 		}
   2541 	}
   2542 }
   2543 
   2544 
   2545 void
   2546 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
   2547 {
   2548 	RF_ComponentLabel_t clabel;
   2549 	int sparecol;
   2550 	int c;
   2551 	int j;
   2552 	int scol;
   2553 
   2554 	scol = -1;
   2555 
   2556 	/* XXX should do extra checks to make sure things really are clean,
   2557 	   rather than blindly setting the clean bit... */
   2558 
   2559 	raidPtr->mod_counter++;
   2560 
   2561 	for (c = 0; c < raidPtr->numCol; c++) {
   2562 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   2563 			raidread_component_label(
   2564 						 raidPtr->Disks[c].dev,
   2565 						 raidPtr->raid_cinfo[c].ci_vp,
   2566 						 &clabel);
   2567 			/* make sure status is noted */
   2568 			clabel.status = rf_ds_optimal;
   2569 
   2570 			/* bump the counter */
   2571 			clabel.mod_counter = raidPtr->mod_counter;
   2572 
   2573 			/* note what unit we are configured as */
   2574 			clabel.last_unit = raidPtr->raidid;
   2575 
   2576 			raidwrite_component_label(
   2577 						  raidPtr->Disks[c].dev,
   2578 						  raidPtr->raid_cinfo[c].ci_vp,
   2579 						  &clabel);
   2580 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2581 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2582 					raidmarkclean(
   2583 						      raidPtr->Disks[c].dev,
   2584 						      raidPtr->raid_cinfo[c].ci_vp,
   2585 						      raidPtr->mod_counter);
   2586 				}
   2587 			}
   2588 		}
   2589 		/* else we don't touch it.. */
   2590 	}
   2591 
   2592 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2593 		sparecol = raidPtr->numCol + c;
   2594 		/* Need to ensure that the reconstruct actually completed! */
   2595 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2596 			/*
   2597 
   2598 			   we claim this disk is "optimal" if it's
   2599 			   rf_ds_used_spare, as that means it should be
   2600 			   directly substitutable for the disk it replaced.
   2601 			   We note that too...
   2602 
   2603 			 */
   2604 
   2605 			for(j=0;j<raidPtr->numCol;j++) {
   2606 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2607 					scol = j;
   2608 					break;
   2609 				}
   2610 			}
   2611 
   2612 			/* XXX shouldn't *really* need this... */
   2613 			raidread_component_label(
   2614 				      raidPtr->Disks[sparecol].dev,
   2615 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2616 				      &clabel);
   2617 			/* make sure status is noted */
   2618 
   2619 			raid_init_component_label(raidPtr, &clabel);
   2620 
   2621 			clabel.mod_counter = raidPtr->mod_counter;
   2622 			clabel.column = scol;
   2623 			clabel.status = rf_ds_optimal;
   2624 			clabel.last_unit = raidPtr->raidid;
   2625 
   2626 			raidwrite_component_label(
   2627 				      raidPtr->Disks[sparecol].dev,
   2628 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2629 				      &clabel);
   2630 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2631 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2632 					raidmarkclean( raidPtr->Disks[sparecol].dev,
   2633 						       raidPtr->raid_cinfo[sparecol].ci_vp,
   2634 						       raidPtr->mod_counter);
   2635 				}
   2636 			}
   2637 		}
   2638 	}
   2639 }
   2640 
   2641 void
   2642 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2643 {
   2644 
   2645 	if (vp != NULL) {
   2646 		if (auto_configured == 1) {
   2647 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2648 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2649 			vput(vp);
   2650 
   2651 		} else {
   2652 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
   2653 		}
   2654 	}
   2655 }
   2656 
   2657 
   2658 void
   2659 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2660 {
   2661 	int r,c;
   2662 	struct vnode *vp;
   2663 	int acd;
   2664 
   2665 
   2666 	/* We take this opportunity to close the vnodes like we should.. */
   2667 
   2668 	for (c = 0; c < raidPtr->numCol; c++) {
   2669 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2670 		acd = raidPtr->Disks[c].auto_configured;
   2671 		rf_close_component(raidPtr, vp, acd);
   2672 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2673 		raidPtr->Disks[c].auto_configured = 0;
   2674 	}
   2675 
   2676 	for (r = 0; r < raidPtr->numSpare; r++) {
   2677 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2678 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2679 		rf_close_component(raidPtr, vp, acd);
   2680 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2681 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2682 	}
   2683 }
   2684 
   2685 
   2686 void
   2687 rf_ReconThread(struct rf_recon_req *req)
   2688 {
   2689 	int     s;
   2690 	RF_Raid_t *raidPtr;
   2691 
   2692 	s = splbio();
   2693 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2694 	raidPtr->recon_in_progress = 1;
   2695 
   2696 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2697 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2698 
   2699 	RF_Free(req, sizeof(*req));
   2700 
   2701 	raidPtr->recon_in_progress = 0;
   2702 	splx(s);
   2703 
   2704 	/* That's all... */
   2705 	kthread_exit(0);	/* does not return */
   2706 }
   2707 
   2708 void
   2709 rf_RewriteParityThread(RF_Raid_t *raidPtr)
   2710 {
   2711 	int retcode;
   2712 	int s;
   2713 
   2714 	raidPtr->parity_rewrite_stripes_done = 0;
   2715 	raidPtr->parity_rewrite_in_progress = 1;
   2716 	s = splbio();
   2717 	retcode = rf_RewriteParity(raidPtr);
   2718 	splx(s);
   2719 	if (retcode) {
   2720 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2721 	} else {
   2722 		/* set the clean bit!  If we shutdown correctly,
   2723 		   the clean bit on each component label will get
   2724 		   set */
   2725 		raidPtr->parity_good = RF_RAID_CLEAN;
   2726 	}
   2727 	raidPtr->parity_rewrite_in_progress = 0;
   2728 
   2729 	/* Anyone waiting for us to stop?  If so, inform them... */
   2730 	if (raidPtr->waitShutdown) {
   2731 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2732 	}
   2733 
   2734 	/* That's all... */
   2735 	kthread_exit(0);	/* does not return */
   2736 }
   2737 
   2738 
   2739 void
   2740 rf_CopybackThread(RF_Raid_t *raidPtr)
   2741 {
   2742 	int s;
   2743 
   2744 	raidPtr->copyback_in_progress = 1;
   2745 	s = splbio();
   2746 	rf_CopybackReconstructedData(raidPtr);
   2747 	splx(s);
   2748 	raidPtr->copyback_in_progress = 0;
   2749 
   2750 	/* That's all... */
   2751 	kthread_exit(0);	/* does not return */
   2752 }
   2753 
   2754 
   2755 void
   2756 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
   2757 {
   2758 	int s;
   2759 	RF_Raid_t *raidPtr;
   2760 
   2761 	s = splbio();
   2762 	raidPtr = req->raidPtr;
   2763 	raidPtr->recon_in_progress = 1;
   2764 	rf_ReconstructInPlace(raidPtr, req->col);
   2765 	RF_Free(req, sizeof(*req));
   2766 	raidPtr->recon_in_progress = 0;
   2767 	splx(s);
   2768 
   2769 	/* That's all... */
   2770 	kthread_exit(0);	/* does not return */
   2771 }
   2772 
   2773 static RF_AutoConfig_t *
   2774 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
   2775     const char *cname, RF_SectorCount_t size)
   2776 {
   2777 	int good_one = 0;
   2778 	RF_ComponentLabel_t *clabel;
   2779 	RF_AutoConfig_t *ac;
   2780 
   2781 	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
   2782 	if (clabel == NULL) {
   2783 oomem:
   2784 		    while(ac_list) {
   2785 			    ac = ac_list;
   2786 			    if (ac->clabel)
   2787 				    free(ac->clabel, M_RAIDFRAME);
   2788 			    ac_list = ac_list->next;
   2789 			    free(ac, M_RAIDFRAME);
   2790 		    }
   2791 		    printf("RAID auto config: out of memory!\n");
   2792 		    return NULL; /* XXX probably should panic? */
   2793 	}
   2794 
   2795 	if (!raidread_component_label(dev, vp, clabel)) {
   2796 		    /* Got the label.  Does it look reasonable? */
   2797 		    if (rf_reasonable_label(clabel) &&
   2798 			(clabel->partitionSize <= size)) {
   2799 #ifdef DEBUG
   2800 			    printf("Component on: %s: %llu\n",
   2801 				cname, (unsigned long long)size);
   2802 			    rf_print_component_label(clabel);
   2803 #endif
   2804 			    /* if it's reasonable, add it, else ignore it. */
   2805 			    ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
   2806 				M_NOWAIT);
   2807 			    if (ac == NULL) {
   2808 				    free(clabel, M_RAIDFRAME);
   2809 				    goto oomem;
   2810 			    }
   2811 			    strlcpy(ac->devname, cname, sizeof(ac->devname));
   2812 			    ac->dev = dev;
   2813 			    ac->vp = vp;
   2814 			    ac->clabel = clabel;
   2815 			    ac->next = ac_list;
   2816 			    ac_list = ac;
   2817 			    good_one = 1;
   2818 		    }
   2819 	}
   2820 	if (!good_one) {
   2821 		/* cleanup */
   2822 		free(clabel, M_RAIDFRAME);
   2823 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2824 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2825 		vput(vp);
   2826 	}
   2827 	return ac_list;
   2828 }
   2829 
   2830 RF_AutoConfig_t *
   2831 rf_find_raid_components()
   2832 {
   2833 	struct vnode *vp;
   2834 	struct disklabel label;
   2835 	struct device *dv;
   2836 	dev_t dev;
   2837 	int bmajor, bminor, wedge;
   2838 	int error;
   2839 	int i;
   2840 	RF_AutoConfig_t *ac_list;
   2841 
   2842 
   2843 	/* initialize the AutoConfig list */
   2844 	ac_list = NULL;
   2845 
   2846 	/* we begin by trolling through *all* the devices on the system */
   2847 
   2848 	for (dv = alldevs.tqh_first; dv != NULL;
   2849 	     dv = dv->dv_list.tqe_next) {
   2850 
   2851 		/* we are only interested in disks... */
   2852 		if (device_class(dv) != DV_DISK)
   2853 			continue;
   2854 
   2855 		/* we don't care about floppies... */
   2856 		if (device_is_a(dv, "fd")) {
   2857 			continue;
   2858 		}
   2859 
   2860 		/* we don't care about CD's... */
   2861 		if (device_is_a(dv, "cd")) {
   2862 			continue;
   2863 		}
   2864 
   2865 		/* hdfd is the Atari/Hades floppy driver */
   2866 		if (device_is_a(dv, "hdfd")) {
   2867 			continue;
   2868 		}
   2869 
   2870 		/* fdisa is the Atari/Milan floppy driver */
   2871 		if (device_is_a(dv, "fdisa")) {
   2872 			continue;
   2873 		}
   2874 
   2875 		/* need to find the device_name_to_block_device_major stuff */
   2876 		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
   2877 
   2878 		/* get a vnode for the raw partition of this disk */
   2879 
   2880 		wedge = device_is_a(dv, "dk");
   2881 		bminor = minor(device_unit(dv));
   2882 		dev = wedge ? makedev(bmajor, bminor) :
   2883 		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
   2884 		if (bdevvp(dev, &vp))
   2885 			panic("RAID can't alloc vnode");
   2886 
   2887 		error = VOP_OPEN(vp, FREAD, NOCRED);
   2888 
   2889 		if (error) {
   2890 			/* "Who cares."  Continue looking
   2891 			   for something that exists*/
   2892 			vput(vp);
   2893 			continue;
   2894 		}
   2895 
   2896 		if (wedge) {
   2897 			struct dkwedge_info dkw;
   2898 			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
   2899 			    NOCRED);
   2900 			if (error) {
   2901 				printf("RAIDframe: can't get wedge info for "
   2902 				    "dev %s (%d)\n", device_xname(dv), error);
   2903 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2904 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2905 				vput(vp);
   2906 				continue;
   2907 			}
   2908 
   2909 			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
   2910 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2911 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2912 				vput(vp);
   2913 				continue;
   2914 			}
   2915 
   2916 			ac_list = rf_get_component(ac_list, dev, vp,
   2917 			    device_xname(dv), dkw.dkw_size);
   2918 			continue;
   2919 		}
   2920 
   2921 		/* Ok, the disk exists.  Go get the disklabel. */
   2922 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
   2923 		if (error) {
   2924 			/*
   2925 			 * XXX can't happen - open() would
   2926 			 * have errored out (or faked up one)
   2927 			 */
   2928 			if (error != ENOTTY)
   2929 				printf("RAIDframe: can't get label for dev "
   2930 				    "%s (%d)\n", device_xname(dv), error);
   2931 		}
   2932 
   2933 		/* don't need this any more.  We'll allocate it again
   2934 		   a little later if we really do... */
   2935 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2936 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2937 		vput(vp);
   2938 
   2939 		if (error)
   2940 			continue;
   2941 
   2942 		for (i = 0; i < label.d_npartitions; i++) {
   2943 			char cname[sizeof(ac_list->devname)];
   2944 
   2945 			/* We only support partitions marked as RAID */
   2946 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2947 				continue;
   2948 
   2949 			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
   2950 			if (bdevvp(dev, &vp))
   2951 				panic("RAID can't alloc vnode");
   2952 
   2953 			error = VOP_OPEN(vp, FREAD, NOCRED);
   2954 			if (error) {
   2955 				/* Whatever... */
   2956 				vput(vp);
   2957 				continue;
   2958 			}
   2959 			snprintf(cname, sizeof(cname), "%s%c",
   2960 			    device_xname(dv), 'a' + i);
   2961 			ac_list = rf_get_component(ac_list, dev, vp, cname,
   2962 				label.d_partitions[i].p_size);
   2963 		}
   2964 	}
   2965 	return ac_list;
   2966 }
   2967 
   2968 
   2969 static int
   2970 rf_reasonable_label(RF_ComponentLabel_t *clabel)
   2971 {
   2972 
   2973 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2974 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2975 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2976 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2977 	    clabel->row >=0 &&
   2978 	    clabel->column >= 0 &&
   2979 	    clabel->num_rows > 0 &&
   2980 	    clabel->num_columns > 0 &&
   2981 	    clabel->row < clabel->num_rows &&
   2982 	    clabel->column < clabel->num_columns &&
   2983 	    clabel->blockSize > 0 &&
   2984 	    clabel->numBlocks > 0) {
   2985 		/* label looks reasonable enough... */
   2986 		return(1);
   2987 	}
   2988 	return(0);
   2989 }
   2990 
   2991 
#ifdef DEBUG
/*
 * Debug helper: dump the interesting fields of a component label to the
 * console with printf().
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *is_clean = clabel->clean ? "Yes" : "No";
	const char *is_auto = clabel->autoconfigure ? "Yes" : "No";
	const char *is_root = clabel->root_partition ? "Yes" : "No";

	/* position and geometry */
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);

	/* identity */
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);

	/* state */
	printf("   Clean: %s Status: %d\n", is_clean, clabel->status);

	/* layout */
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);

	/* autoconfiguration */
	printf("   Autoconfig: %s\n", is_auto);
	printf("   Contains root partition: %s\n", is_root);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
   3019 
   3020 RF_ConfigSet_t *
   3021 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   3022 {
   3023 	RF_AutoConfig_t *ac;
   3024 	RF_ConfigSet_t *config_sets;
   3025 	RF_ConfigSet_t *cset;
   3026 	RF_AutoConfig_t *ac_next;
   3027 
   3028 
   3029 	config_sets = NULL;
   3030 
   3031 	/* Go through the AutoConfig list, and figure out which components
   3032 	   belong to what sets.  */
   3033 	ac = ac_list;
   3034 	while(ac!=NULL) {
   3035 		/* we're going to putz with ac->next, so save it here
   3036 		   for use at the end of the loop */
   3037 		ac_next = ac->next;
   3038 
   3039 		if (config_sets == NULL) {
   3040 			/* will need at least this one... */
   3041 			config_sets = (RF_ConfigSet_t *)
   3042 				malloc(sizeof(RF_ConfigSet_t),
   3043 				       M_RAIDFRAME, M_NOWAIT);
   3044 			if (config_sets == NULL) {
   3045 				panic("rf_create_auto_sets: No memory!");
   3046 			}
   3047 			/* this one is easy :) */
   3048 			config_sets->ac = ac;
   3049 			config_sets->next = NULL;
   3050 			config_sets->rootable = 0;
   3051 			ac->next = NULL;
   3052 		} else {
   3053 			/* which set does this component fit into? */
   3054 			cset = config_sets;
   3055 			while(cset!=NULL) {
   3056 				if (rf_does_it_fit(cset, ac)) {
   3057 					/* looks like it matches... */
   3058 					ac->next = cset->ac;
   3059 					cset->ac = ac;
   3060 					break;
   3061 				}
   3062 				cset = cset->next;
   3063 			}
   3064 			if (cset==NULL) {
   3065 				/* didn't find a match above... new set..*/
   3066 				cset = (RF_ConfigSet_t *)
   3067 					malloc(sizeof(RF_ConfigSet_t),
   3068 					       M_RAIDFRAME, M_NOWAIT);
   3069 				if (cset == NULL) {
   3070 					panic("rf_create_auto_sets: No memory!");
   3071 				}
   3072 				cset->ac = ac;
   3073 				ac->next = NULL;
   3074 				cset->next = config_sets;
   3075 				cset->rootable = 0;
   3076 				config_sets = cset;
   3077 			}
   3078 		}
   3079 		ac = ac_next;
   3080 	}
   3081 
   3082 
   3083 	return(config_sets);
   3084 }
   3085 
   3086 static int
   3087 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3088 {
   3089 	RF_ComponentLabel_t *clabel1, *clabel2;
   3090 
   3091 	/* If this one matches the *first* one in the set, that's good
   3092 	   enough, since the other members of the set would have been
   3093 	   through here too... */
   3094 	/* note that we are not checking partitionSize here..
   3095 
   3096 	   Note that we are also not checking the mod_counters here.
   3097 	   If everything else matches execpt the mod_counter, that's
   3098 	   good enough for this test.  We will deal with the mod_counters
   3099 	   a little later in the autoconfiguration process.
   3100 
   3101 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3102 
   3103 	   The reason we don't check for this is that failed disks
   3104 	   will have lower modification counts.  If those disks are
   3105 	   not added to the set they used to belong to, then they will
   3106 	   form their own set, which may result in 2 different sets,
   3107 	   for example, competing to be configured at raid0, and
   3108 	   perhaps competing to be the root filesystem set.  If the
   3109 	   wrong ones get configured, or both attempt to become /,
   3110 	   weird behaviour and or serious lossage will occur.  Thus we
   3111 	   need to bring them into the fold here, and kick them out at
   3112 	   a later point.
   3113 
   3114 	*/
   3115 
   3116 	clabel1 = cset->ac->clabel;
   3117 	clabel2 = ac->clabel;
   3118 	if ((clabel1->version == clabel2->version) &&
   3119 	    (clabel1->serial_number == clabel2->serial_number) &&
   3120 	    (clabel1->num_rows == clabel2->num_rows) &&
   3121 	    (clabel1->num_columns == clabel2->num_columns) &&
   3122 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3123 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3124 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3125 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3126 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3127 	    (clabel1->blockSize == clabel2->blockSize) &&
   3128 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   3129 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3130 	    (clabel1->root_partition == clabel2->root_partition) &&
   3131 	    (clabel1->last_unit == clabel2->last_unit) &&
   3132 	    (clabel1->config_order == clabel2->config_order)) {
   3133 		/* if it get's here, it almost *has* to be a match */
   3134 	} else {
   3135 		/* it's not consistent with somebody in the set..
   3136 		   punt */
   3137 		return(0);
   3138 	}
   3139 	/* all was fine.. it must fit... */
   3140 	return(1);
   3141 }
   3142 
   3143 int
   3144 rf_have_enough_components(RF_ConfigSet_t *cset)
   3145 {
   3146 	RF_AutoConfig_t *ac;
   3147 	RF_AutoConfig_t *auto_config;
   3148 	RF_ComponentLabel_t *clabel;
   3149 	int c;
   3150 	int num_cols;
   3151 	int num_missing;
   3152 	int mod_counter;
   3153 	int mod_counter_found;
   3154 	int even_pair_failed;
   3155 	char parity_type;
   3156 
   3157 
   3158 	/* check to see that we have enough 'live' components
   3159 	   of this set.  If so, we can configure it if necessary */
   3160 
   3161 	num_cols = cset->ac->clabel->num_columns;
   3162 	parity_type = cset->ac->clabel->parityConfig;
   3163 
   3164 	/* XXX Check for duplicate components!?!?!? */
   3165 
   3166 	/* Determine what the mod_counter is supposed to be for this set. */
   3167 
   3168 	mod_counter_found = 0;
   3169 	mod_counter = 0;
   3170 	ac = cset->ac;
   3171 	while(ac!=NULL) {
   3172 		if (mod_counter_found==0) {
   3173 			mod_counter = ac->clabel->mod_counter;
   3174 			mod_counter_found = 1;
   3175 		} else {
   3176 			if (ac->clabel->mod_counter > mod_counter) {
   3177 				mod_counter = ac->clabel->mod_counter;
   3178 			}
   3179 		}
   3180 		ac = ac->next;
   3181 	}
   3182 
   3183 	num_missing = 0;
   3184 	auto_config = cset->ac;
   3185 
   3186 	even_pair_failed = 0;
   3187 	for(c=0; c<num_cols; c++) {
   3188 		ac = auto_config;
   3189 		while(ac!=NULL) {
   3190 			if ((ac->clabel->column == c) &&
   3191 			    (ac->clabel->mod_counter == mod_counter)) {
   3192 				/* it's this one... */
   3193 #ifdef DEBUG
   3194 				printf("Found: %s at %d\n",
   3195 				       ac->devname,c);
   3196 #endif
   3197 				break;
   3198 			}
   3199 			ac=ac->next;
   3200 		}
   3201 		if (ac==NULL) {
   3202 				/* Didn't find one here! */
   3203 				/* special case for RAID 1, especially
   3204 				   where there are more than 2
   3205 				   components (where RAIDframe treats
   3206 				   things a little differently :( ) */
   3207 			if (parity_type == '1') {
   3208 				if (c%2 == 0) { /* even component */
   3209 					even_pair_failed = 1;
   3210 				} else { /* odd component.  If
   3211 					    we're failed, and
   3212 					    so is the even
   3213 					    component, it's
   3214 					    "Good Night, Charlie" */
   3215 					if (even_pair_failed == 1) {
   3216 						return(0);
   3217 					}
   3218 				}
   3219 			} else {
   3220 				/* normal accounting */
   3221 				num_missing++;
   3222 			}
   3223 		}
   3224 		if ((parity_type == '1') && (c%2 == 1)) {
   3225 				/* Just did an even component, and we didn't
   3226 				   bail.. reset the even_pair_failed flag,
   3227 				   and go on to the next component.... */
   3228 			even_pair_failed = 0;
   3229 		}
   3230 	}
   3231 
   3232 	clabel = cset->ac->clabel;
   3233 
   3234 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3235 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3236 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3237 		/* XXX this needs to be made *much* more general */
   3238 		/* Too many failures */
   3239 		return(0);
   3240 	}
   3241 	/* otherwise, all is well, and we've got enough to take a kick
   3242 	   at autoconfiguring this set */
   3243 	return(1);
   3244 }
   3245 
   3246 void
   3247 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
   3248 			RF_Raid_t *raidPtr)
   3249 {
   3250 	RF_ComponentLabel_t *clabel;
   3251 	int i;
   3252 
   3253 	clabel = ac->clabel;
   3254 
   3255 	/* 1. Fill in the common stuff */
   3256 	config->numRow = clabel->num_rows = 1;
   3257 	config->numCol = clabel->num_columns;
   3258 	config->numSpare = 0; /* XXX should this be set here? */
   3259 	config->sectPerSU = clabel->sectPerSU;
   3260 	config->SUsPerPU = clabel->SUsPerPU;
   3261 	config->SUsPerRU = clabel->SUsPerRU;
   3262 	config->parityConfig = clabel->parityConfig;
   3263 	/* XXX... */
   3264 	strcpy(config->diskQueueType,"fifo");
   3265 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3266 	config->layoutSpecificSize = 0; /* XXX ?? */
   3267 
   3268 	while(ac!=NULL) {
   3269 		/* row/col values will be in range due to the checks
   3270 		   in reasonable_label() */
   3271 		strcpy(config->devnames[0][ac->clabel->column],
   3272 		       ac->devname);
   3273 		ac = ac->next;
   3274 	}
   3275 
   3276 	for(i=0;i<RF_MAXDBGV;i++) {
   3277 		config->debugVars[i][0] = 0;
   3278 	}
   3279 }
   3280 
   3281 int
   3282 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3283 {
   3284 	RF_ComponentLabel_t clabel;
   3285 	struct vnode *vp;
   3286 	dev_t dev;
   3287 	int column;
   3288 	int sparecol;
   3289 
   3290 	raidPtr->autoconfigure = new_value;
   3291 
   3292 	for(column=0; column<raidPtr->numCol; column++) {
   3293 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3294 			dev = raidPtr->Disks[column].dev;
   3295 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3296 			raidread_component_label(dev, vp, &clabel);
   3297 			clabel.autoconfigure = new_value;
   3298 			raidwrite_component_label(dev, vp, &clabel);
   3299 		}
   3300 	}
   3301 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3302 		sparecol = raidPtr->numCol + column;
   3303 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3304 			dev = raidPtr->Disks[sparecol].dev;
   3305 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3306 			raidread_component_label(dev, vp, &clabel);
   3307 			clabel.autoconfigure = new_value;
   3308 			raidwrite_component_label(dev, vp, &clabel);
   3309 		}
   3310 	}
   3311 	return(new_value);
   3312 }
   3313 
   3314 int
   3315 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3316 {
   3317 	RF_ComponentLabel_t clabel;
   3318 	struct vnode *vp;
   3319 	dev_t dev;
   3320 	int column;
   3321 	int sparecol;
   3322 
   3323 	raidPtr->root_partition = new_value;
   3324 	for(column=0; column<raidPtr->numCol; column++) {
   3325 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3326 			dev = raidPtr->Disks[column].dev;
   3327 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3328 			raidread_component_label(dev, vp, &clabel);
   3329 			clabel.root_partition = new_value;
   3330 			raidwrite_component_label(dev, vp, &clabel);
   3331 		}
   3332 	}
   3333 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3334 		sparecol = raidPtr->numCol + column;
   3335 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3336 			dev = raidPtr->Disks[sparecol].dev;
   3337 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3338 			raidread_component_label(dev, vp, &clabel);
   3339 			clabel.root_partition = new_value;
   3340 			raidwrite_component_label(dev, vp, &clabel);
   3341 		}
   3342 	}
   3343 	return(new_value);
   3344 }
   3345 
   3346 void
   3347 rf_release_all_vps(RF_ConfigSet_t *cset)
   3348 {
   3349 	RF_AutoConfig_t *ac;
   3350 
   3351 	ac = cset->ac;
   3352 	while(ac!=NULL) {
   3353 		/* Close the vp, and give it back */
   3354 		if (ac->vp) {
   3355 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3356 			VOP_CLOSE(ac->vp, FREAD, NOCRED);
   3357 			vput(ac->vp);
   3358 			ac->vp = NULL;
   3359 		}
   3360 		ac = ac->next;
   3361 	}
   3362 }
   3363 
   3364 
   3365 void
   3366 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3367 {
   3368 	RF_AutoConfig_t *ac;
   3369 	RF_AutoConfig_t *next_ac;
   3370 
   3371 	ac = cset->ac;
   3372 	while(ac!=NULL) {
   3373 		next_ac = ac->next;
   3374 		/* nuke the label */
   3375 		free(ac->clabel, M_RAIDFRAME);
   3376 		/* cleanup the config structure */
   3377 		free(ac, M_RAIDFRAME);
   3378 		/* "next.." */
   3379 		ac = next_ac;
   3380 	}
   3381 	/* and, finally, nuke the config set */
   3382 	free(cset, M_RAIDFRAME);
   3383 }
   3384 
   3385 
   3386 void
   3387 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
   3388 {
   3389 	/* current version number */
   3390 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3391 	clabel->serial_number = raidPtr->serial_number;
   3392 	clabel->mod_counter = raidPtr->mod_counter;
   3393 	clabel->num_rows = 1;
   3394 	clabel->num_columns = raidPtr->numCol;
   3395 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3396 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3397 
   3398 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3399 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3400 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3401 
   3402 	clabel->blockSize = raidPtr->bytesPerSector;
   3403 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3404 
   3405 	/* XXX not portable */
   3406 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3407 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3408 	clabel->autoconfigure = raidPtr->autoconfigure;
   3409 	clabel->root_partition = raidPtr->root_partition;
   3410 	clabel->last_unit = raidPtr->raidid;
   3411 	clabel->config_order = raidPtr->config_order;
   3412 }
   3413 
   3414 int
   3415 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
   3416 {
   3417 	RF_Raid_t *raidPtr;
   3418 	RF_Config_t *config;
   3419 	int raidID;
   3420 	int retcode;
   3421 
   3422 #ifdef DEBUG
   3423 	printf("RAID autoconfigure\n");
   3424 #endif
   3425 
   3426 	retcode = 0;
   3427 	*unit = -1;
   3428 
   3429 	/* 1. Create a config structure */
   3430 
   3431 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3432 				       M_RAIDFRAME,
   3433 				       M_NOWAIT);
   3434 	if (config==NULL) {
   3435 		printf("Out of mem!?!?\n");
   3436 				/* XXX do something more intelligent here. */
   3437 		return(1);
   3438 	}
   3439 
   3440 	memset(config, 0, sizeof(RF_Config_t));
   3441 
   3442 	/*
   3443 	   2. Figure out what RAID ID this one is supposed to live at
   3444 	   See if we can get the same RAID dev that it was configured
   3445 	   on last time..
   3446 	*/
   3447 
   3448 	raidID = cset->ac->clabel->last_unit;
   3449 	if ((raidID < 0) || (raidID >= numraid)) {
   3450 		/* let's not wander off into lala land. */
   3451 		raidID = numraid - 1;
   3452 	}
   3453 	if (raidPtrs[raidID]->valid != 0) {
   3454 
   3455 		/*
   3456 		   Nope... Go looking for an alternative...
   3457 		   Start high so we don't immediately use raid0 if that's
   3458 		   not taken.
   3459 		*/
   3460 
   3461 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3462 			if (raidPtrs[raidID]->valid == 0) {
   3463 				/* can use this one! */
   3464 				break;
   3465 			}
   3466 		}
   3467 	}
   3468 
   3469 	if (raidID < 0) {
   3470 		/* punt... */
   3471 		printf("Unable to auto configure this set!\n");
   3472 		printf("(Out of RAID devs!)\n");
   3473 		free(config, M_RAIDFRAME);
   3474 		return(1);
   3475 	}
   3476 
   3477 #ifdef DEBUG
   3478 	printf("Configuring raid%d:\n",raidID);
   3479 #endif
   3480 
   3481 	raidPtr = raidPtrs[raidID];
   3482 
   3483 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3484 	raidPtr->raidid = raidID;
   3485 	raidPtr->openings = RAIDOUTSTANDING;
   3486 
   3487 	/* 3. Build the configuration structure */
   3488 	rf_create_configuration(cset->ac, config, raidPtr);
   3489 
   3490 	/* 4. Do the configuration */
   3491 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3492 
   3493 	if (retcode == 0) {
   3494 
   3495 		raidinit(raidPtrs[raidID]);
   3496 
   3497 		rf_markalldirty(raidPtrs[raidID]);
   3498 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3499 		if (cset->ac->clabel->root_partition==1) {
   3500 			/* everything configured just fine.  Make a note
   3501 			   that this set is eligible to be root. */
   3502 			cset->rootable = 1;
   3503 			/* XXX do this here? */
   3504 			raidPtrs[raidID]->root_partition = 1;
   3505 		}
   3506 	}
   3507 
   3508 	/* 5. Cleanup */
   3509 	free(config, M_RAIDFRAME);
   3510 
   3511 	*unit = raidID;
   3512 	return(retcode);
   3513 }
   3514 
   3515 void
   3516 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
   3517 {
   3518 	struct buf *bp;
   3519 
   3520 	bp = (struct buf *)desc->bp;
   3521 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3522 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
   3523 }
   3524 
/*
 * Set up a pool for RAIDframe use.
 *
 *   p       pool to initialize
 *   size    passed to pool_init() as the item size
 *   w_chan  passed through to pool_init() (presumably the wait channel
 *           name -- confirm against pool(9))
 *   xmin    number of items to prime the pool with, also the low
 *           watermark
 *   xmax    high watermark
 *
 * The pool is created with IPL_BIO.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
   3534 
   3535 /*
   3536  * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
   3537  * if there is IO pending and if that IO could possibly be done for a
   3538  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
   3539  * otherwise.
   3540  *
   3541  */
   3542 
   3543 int
   3544 rf_buf_queue_check(int raidid)
   3545 {
   3546 	if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
   3547 	    raidPtrs[raidid]->openings > 0) {
   3548 		/* there is work to do */
   3549 		return 0;
   3550 	}
   3551 	/* default is nothing to do */
   3552 	return 1;
   3553 }
   3554 
   3555 int
   3556 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
   3557 {
   3558 	struct partinfo dpart;
   3559 	struct dkwedge_info dkw;
   3560 	int error;
   3561 
   3562 	error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred);
   3563 	if (error == 0) {
   3564 		diskPtr->blockSize = dpart.disklab->d_secsize;
   3565 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
   3566 		diskPtr->partitionSize = dpart.part->p_size;
   3567 		return 0;
   3568 	}
   3569 
   3570 	error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred);
   3571 	if (error == 0) {
   3572 		diskPtr->blockSize = 512;	/* XXX */
   3573 		diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
   3574 		diskPtr->partitionSize = dkw.dkw_size;
   3575 		return 0;
   3576 	}
   3577 	return error;
   3578 }
   3579 
/*
 * Match routine: unconditionally reports a match (returns 1); none of
 * the arguments are examined.
 */
static int
raid_match(struct device *self, struct cfdata *cfdata,
    void *aux)
{
	return 1;
}
   3586 
/*
 * Attach routine: intentionally empty, nothing is done at attach time
 * in this function.
 */
static void
raid_attach(struct device *parent, struct device *self,
    void *aux)
{

}
   3593 
   3594 
   3595 static int
   3596 raid_detach(struct device *self, int flags)
   3597 {
   3598 	struct raid_softc *rs = (struct raid_softc *)self;
   3599 
   3600 	if (rs->sc_flags & RAIDF_INITED)
   3601 		return EBUSY;
   3602 
   3603 	return 0;
   3604 }
   3605 
   3606 static void
   3607 rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
   3608 {
   3609 	prop_dictionary_t disk_info, odisk_info, geom;
   3610 	disk_info = prop_dictionary_create();
   3611 	geom = prop_dictionary_create();
   3612 	prop_dictionary_set_uint64(geom, "sectors-per-unit",
   3613 				   raidPtr->totalSectors);
   3614 	prop_dictionary_set_uint32(geom, "sector-size",
   3615 				   raidPtr->bytesPerSector);
   3616 
   3617 	prop_dictionary_set_uint16(geom, "sectors-per-track",
   3618 				   raidPtr->Layout.dataSectorsPerStripe);
   3619 	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
   3620 				   4 * raidPtr->numCol);
   3621 
   3622 	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
   3623 	   raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
   3624 	   (4 * raidPtr->numCol)));
   3625 
   3626 	prop_dictionary_set(disk_info, "geometry", geom);
   3627 	prop_object_release(geom);
   3628 	prop_dictionary_set(device_properties(rs->sc_dev),
   3629 			    "disk-info", disk_info);
   3630 	odisk_info = rs->sc_dkdev.dk_info;
   3631 	rs->sc_dkdev.dk_info = disk_info;
   3632 	if (odisk_info)
   3633 		prop_object_release(odisk_info);
   3634 }
   3635