Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.250.2.2
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.250.2.2 2009/03/03 18:31:51 skrll Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * Copyright (c) 1990, 1993
     33  *      The Regents of the University of California.  All rights reserved.
     34  *
     35  * This code is derived from software contributed to Berkeley by
     36  * the Systems Programming Group of the University of Utah Computer
     37  * Science Department.
     38  *
     39  * Redistribution and use in source and binary forms, with or without
     40  * modification, are permitted provided that the following conditions
     41  * are met:
     42  * 1. Redistributions of source code must retain the above copyright
     43  *    notice, this list of conditions and the following disclaimer.
     44  * 2. Redistributions in binary form must reproduce the above copyright
     45  *    notice, this list of conditions and the following disclaimer in the
     46  *    documentation and/or other materials provided with the distribution.
     47  * 3. Neither the name of the University nor the names of its contributors
     48  *    may be used to endorse or promote products derived from this software
     49  *    without specific prior written permission.
     50  *
     51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     61  * SUCH DAMAGE.
     62  *
     63  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     64  *
     65  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     66  */
     67 
     68 /*
     69  * Copyright (c) 1988 University of Utah.
     70  *
     71  * This code is derived from software contributed to Berkeley by
     72  * the Systems Programming Group of the University of Utah Computer
     73  * Science Department.
     74  *
     75  * Redistribution and use in source and binary forms, with or without
     76  * modification, are permitted provided that the following conditions
     77  * are met:
     78  * 1. Redistributions of source code must retain the above copyright
     79  *    notice, this list of conditions and the following disclaimer.
     80  * 2. Redistributions in binary form must reproduce the above copyright
     81  *    notice, this list of conditions and the following disclaimer in the
     82  *    documentation and/or other materials provided with the distribution.
     83  * 3. All advertising materials mentioning features or use of this software
     84  *    must display the following acknowledgement:
     85  *      This product includes software developed by the University of
     86  *      California, Berkeley and its contributors.
     87  * 4. Neither the name of the University nor the names of its contributors
     88  *    may be used to endorse or promote products derived from this software
     89  *    without specific prior written permission.
     90  *
     91  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     92  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     93  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     94  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     95  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     96  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     97  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     98  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     99  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    100  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    101  * SUCH DAMAGE.
    102  *
    103  * from: Utah $Hdr: cd.c 1.6 90/11/28$
    104  *
    105  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
    106  */
    107 
    108 /*
    109  * Copyright (c) 1995 Carnegie-Mellon University.
    110  * All rights reserved.
    111  *
    112  * Authors: Mark Holland, Jim Zelenka
    113  *
    114  * Permission to use, copy, modify and distribute this software and
    115  * its documentation is hereby granted, provided that both the copyright
    116  * notice and this permission notice appear in all copies of the
    117  * software, derivative works or modified versions, and any portions
    118  * thereof, and that both notices appear in supporting documentation.
    119  *
    120  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
    121  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
    122  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
    123  *
    124  * Carnegie Mellon requests users of this software to return to
    125  *
    126  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    127  *  School of Computer Science
    128  *  Carnegie Mellon University
    129  *  Pittsburgh PA 15213-3890
    130  *
    131  * any improvements or extensions that they make and grant Carnegie the
    132  * rights to redistribute these changes.
    133  */
    134 
    135 /***********************************************************
    136  *
    137  * rf_kintf.c -- the kernel interface routines for RAIDframe
    138  *
    139  ***********************************************************/
    140 
    141 #include <sys/cdefs.h>
    142 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.250.2.2 2009/03/03 18:31:51 skrll Exp $");
    143 
    144 #ifdef _KERNEL_OPT
    145 #include "opt_compat_netbsd.h"
    146 #include "opt_raid_autoconfig.h"
    147 #include "raid.h"
    148 #endif
    149 
    150 #include <sys/param.h>
    151 #include <sys/errno.h>
    152 #include <sys/pool.h>
    153 #include <sys/proc.h>
    154 #include <sys/queue.h>
    155 #include <sys/disk.h>
    156 #include <sys/device.h>
    157 #include <sys/stat.h>
    158 #include <sys/ioctl.h>
    159 #include <sys/fcntl.h>
    160 #include <sys/systm.h>
    161 #include <sys/vnode.h>
    162 #include <sys/disklabel.h>
    163 #include <sys/conf.h>
    164 #include <sys/buf.h>
    165 #include <sys/bufq.h>
    166 #include <sys/user.h>
    167 #include <sys/reboot.h>
    168 #include <sys/kauth.h>
    169 
    170 #include <prop/proplib.h>
    171 
    172 #include <dev/raidframe/raidframevar.h>
    173 #include <dev/raidframe/raidframeio.h>
    174 
    175 #include "rf_raid.h"
    176 #include "rf_copyback.h"
    177 #include "rf_dag.h"
    178 #include "rf_dagflags.h"
    179 #include "rf_desc.h"
    180 #include "rf_diskqueue.h"
    181 #include "rf_etimer.h"
    182 #include "rf_general.h"
    183 #include "rf_kintf.h"
    184 #include "rf_options.h"
    185 #include "rf_driver.h"
    186 #include "rf_parityscan.h"
    187 #include "rf_threadstuff.h"
    188 
    189 #ifdef COMPAT_50
    190 #include "rf_compat50.h"
    191 #endif
    192 
    193 #ifdef DEBUG
    194 int     rf_kdebug_level = 0;
    195 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    196 #else				/* DEBUG */
    197 #define db1_printf(a) { }
    198 #endif				/* DEBUG */
    199 
    200 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    201 
    202 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
    203 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    204 
    205 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    206 						 * spare table */
    207 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    208 						 * installation process */
    209 #endif
    210 
    211 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    212 
    213 /* prototypes */
    214 static void KernelWakeupFunc(struct buf *);
    215 static void InitBP(struct buf *, struct vnode *, unsigned,
    216     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    217     void *, int, struct proc *);
    218 static void raidinit(RF_Raid_t *);
    219 
    220 void raidattach(int);
    221 static int raid_match(struct device *, struct cfdata *, void *);
    222 static void raid_attach(struct device *, struct device *, void *);
    223 static int raid_detach(struct device *, int);
    224 
    225 dev_type_open(raidopen);
    226 dev_type_close(raidclose);
    227 dev_type_read(raidread);
    228 dev_type_write(raidwrite);
    229 dev_type_ioctl(raidioctl);
    230 dev_type_strategy(raidstrategy);
    231 dev_type_dump(raiddump);
    232 dev_type_size(raidsize);
    233 
    234 const struct bdevsw raid_bdevsw = {
    235 	raidopen, raidclose, raidstrategy, raidioctl,
    236 	raiddump, raidsize, D_DISK
    237 };
    238 
    239 const struct cdevsw raid_cdevsw = {
    240 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    241 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
    242 };
    243 
    244 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
    245 
    246 /* XXX Not sure if the following should be replacing the raidPtrs above,
    247    or if it should be used in conjunction with that...
    248 */
    249 
    250 struct raid_softc {
    251 	struct device *sc_dev;
    252 	int     sc_flags;	/* flags */
    253 	int     sc_cflags;	/* configuration flags */
    254 	uint64_t sc_size;	/* size of the raid device */
    255 	char    sc_xname[20];	/* XXX external name */
    256 	struct disk sc_dkdev;	/* generic disk device info */
    257 	struct bufq_state *buf_queue;	/* used for the device queue */
    258 };
    259 /* sc_flags */
    260 #define RAIDF_INITED	0x01	/* unit has been initialized */
    261 #define RAIDF_WLABEL	0x02	/* label area is writable */
    262 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    263 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    264 #define RAIDF_LOCKED	0x80	/* unit is locked */
    265 
    266 #define	raidunit(x)	DISKUNIT(x)
    267 int numraid = 0;
    268 
    269 extern struct cfdriver raid_cd;
    270 CFATTACH_DECL_NEW(raid, sizeof(struct raid_softc),
    271     raid_match, raid_attach, raid_detach, NULL);
    272 
    273 /*
    274  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    275  * Be aware that large numbers can allow the driver to consume a lot of
    276  * kernel memory, especially on writes, and in degraded mode reads.
    277  *
    278  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    279  * a single 64K write will typically require 64K for the old data,
    280  * 64K for the old parity, and 64K for the new parity, for a total
    281  * of 192K (if the parity buffer is not re-used immediately).
    282  * Even if it is used immediately, that's still 128K, which when multiplied
    283  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    284  *
    285  * Now in degraded mode, for example, a 64K read on the above setup may
    286  * require data reconstruction, which will require *all* of the 4 remaining
    287  * disks to participate -- 4 * 32K/disk == 128K again.
    288  */
    289 
    290 #ifndef RAIDOUTSTANDING
    291 #define RAIDOUTSTANDING   6
    292 #endif
    293 
    294 #define RAIDLABELDEV(dev)	\
    295 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    296 
    297 /* declared here, and made public, for the benefit of KVM stuff.. */
    298 struct raid_softc *raid_softc;
    299 
    300 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    301 				     struct disklabel *);
    302 static void raidgetdisklabel(dev_t);
    303 static void raidmakedisklabel(struct raid_softc *);
    304 
    305 static int raidlock(struct raid_softc *);
    306 static void raidunlock(struct raid_softc *);
    307 
    308 static void rf_markalldirty(RF_Raid_t *);
    309 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
    310 
    311 void rf_ReconThread(struct rf_recon_req *);
    312 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    313 void rf_CopybackThread(RF_Raid_t *raidPtr);
    314 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    315 int rf_autoconfig(struct device *self);
    316 void rf_buildroothack(RF_ConfigSet_t *);
    317 
    318 RF_AutoConfig_t *rf_find_raid_components(void);
    319 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    320 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    321 static int rf_reasonable_label(RF_ComponentLabel_t *);
    322 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    323 int rf_set_autoconfig(RF_Raid_t *, int);
    324 int rf_set_rootpartition(RF_Raid_t *, int);
    325 void rf_release_all_vps(RF_ConfigSet_t *);
    326 void rf_cleanup_config_set(RF_ConfigSet_t *);
    327 int rf_have_enough_components(RF_ConfigSet_t *);
    328 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    329 static int rf_sync_component_caches(RF_Raid_t *raidPtr);
    330 
    331 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    332 				  allow autoconfig to take place.
    333 				  Note that this is overridden by having
    334 				  RAID_AUTOCONFIG as an option in the
    335 				  kernel config file.  */
    336 
    337 struct RF_Pools_s rf_pools;
    338 
    339 void
    340 raidattach(int num)
    341 {
    342 	int raidID;
    343 	int i, rc;
    344 
    345 #ifdef DEBUG
    346 	printf("raidattach: Asked for %d units\n", num);
    347 #endif
    348 
    349 	if (num <= 0) {
    350 #ifdef DIAGNOSTIC
    351 		panic("raidattach: count <= 0");
    352 #endif
    353 		return;
    354 	}
    355 	/* This is where all the initialization stuff gets done. */
    356 
    357 	numraid = num;
    358 
    359 	/* Make some space for requested number of units... */
    360 
    361 	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
    362 	if (raidPtrs == NULL) {
    363 		panic("raidPtrs is NULL!!");
    364 	}
    365 
    366 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
    367 	rf_mutex_init(&rf_sparet_wait_mutex);
    368 
    369 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    370 #endif
    371 
    372 	for (i = 0; i < num; i++)
    373 		raidPtrs[i] = NULL;
    374 	rc = rf_BootRaidframe();
    375 	if (rc == 0)
    376 		aprint_normal("Kernelized RAIDframe activated\n");
    377 	else
    378 		panic("Serious error booting RAID!!");
    379 
    380 	/* put together some datastructures like the CCD device does.. This
    381 	 * lets us lock the device and what-not when it gets opened. */
    382 
    383 	raid_softc = (struct raid_softc *)
    384 		malloc(num * sizeof(struct raid_softc),
    385 		       M_RAIDFRAME, M_NOWAIT);
    386 	if (raid_softc == NULL) {
    387 		aprint_error("WARNING: no memory for RAIDframe driver\n");
    388 		return;
    389 	}
    390 
    391 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    392 
    393 	for (raidID = 0; raidID < num; raidID++) {
    394 		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
    395 
    396 		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
    397 			  (RF_Raid_t *));
    398 		if (raidPtrs[raidID] == NULL) {
    399 			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
    400 			numraid = raidID;
    401 			return;
    402 		}
    403 	}
    404 
    405 	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
    406 		aprint_error("raidattach: config_cfattach_attach failed?\n");
    407 	}
    408 
    409 #ifdef RAID_AUTOCONFIG
    410 	raidautoconfig = 1;
    411 #endif
    412 
    413 	/*
    414 	 * Register a finalizer which will be used to auto-config RAID
    415 	 * sets once all real hardware devices have been found.
    416 	 */
    417 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
    418 		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
    419 }
    420 
    421 int
    422 rf_autoconfig(struct device *self)
    423 {
    424 	RF_AutoConfig_t *ac_list;
    425 	RF_ConfigSet_t *config_sets;
    426 
    427 	if (raidautoconfig == 0)
    428 		return (0);
    429 
    430 	/* XXX This code can only be run once. */
    431 	raidautoconfig = 0;
    432 
    433 	/* 1. locate all RAID components on the system */
    434 #ifdef DEBUG
    435 	printf("Searching for RAID components...\n");
    436 #endif
    437 	ac_list = rf_find_raid_components();
    438 
    439 	/* 2. Sort them into their respective sets. */
    440 	config_sets = rf_create_auto_sets(ac_list);
    441 
    442 	/*
    443 	 * 3. Evaluate each set andconfigure the valid ones.
    444 	 * This gets done in rf_buildroothack().
    445 	 */
    446 	rf_buildroothack(config_sets);
    447 
    448 	return 1;
    449 }
    450 
    451 void
    452 rf_buildroothack(RF_ConfigSet_t *config_sets)
    453 {
    454 	RF_ConfigSet_t *cset;
    455 	RF_ConfigSet_t *next_cset;
    456 	int retcode;
    457 	int raidID;
    458 	int rootID;
    459 	int col;
    460 	int num_root;
    461 	char *devname;
    462 
    463 	rootID = 0;
    464 	num_root = 0;
    465 	cset = config_sets;
    466 	while(cset != NULL ) {
    467 		next_cset = cset->next;
    468 		if (rf_have_enough_components(cset) &&
    469 		    cset->ac->clabel->autoconfigure==1) {
    470 			retcode = rf_auto_config_set(cset,&raidID);
    471 			if (!retcode) {
    472 #ifdef DEBUG
    473 				printf("raid%d: configured ok\n", raidID);
    474 #endif
    475 				if (cset->rootable) {
    476 					rootID = raidID;
    477 					num_root++;
    478 				}
    479 			} else {
    480 				/* The autoconfig didn't work :( */
    481 #ifdef DEBUG
    482 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    483 #endif
    484 				rf_release_all_vps(cset);
    485 			}
    486 		} else {
    487 			/* we're not autoconfiguring this set...
    488 			   release the associated resources */
    489 			rf_release_all_vps(cset);
    490 		}
    491 		/* cleanup */
    492 		rf_cleanup_config_set(cset);
    493 		cset = next_cset;
    494 	}
    495 
    496 	/* if the user has specified what the root device should be
    497 	   then we don't touch booted_device or boothowto... */
    498 
    499 	if (rootspec != NULL)
    500 		return;
    501 
    502 	/* we found something bootable... */
    503 
    504 	if (num_root == 1) {
    505 		booted_device = raid_softc[rootID].sc_dev;
    506 	} else if (num_root > 1) {
    507 
    508 		/*
    509 		 * Maybe the MD code can help. If it cannot, then
    510 		 * setroot() will discover that we have no
    511 		 * booted_device and will ask the user if nothing was
    512 		 * hardwired in the kernel config file
    513 		 */
    514 
    515 		if (booted_device == NULL)
    516 			cpu_rootconf();
    517 		if (booted_device == NULL)
    518 			return;
    519 
    520 		num_root = 0;
    521 		for (raidID = 0; raidID < numraid; raidID++) {
    522 			if (raidPtrs[raidID]->valid == 0)
    523 				continue;
    524 
    525 			if (raidPtrs[raidID]->root_partition == 0)
    526 				continue;
    527 
    528 			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
    529 				devname = raidPtrs[raidID]->Disks[col].devname;
    530 				devname += sizeof("/dev/") - 1;
    531 				if (strncmp(devname, device_xname(booted_device),
    532 					    strlen(device_xname(booted_device))) != 0)
    533 					continue;
    534 #ifdef DEBUG
    535 				printf("raid%d includes boot device %s\n",
    536 				       raidID, devname);
    537 #endif
    538 				num_root++;
    539 				rootID = raidID;
    540 			}
    541 		}
    542 
    543 		if (num_root == 1) {
    544 			booted_device = raid_softc[rootID].sc_dev;
    545 		} else {
    546 			/* we can't guess.. require the user to answer... */
    547 			boothowto |= RB_ASKNAME;
    548 		}
    549 	}
    550 }
    551 
    552 
    553 int
    554 raidsize(dev_t dev)
    555 {
    556 	struct raid_softc *rs;
    557 	struct disklabel *lp;
    558 	int     part, unit, omask, size;
    559 
    560 	unit = raidunit(dev);
    561 	if (unit >= numraid)
    562 		return (-1);
    563 	rs = &raid_softc[unit];
    564 
    565 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    566 		return (-1);
    567 
    568 	part = DISKPART(dev);
    569 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    570 	lp = rs->sc_dkdev.dk_label;
    571 
    572 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
    573 		return (-1);
    574 
    575 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    576 		size = -1;
    577 	else
    578 		size = lp->d_partitions[part].p_size *
    579 		    (lp->d_secsize / DEV_BSIZE);
    580 
    581 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
    582 		return (-1);
    583 
    584 	return (size);
    585 
    586 }
    587 
    588 int
    589 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
    590 {
    591 	int     unit = raidunit(dev);
    592 	struct raid_softc *rs;
    593 	const struct bdevsw *bdev;
    594 	struct disklabel *lp;
    595 	RF_Raid_t *raidPtr;
    596 	daddr_t offset;
    597 	int     part, c, sparecol, j, scol, dumpto;
    598 	int     error = 0;
    599 
    600 	if (unit >= numraid)
    601 		return (ENXIO);
    602 
    603 	rs = &raid_softc[unit];
    604 	raidPtr = raidPtrs[unit];
    605 
    606 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    607 		return ENXIO;
    608 
    609 	/* we only support dumping to RAID 1 sets */
    610 	if (raidPtr->Layout.numDataCol != 1 ||
    611 	    raidPtr->Layout.numParityCol != 1)
    612 		return EINVAL;
    613 
    614 
    615 	if ((error = raidlock(rs)) != 0)
    616 		return error;
    617 
    618 	if (size % DEV_BSIZE != 0) {
    619 		error = EINVAL;
    620 		goto out;
    621 	}
    622 
    623 	if (blkno + size / DEV_BSIZE > rs->sc_size) {
    624 		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
    625 		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
    626 		    size / DEV_BSIZE, rs->sc_size);
    627 		error = EINVAL;
    628 		goto out;
    629 	}
    630 
    631 	part = DISKPART(dev);
    632 	lp = rs->sc_dkdev.dk_label;
    633 	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
    634 
    635 	/* figure out what device is alive.. */
    636 
    637 	/*
    638 	   Look for a component to dump to.  The preference for the
    639 	   component to dump to is as follows:
    640 	   1) the master
    641 	   2) a used_spare of the master
    642 	   3) the slave
    643 	   4) a used_spare of the slave
    644 	*/
    645 
    646 	dumpto = -1;
    647 	for (c = 0; c < raidPtr->numCol; c++) {
    648 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
    649 			/* this might be the one */
    650 			dumpto = c;
    651 			break;
    652 		}
    653 	}
    654 
    655 	/*
    656 	   At this point we have possibly selected a live master or a
    657 	   live slave.  We now check to see if there is a spared
    658 	   master (or a spared slave), if we didn't find a live master
    659 	   or a live slave.
    660 	*/
    661 
    662 	for (c = 0; c < raidPtr->numSpare; c++) {
    663 		sparecol = raidPtr->numCol + c;
    664 		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
    665 			/* How about this one? */
    666 			scol = -1;
    667 			for(j=0;j<raidPtr->numCol;j++) {
    668 				if (raidPtr->Disks[j].spareCol == sparecol) {
    669 					scol = j;
    670 					break;
    671 				}
    672 			}
    673 			if (scol == 0) {
    674 				/*
    675 				   We must have found a spared master!
    676 				   We'll take that over anything else
    677 				   found so far.  (We couldn't have
    678 				   found a real master before, since
    679 				   this is a used spare, and it's
    680 				   saying that it's replacing the
    681 				   master.)  On reboot (with
    682 				   autoconfiguration turned on)
    683 				   sparecol will become the 1st
    684 				   component (component0) of this set.
    685 				*/
    686 				dumpto = sparecol;
    687 				break;
    688 			} else if (scol != -1) {
    689 				/*
    690 				   Must be a spared slave.  We'll dump
    691 				   to that if we havn't found anything
    692 				   else so far.
    693 				*/
    694 				if (dumpto == -1)
    695 					dumpto = sparecol;
    696 			}
    697 		}
    698 	}
    699 
    700 	if (dumpto == -1) {
    701 		/* we couldn't find any live components to dump to!?!?
    702 		 */
    703 		error = EINVAL;
    704 		goto out;
    705 	}
    706 
    707 	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
    708 
    709 	/*
    710 	   Note that blkno is relative to this particular partition.
    711 	   By adding the offset of this partition in the RAID
    712 	   set, and also adding RF_PROTECTED_SECTORS, we get a
    713 	   value that is relative to the partition used for the
    714 	   underlying component.
    715 	*/
    716 
    717 	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
    718 				blkno + offset, va, size);
    719 
    720 out:
    721 	raidunlock(rs);
    722 
    723 	return error;
    724 }
    725 /* ARGSUSED */
    726 int
    727 raidopen(dev_t dev, int flags, int fmt,
    728     struct lwp *l)
    729 {
    730 	int     unit = raidunit(dev);
    731 	struct raid_softc *rs;
    732 	struct disklabel *lp;
    733 	int     part, pmask;
    734 	int     error = 0;
    735 
    736 	if (unit >= numraid)
    737 		return (ENXIO);
    738 	rs = &raid_softc[unit];
    739 
    740 	if ((error = raidlock(rs)) != 0)
    741 		return (error);
    742 	lp = rs->sc_dkdev.dk_label;
    743 
    744 	part = DISKPART(dev);
    745 
    746 	/*
    747 	 * If there are wedges, and this is not RAW_PART, then we
    748 	 * need to fail.
    749 	 */
    750 	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
    751 		error = EBUSY;
    752 		goto bad;
    753 	}
    754 	pmask = (1 << part);
    755 
    756 	if ((rs->sc_flags & RAIDF_INITED) &&
    757 	    (rs->sc_dkdev.dk_openmask == 0))
    758 		raidgetdisklabel(dev);
    759 
    760 	/* make sure that this partition exists */
    761 
    762 	if (part != RAW_PART) {
    763 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    764 		    ((part >= lp->d_npartitions) ||
    765 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    766 			error = ENXIO;
    767 			goto bad;
    768 		}
    769 	}
    770 	/* Prevent this unit from being unconfigured while open. */
    771 	switch (fmt) {
    772 	case S_IFCHR:
    773 		rs->sc_dkdev.dk_copenmask |= pmask;
    774 		break;
    775 
    776 	case S_IFBLK:
    777 		rs->sc_dkdev.dk_bopenmask |= pmask;
    778 		break;
    779 	}
    780 
    781 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    782 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    783 		/* First one... mark things as dirty... Note that we *MUST*
    784 		 have done a configure before this.  I DO NOT WANT TO BE
    785 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    786 		 THAT THEY BELONG TOGETHER!!!!! */
    787 		/* XXX should check to see if we're only open for reading
    788 		   here... If so, we needn't do this, but then need some
    789 		   other way of keeping track of what's happened.. */
    790 
    791 		rf_markalldirty( raidPtrs[unit] );
    792 	}
    793 
    794 
    795 	rs->sc_dkdev.dk_openmask =
    796 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    797 
    798 bad:
    799 	raidunlock(rs);
    800 
    801 	return (error);
    802 
    803 
    804 }
    805 /* ARGSUSED */
    806 int
    807 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
    808 {
    809 	int     unit = raidunit(dev);
    810 	struct cfdata *cf;
    811 	struct raid_softc *rs;
    812 	int     error = 0;
    813 	int     part;
    814 
    815 	if (unit >= numraid)
    816 		return (ENXIO);
    817 	rs = &raid_softc[unit];
    818 
    819 	if ((error = raidlock(rs)) != 0)
    820 		return (error);
    821 
    822 	part = DISKPART(dev);
    823 
    824 	/* ...that much closer to allowing unconfiguration... */
    825 	switch (fmt) {
    826 	case S_IFCHR:
    827 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    828 		break;
    829 
    830 	case S_IFBLK:
    831 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    832 		break;
    833 	}
    834 	rs->sc_dkdev.dk_openmask =
    835 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    836 
    837 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    838 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    839 		/* Last one... device is not unconfigured yet.
    840 		   Device shutdown has taken care of setting the
    841 		   clean bits if RAIDF_INITED is not set
    842 		   mark things as clean... */
    843 
    844 		rf_update_component_labels(raidPtrs[unit],
    845 						 RF_FINAL_COMPONENT_UPDATE);
    846 		if (doing_shutdown) {
    847 			/* last one, and we're going down, so
    848 			   lights out for this RAID set too. */
    849 			error = rf_Shutdown(raidPtrs[unit]);
    850 
    851 			/* It's no longer initialized... */
    852 			rs->sc_flags &= ~RAIDF_INITED;
    853 
    854 			/* detach the device */
    855 
    856 			cf = device_cfdata(rs->sc_dev);
    857 			error = config_detach(rs->sc_dev, DETACH_QUIET);
    858 			free(cf, M_RAIDFRAME);
    859 
    860 			/* Detach the disk. */
    861 			disk_detach(&rs->sc_dkdev);
    862 			disk_destroy(&rs->sc_dkdev);
    863 		}
    864 	}
    865 
    866 	raidunlock(rs);
    867 	return (0);
    868 
    869 }
    870 
    871 void
    872 raidstrategy(struct buf *bp)
    873 {
    874 	int s;
    875 
    876 	unsigned int raidID = raidunit(bp->b_dev);
    877 	RF_Raid_t *raidPtr;
    878 	struct raid_softc *rs = &raid_softc[raidID];
    879 	int     wlabel;
    880 
    881 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    882 		bp->b_error = ENXIO;
    883 		goto done;
    884 	}
    885 	if (raidID >= numraid || !raidPtrs[raidID]) {
    886 		bp->b_error = ENODEV;
    887 		goto done;
    888 	}
    889 	raidPtr = raidPtrs[raidID];
    890 	if (!raidPtr->valid) {
    891 		bp->b_error = ENODEV;
    892 		goto done;
    893 	}
    894 	if (bp->b_bcount == 0) {
    895 		db1_printf(("b_bcount is zero..\n"));
    896 		goto done;
    897 	}
    898 
    899 	/*
    900 	 * Do bounds checking and adjust transfer.  If there's an
    901 	 * error, the bounds check will flag that for us.
    902 	 */
    903 
    904 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    905 	if (DISKPART(bp->b_dev) == RAW_PART) {
    906 		uint64_t size; /* device size in DEV_BSIZE unit */
    907 
    908 		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
    909 			size = raidPtr->totalSectors <<
    910 			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
    911 		} else {
    912 			size = raidPtr->totalSectors >>
    913 			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
    914 		}
    915 		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
    916 			goto done;
    917 		}
    918 	} else {
    919 		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
    920 			db1_printf(("Bounds check failed!!:%d %d\n",
    921 				(int) bp->b_blkno, (int) wlabel));
    922 			goto done;
    923 		}
    924 	}
    925 	s = splbio();
    926 
    927 	bp->b_resid = 0;
    928 
    929 	/* stuff it onto our queue */
    930 	bufq_put(rs->buf_queue, bp);
    931 
    932 	/* scheduled the IO to happen at the next convenient time */
    933 	wakeup(&(raidPtrs[raidID]->iodone));
    934 
    935 	splx(s);
    936 	return;
    937 
    938 done:
    939 	bp->b_resid = bp->b_bcount;
    940 	biodone(bp);
    941 }
    942 /* ARGSUSED */
    943 int
    944 raidread(dev_t dev, struct uio *uio, int flags)
    945 {
    946 	int     unit = raidunit(dev);
    947 	struct raid_softc *rs;
    948 
    949 	if (unit >= numraid)
    950 		return (ENXIO);
    951 	rs = &raid_softc[unit];
    952 
    953 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    954 		return (ENXIO);
    955 
    956 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    957 
    958 }
    959 /* ARGSUSED */
    960 int
    961 raidwrite(dev_t dev, struct uio *uio, int flags)
    962 {
    963 	int     unit = raidunit(dev);
    964 	struct raid_softc *rs;
    965 
    966 	if (unit >= numraid)
    967 		return (ENXIO);
    968 	rs = &raid_softc[unit];
    969 
    970 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    971 		return (ENXIO);
    972 
    973 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    974 
    975 }
    976 
    977 int
    978 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
    979 {
    980 	int     unit = raidunit(dev);
    981 	int     error = 0;
    982 	int     part, pmask;
    983 	struct cfdata *cf;
    984 	struct raid_softc *rs;
    985 	RF_Config_t *k_cfg, *u_cfg;
    986 	RF_Raid_t *raidPtr;
    987 	RF_RaidDisk_t *diskPtr;
    988 	RF_AccTotals_t *totals;
    989 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    990 	u_char *specific_buf;
    991 	int retcode = 0;
    992 	int column;
    993 	int raidid;
    994 	struct rf_recon_req *rrcopy, *rr;
    995 	RF_ComponentLabel_t *clabel;
    996 	RF_ComponentLabel_t *ci_label;
    997 	RF_ComponentLabel_t **clabel_ptr;
    998 	RF_SingleComponent_t *sparePtr,*componentPtr;
    999 	RF_SingleComponent_t component;
   1000 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
   1001 	int i, j, d;
   1002 #ifdef __HAVE_OLD_DISKLABEL
   1003 	struct disklabel newlabel;
   1004 #endif
   1005 	struct dkwedge_info *dkw;
   1006 
   1007 	if (unit >= numraid)
   1008 		return (ENXIO);
   1009 	rs = &raid_softc[unit];
   1010 	raidPtr = raidPtrs[unit];
   1011 
   1012 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
   1013 		(int) DISKPART(dev), (int) unit, (int) cmd));
   1014 
   1015 	/* Must be open for writes for these commands... */
   1016 	switch (cmd) {
   1017 #ifdef DIOCGSECTORSIZE
   1018 	case DIOCGSECTORSIZE:
   1019 		*(u_int *)data = raidPtr->bytesPerSector;
   1020 		return 0;
   1021 	case DIOCGMEDIASIZE:
   1022 		*(off_t *)data =
   1023 		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
   1024 		return 0;
   1025 #endif
   1026 	case DIOCSDINFO:
   1027 	case DIOCWDINFO:
   1028 #ifdef __HAVE_OLD_DISKLABEL
   1029 	case ODIOCWDINFO:
   1030 	case ODIOCSDINFO:
   1031 #endif
   1032 	case DIOCWLABEL:
   1033 	case DIOCAWEDGE:
   1034 	case DIOCDWEDGE:
   1035 		if ((flag & FWRITE) == 0)
   1036 			return (EBADF);
   1037 	}
   1038 
   1039 	/* Must be initialized for these... */
   1040 	switch (cmd) {
   1041 	case DIOCGDINFO:
   1042 	case DIOCSDINFO:
   1043 	case DIOCWDINFO:
   1044 #ifdef __HAVE_OLD_DISKLABEL
   1045 	case ODIOCGDINFO:
   1046 	case ODIOCWDINFO:
   1047 	case ODIOCSDINFO:
   1048 	case ODIOCGDEFLABEL:
   1049 #endif
   1050 	case DIOCGPART:
   1051 	case DIOCWLABEL:
   1052 	case DIOCGDEFLABEL:
   1053 	case DIOCAWEDGE:
   1054 	case DIOCDWEDGE:
   1055 	case DIOCLWEDGES:
   1056 	case DIOCCACHESYNC:
   1057 	case RAIDFRAME_SHUTDOWN:
   1058 	case RAIDFRAME_REWRITEPARITY:
   1059 	case RAIDFRAME_GET_INFO:
   1060 	case RAIDFRAME_RESET_ACCTOTALS:
   1061 	case RAIDFRAME_GET_ACCTOTALS:
   1062 	case RAIDFRAME_KEEP_ACCTOTALS:
   1063 	case RAIDFRAME_GET_SIZE:
   1064 	case RAIDFRAME_FAIL_DISK:
   1065 	case RAIDFRAME_COPYBACK:
   1066 	case RAIDFRAME_CHECK_RECON_STATUS:
   1067 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1068 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1069 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1070 	case RAIDFRAME_ADD_HOT_SPARE:
   1071 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1072 	case RAIDFRAME_INIT_LABELS:
   1073 	case RAIDFRAME_REBUILD_IN_PLACE:
   1074 	case RAIDFRAME_CHECK_PARITY:
   1075 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1076 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1077 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1078 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1079 	case RAIDFRAME_SET_AUTOCONFIG:
   1080 	case RAIDFRAME_SET_ROOT:
   1081 	case RAIDFRAME_DELETE_COMPONENT:
   1082 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1083 		if ((rs->sc_flags & RAIDF_INITED) == 0)
   1084 			return (ENXIO);
   1085 	}
   1086 
   1087 	switch (cmd) {
   1088 #ifdef COMPAT_50
   1089 	case RAIDFRAME_GET_INFO50:
   1090 		return rf_get_info50(raidPtr, data);
   1091 
   1092 	case RAIDFRAME_CONFIGURE50:
   1093 		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
   1094 			return retcode;
   1095 		goto config;
   1096 #endif
   1097 		/* configure the system */
   1098 	case RAIDFRAME_CONFIGURE:
   1099 
   1100 		if (raidPtr->valid) {
   1101 			/* There is a valid RAID set running on this unit! */
   1102 			printf("raid%d: Device already configured!\n",unit);
   1103 			return(EINVAL);
   1104 		}
   1105 
   1106 		/* copy-in the configuration information */
   1107 		/* data points to a pointer to the configuration structure */
   1108 
   1109 		u_cfg = *((RF_Config_t **) data);
   1110 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
   1111 		if (k_cfg == NULL) {
   1112 			return (ENOMEM);
   1113 		}
   1114 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
   1115 		if (retcode) {
   1116 			RF_Free(k_cfg, sizeof(RF_Config_t));
   1117 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
   1118 				retcode));
   1119 			return (retcode);
   1120 		}
   1121 		goto config;
   1122 	config:
   1123 		/* allocate a buffer for the layout-specific data, and copy it
   1124 		 * in */
   1125 		if (k_cfg->layoutSpecificSize) {
   1126 			if (k_cfg->layoutSpecificSize > 10000) {
   1127 				/* sanity check */
   1128 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1129 				return (EINVAL);
   1130 			}
   1131 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
   1132 			    (u_char *));
   1133 			if (specific_buf == NULL) {
   1134 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1135 				return (ENOMEM);
   1136 			}
   1137 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1138 			    k_cfg->layoutSpecificSize);
   1139 			if (retcode) {
   1140 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1141 				RF_Free(specific_buf,
   1142 					k_cfg->layoutSpecificSize);
   1143 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
   1144 					retcode));
   1145 				return (retcode);
   1146 			}
   1147 		} else
   1148 			specific_buf = NULL;
   1149 		k_cfg->layoutSpecific = specific_buf;
   1150 
   1151 		/* should do some kind of sanity check on the configuration.
   1152 		 * Store the sum of all the bytes in the last byte? */
   1153 
   1154 		/* configure the system */
   1155 
   1156 		/*
   1157 		 * Clear the entire RAID descriptor, just to make sure
   1158 		 *  there is no stale data left in the case of a
   1159 		 *  reconfiguration
   1160 		 */
   1161 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
   1162 		raidPtr->raidid = unit;
   1163 
   1164 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
   1165 
   1166 		if (retcode == 0) {
   1167 
   1168 			/* allow this many simultaneous IO's to
   1169 			   this RAID device */
   1170 			raidPtr->openings = RAIDOUTSTANDING;
   1171 
   1172 			raidinit(raidPtr);
   1173 			rf_markalldirty(raidPtr);
   1174 		}
   1175 		/* free the buffers.  No return code here. */
   1176 		if (k_cfg->layoutSpecificSize) {
   1177 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1178 		}
   1179 		RF_Free(k_cfg, sizeof(RF_Config_t));
   1180 
   1181 		return (retcode);
   1182 
   1183 		/* shutdown the system */
   1184 	case RAIDFRAME_SHUTDOWN:
   1185 
   1186 		if ((error = raidlock(rs)) != 0)
   1187 			return (error);
   1188 
   1189 		/*
   1190 		 * If somebody has a partition mounted, we shouldn't
   1191 		 * shutdown.
   1192 		 */
   1193 
   1194 		part = DISKPART(dev);
   1195 		pmask = (1 << part);
   1196 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
   1197 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
   1198 			(rs->sc_dkdev.dk_copenmask & pmask))) {
   1199 			raidunlock(rs);
   1200 			return (EBUSY);
   1201 		}
   1202 
   1203 		retcode = rf_Shutdown(raidPtr);
   1204 
   1205 		/* It's no longer initialized... */
   1206 		rs->sc_flags &= ~RAIDF_INITED;
   1207 
   1208 		/* free the pseudo device attach bits */
   1209 
   1210 		cf = device_cfdata(rs->sc_dev);
   1211 		/* XXX this causes us to not return any errors
   1212 		   from the above call to rf_Shutdown() */
   1213 		retcode = config_detach(rs->sc_dev, DETACH_QUIET);
   1214 		free(cf, M_RAIDFRAME);
   1215 
   1216 		/* Detach the disk. */
   1217 		disk_detach(&rs->sc_dkdev);
   1218 		disk_destroy(&rs->sc_dkdev);
   1219 
   1220 		raidunlock(rs);
   1221 
   1222 		return (retcode);
   1223 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1224 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1225 		/* need to read the component label for the disk indicated
   1226 		   by row,column in clabel */
   1227 
   1228 		/* For practice, let's get it directly fromdisk, rather
   1229 		   than from the in-core copy */
   1230 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1231 			   (RF_ComponentLabel_t *));
   1232 		if (clabel == NULL)
   1233 			return (ENOMEM);
   1234 
   1235 		retcode = copyin( *clabel_ptr, clabel,
   1236 				  sizeof(RF_ComponentLabel_t));
   1237 
   1238 		if (retcode) {
   1239 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1240 			return(retcode);
   1241 		}
   1242 
   1243 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1244 
   1245 		column = clabel->column;
   1246 
   1247 		if ((column < 0) || (column >= raidPtr->numCol +
   1248 				     raidPtr->numSpare)) {
   1249 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1250 			return(EINVAL);
   1251 		}
   1252 
   1253 		retcode = raidread_component_label(raidPtr->Disks[column].dev,
   1254 				raidPtr->raid_cinfo[column].ci_vp,
   1255 				clabel );
   1256 
   1257 		if (retcode == 0) {
   1258 			retcode = copyout(clabel, *clabel_ptr,
   1259 					  sizeof(RF_ComponentLabel_t));
   1260 		}
   1261 		RF_Free(clabel, sizeof(RF_ComponentLabel_t));
   1262 		return (retcode);
   1263 
   1264 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1265 		clabel = (RF_ComponentLabel_t *) data;
   1266 
   1267 		/* XXX check the label for valid stuff... */
   1268 		/* Note that some things *should not* get modified --
   1269 		   the user should be re-initing the labels instead of
   1270 		   trying to patch things.
   1271 		   */
   1272 
   1273 		raidid = raidPtr->raidid;
   1274 #ifdef DEBUG
   1275 		printf("raid%d: Got component label:\n", raidid);
   1276 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1277 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1278 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1279 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1280 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1281 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1282 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1283 #endif
   1284 		clabel->row = 0;
   1285 		column = clabel->column;
   1286 
   1287 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1288 			return(EINVAL);
   1289 		}
   1290 
   1291 		/* XXX this isn't allowed to do anything for now :-) */
   1292 
   1293 		/* XXX and before it is, we need to fill in the rest
   1294 		   of the fields!?!?!?! */
   1295 #if 0
   1296 		raidwrite_component_label(
   1297 		     raidPtr->Disks[column].dev,
   1298 			    raidPtr->raid_cinfo[column].ci_vp,
   1299 			    clabel );
   1300 #endif
   1301 		return (0);
   1302 
   1303 	case RAIDFRAME_INIT_LABELS:
   1304 		clabel = (RF_ComponentLabel_t *) data;
   1305 		/*
   1306 		   we only want the serial number from
   1307 		   the above.  We get all the rest of the information
   1308 		   from the config that was used to create this RAID
   1309 		   set.
   1310 		   */
   1311 
   1312 		raidPtr->serial_number = clabel->serial_number;
   1313 
   1314 		RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
   1315 			  (RF_ComponentLabel_t *));
   1316 		if (ci_label == NULL)
   1317 			return (ENOMEM);
   1318 
   1319 		raid_init_component_label(raidPtr, ci_label);
   1320 		ci_label->serial_number = clabel->serial_number;
   1321 		ci_label->row = 0; /* we dont' pretend to support more */
   1322 
   1323 		for(column=0;column<raidPtr->numCol;column++) {
   1324 			diskPtr = &raidPtr->Disks[column];
   1325 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1326 				ci_label->partitionSize = diskPtr->partitionSize;
   1327 				ci_label->column = column;
   1328 				raidwrite_component_label(
   1329 							  raidPtr->Disks[column].dev,
   1330 							  raidPtr->raid_cinfo[column].ci_vp,
   1331 							  ci_label );
   1332 			}
   1333 		}
   1334 		RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
   1335 
   1336 		return (retcode);
   1337 	case RAIDFRAME_SET_AUTOCONFIG:
   1338 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1339 		printf("raid%d: New autoconfig value is: %d\n",
   1340 		       raidPtr->raidid, d);
   1341 		*(int *) data = d;
   1342 		return (retcode);
   1343 
   1344 	case RAIDFRAME_SET_ROOT:
   1345 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1346 		printf("raid%d: New rootpartition value is: %d\n",
   1347 		       raidPtr->raidid, d);
   1348 		*(int *) data = d;
   1349 		return (retcode);
   1350 
   1351 		/* initialize all parity */
   1352 	case RAIDFRAME_REWRITEPARITY:
   1353 
   1354 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1355 			/* Parity for RAID 0 is trivially correct */
   1356 			raidPtr->parity_good = RF_RAID_CLEAN;
   1357 			return(0);
   1358 		}
   1359 
   1360 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1361 			/* Re-write is already in progress! */
   1362 			return(EINVAL);
   1363 		}
   1364 
   1365 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1366 					   rf_RewriteParityThread,
   1367 					   raidPtr,"raid_parity");
   1368 		return (retcode);
   1369 
   1370 
   1371 	case RAIDFRAME_ADD_HOT_SPARE:
   1372 		sparePtr = (RF_SingleComponent_t *) data;
   1373 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
   1374 		retcode = rf_add_hot_spare(raidPtr, &component);
   1375 		return(retcode);
   1376 
   1377 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1378 		return(retcode);
   1379 
   1380 	case RAIDFRAME_DELETE_COMPONENT:
   1381 		componentPtr = (RF_SingleComponent_t *)data;
   1382 		memcpy( &component, componentPtr,
   1383 			sizeof(RF_SingleComponent_t));
   1384 		retcode = rf_delete_component(raidPtr, &component);
   1385 		return(retcode);
   1386 
   1387 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1388 		componentPtr = (RF_SingleComponent_t *)data;
   1389 		memcpy( &component, componentPtr,
   1390 			sizeof(RF_SingleComponent_t));
   1391 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1392 		return(retcode);
   1393 
   1394 	case RAIDFRAME_REBUILD_IN_PLACE:
   1395 
   1396 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1397 			/* Can't do this on a RAID 0!! */
   1398 			return(EINVAL);
   1399 		}
   1400 
   1401 		if (raidPtr->recon_in_progress == 1) {
   1402 			/* a reconstruct is already in progress! */
   1403 			return(EINVAL);
   1404 		}
   1405 
   1406 		componentPtr = (RF_SingleComponent_t *) data;
   1407 		memcpy( &component, componentPtr,
   1408 			sizeof(RF_SingleComponent_t));
   1409 		component.row = 0; /* we don't support any more */
   1410 		column = component.column;
   1411 
   1412 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1413 			return(EINVAL);
   1414 		}
   1415 
   1416 		RF_LOCK_MUTEX(raidPtr->mutex);
   1417 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1418 		    (raidPtr->numFailures > 0)) {
   1419 			/* XXX 0 above shouldn't be constant!!! */
   1420 			/* some component other than this has failed.
   1421 			   Let's not make things worse than they already
   1422 			   are... */
   1423 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1424 			       raidPtr->raidid);
   1425 			printf("raid%d:     Col: %d   Too many failures.\n",
   1426 			       raidPtr->raidid, column);
   1427 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1428 			return (EINVAL);
   1429 		}
   1430 		if (raidPtr->Disks[column].status ==
   1431 		    rf_ds_reconstructing) {
   1432 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1433 			       raidPtr->raidid);
   1434 			printf("raid%d:    Col: %d   Reconstruction already occuring!\n", raidPtr->raidid, column);
   1435 
   1436 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1437 			return (EINVAL);
   1438 		}
   1439 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1440 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1441 			return (EINVAL);
   1442 		}
   1443 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1444 
   1445 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1446 		if (rrcopy == NULL)
   1447 			return(ENOMEM);
   1448 
   1449 		rrcopy->raidPtr = (void *) raidPtr;
   1450 		rrcopy->col = column;
   1451 
   1452 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1453 					   rf_ReconstructInPlaceThread,
   1454 					   rrcopy,"raid_reconip");
   1455 		return(retcode);
   1456 
   1457 	case RAIDFRAME_GET_INFO:
   1458 		if (!raidPtr->valid)
   1459 			return (ENODEV);
   1460 		ucfgp = (RF_DeviceConfig_t **) data;
   1461 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1462 			  (RF_DeviceConfig_t *));
   1463 		if (d_cfg == NULL)
   1464 			return (ENOMEM);
   1465 		d_cfg->rows = 1; /* there is only 1 row now */
   1466 		d_cfg->cols = raidPtr->numCol;
   1467 		d_cfg->ndevs = raidPtr->numCol;
   1468 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1469 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1470 			return (ENOMEM);
   1471 		}
   1472 		d_cfg->nspares = raidPtr->numSpare;
   1473 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1474 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1475 			return (ENOMEM);
   1476 		}
   1477 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1478 		d = 0;
   1479 		for (j = 0; j < d_cfg->cols; j++) {
   1480 			d_cfg->devs[d] = raidPtr->Disks[j];
   1481 			d++;
   1482 		}
   1483 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1484 			d_cfg->spares[i] = raidPtr->Disks[j];
   1485 		}
   1486 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1487 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1488 
   1489 		return (retcode);
   1490 
   1491 	case RAIDFRAME_CHECK_PARITY:
   1492 		*(int *) data = raidPtr->parity_good;
   1493 		return (0);
   1494 
   1495 	case RAIDFRAME_RESET_ACCTOTALS:
   1496 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1497 		return (0);
   1498 
   1499 	case RAIDFRAME_GET_ACCTOTALS:
   1500 		totals = (RF_AccTotals_t *) data;
   1501 		*totals = raidPtr->acc_totals;
   1502 		return (0);
   1503 
   1504 	case RAIDFRAME_KEEP_ACCTOTALS:
   1505 		raidPtr->keep_acc_totals = *(int *)data;
   1506 		return (0);
   1507 
   1508 	case RAIDFRAME_GET_SIZE:
   1509 		*(int *) data = raidPtr->totalSectors;
   1510 		return (0);
   1511 
   1512 		/* fail a disk & optionally start reconstruction */
   1513 	case RAIDFRAME_FAIL_DISK:
   1514 
   1515 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1516 			/* Can't do this on a RAID 0!! */
   1517 			return(EINVAL);
   1518 		}
   1519 
   1520 		rr = (struct rf_recon_req *) data;
   1521 		rr->row = 0;
   1522 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1523 			return (EINVAL);
   1524 
   1525 
   1526 		RF_LOCK_MUTEX(raidPtr->mutex);
   1527 		if (raidPtr->status == rf_rs_reconstructing) {
   1528 			/* you can't fail a disk while we're reconstructing! */
   1529 			/* XXX wrong for RAID6 */
   1530 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1531 			return (EINVAL);
   1532 		}
   1533 		if ((raidPtr->Disks[rr->col].status ==
   1534 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1535 			/* some other component has failed.  Let's not make
   1536 			   things worse. XXX wrong for RAID6 */
   1537 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1538 			return (EINVAL);
   1539 		}
   1540 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1541 			/* Can't fail a spared disk! */
   1542 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1543 			return (EINVAL);
   1544 		}
   1545 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1546 
   1547 		/* make a copy of the recon request so that we don't rely on
   1548 		 * the user's buffer */
   1549 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1550 		if (rrcopy == NULL)
   1551 			return(ENOMEM);
   1552 		memcpy(rrcopy, rr, sizeof(*rr));
   1553 		rrcopy->raidPtr = (void *) raidPtr;
   1554 
   1555 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1556 					   rf_ReconThread,
   1557 					   rrcopy,"raid_recon");
   1558 		return (0);
   1559 
   1560 		/* invoke a copyback operation after recon on whatever disk
   1561 		 * needs it, if any */
   1562 	case RAIDFRAME_COPYBACK:
   1563 
   1564 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1565 			/* This makes no sense on a RAID 0!! */
   1566 			return(EINVAL);
   1567 		}
   1568 
   1569 		if (raidPtr->copyback_in_progress == 1) {
   1570 			/* Copyback is already in progress! */
   1571 			return(EINVAL);
   1572 		}
   1573 
   1574 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1575 					   rf_CopybackThread,
   1576 					   raidPtr,"raid_copyback");
   1577 		return (retcode);
   1578 
   1579 		/* return the percentage completion of reconstruction */
   1580 	case RAIDFRAME_CHECK_RECON_STATUS:
   1581 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1582 			/* This makes no sense on a RAID 0, so tell the
   1583 			   user it's done. */
   1584 			*(int *) data = 100;
   1585 			return(0);
   1586 		}
   1587 		if (raidPtr->status != rf_rs_reconstructing)
   1588 			*(int *) data = 100;
   1589 		else {
   1590 			if (raidPtr->reconControl->numRUsTotal > 0) {
   1591 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
   1592 			} else {
   1593 				*(int *) data = 0;
   1594 			}
   1595 		}
   1596 		return (0);
   1597 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1598 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1599 		if (raidPtr->status != rf_rs_reconstructing) {
   1600 			progressInfo.remaining = 0;
   1601 			progressInfo.completed = 100;
   1602 			progressInfo.total = 100;
   1603 		} else {
   1604 			progressInfo.total =
   1605 				raidPtr->reconControl->numRUsTotal;
   1606 			progressInfo.completed =
   1607 				raidPtr->reconControl->numRUsComplete;
   1608 			progressInfo.remaining = progressInfo.total -
   1609 				progressInfo.completed;
   1610 		}
   1611 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1612 				  sizeof(RF_ProgressInfo_t));
   1613 		return (retcode);
   1614 
   1615 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1616 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1617 			/* This makes no sense on a RAID 0, so tell the
   1618 			   user it's done. */
   1619 			*(int *) data = 100;
   1620 			return(0);
   1621 		}
   1622 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1623 			*(int *) data = 100 *
   1624 				raidPtr->parity_rewrite_stripes_done /
   1625 				raidPtr->Layout.numStripe;
   1626 		} else {
   1627 			*(int *) data = 100;
   1628 		}
   1629 		return (0);
   1630 
   1631 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1632 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1633 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1634 			progressInfo.total = raidPtr->Layout.numStripe;
   1635 			progressInfo.completed =
   1636 				raidPtr->parity_rewrite_stripes_done;
   1637 			progressInfo.remaining = progressInfo.total -
   1638 				progressInfo.completed;
   1639 		} else {
   1640 			progressInfo.remaining = 0;
   1641 			progressInfo.completed = 100;
   1642 			progressInfo.total = 100;
   1643 		}
   1644 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1645 				  sizeof(RF_ProgressInfo_t));
   1646 		return (retcode);
   1647 
   1648 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1649 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1650 			/* This makes no sense on a RAID 0 */
   1651 			*(int *) data = 100;
   1652 			return(0);
   1653 		}
   1654 		if (raidPtr->copyback_in_progress == 1) {
   1655 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1656 				raidPtr->Layout.numStripe;
   1657 		} else {
   1658 			*(int *) data = 100;
   1659 		}
   1660 		return (0);
   1661 
   1662 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1663 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1664 		if (raidPtr->copyback_in_progress == 1) {
   1665 			progressInfo.total = raidPtr->Layout.numStripe;
   1666 			progressInfo.completed =
   1667 				raidPtr->copyback_stripes_done;
   1668 			progressInfo.remaining = progressInfo.total -
   1669 				progressInfo.completed;
   1670 		} else {
   1671 			progressInfo.remaining = 0;
   1672 			progressInfo.completed = 100;
   1673 			progressInfo.total = 100;
   1674 		}
   1675 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1676 				  sizeof(RF_ProgressInfo_t));
   1677 		return (retcode);
   1678 
   1679 		/* the sparetable daemon calls this to wait for the kernel to
   1680 		 * need a spare table. this ioctl does not return until a
   1681 		 * spare table is needed. XXX -- calling mpsleep here in the
   1682 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1683 		 * -- I should either compute the spare table in the kernel,
   1684 		 * or have a different -- XXX XXX -- interface (a different
   1685 		 * character device) for delivering the table     -- XXX */
   1686 #if 0
   1687 	case RAIDFRAME_SPARET_WAIT:
   1688 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1689 		while (!rf_sparet_wait_queue)
   1690 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1691 		waitreq = rf_sparet_wait_queue;
   1692 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1693 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1694 
   1695 		/* structure assignment */
   1696 		*((RF_SparetWait_t *) data) = *waitreq;
   1697 
   1698 		RF_Free(waitreq, sizeof(*waitreq));
   1699 		return (0);
   1700 
   1701 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1702 		 * code in it that will cause the dameon to exit */
   1703 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1704 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1705 		waitreq->fcol = -1;
   1706 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1707 		waitreq->next = rf_sparet_wait_queue;
   1708 		rf_sparet_wait_queue = waitreq;
   1709 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1710 		wakeup(&rf_sparet_wait_queue);
   1711 		return (0);
   1712 
   1713 		/* used by the spare table daemon to deliver a spare table
   1714 		 * into the kernel */
   1715 	case RAIDFRAME_SEND_SPARET:
   1716 
   1717 		/* install the spare table */
   1718 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1719 
   1720 		/* respond to the requestor.  the return status of the spare
   1721 		 * table installation is passed in the "fcol" field */
   1722 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1723 		waitreq->fcol = retcode;
   1724 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1725 		waitreq->next = rf_sparet_resp_queue;
   1726 		rf_sparet_resp_queue = waitreq;
   1727 		wakeup(&rf_sparet_resp_queue);
   1728 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1729 
   1730 		return (retcode);
   1731 #endif
   1732 
   1733 	default:
   1734 		break; /* fall through to the os-specific code below */
   1735 
   1736 	}
   1737 
   1738 	if (!raidPtr->valid)
   1739 		return (EINVAL);
   1740 
   1741 	/*
   1742 	 * Add support for "regular" device ioctls here.
   1743 	 */
   1744 
   1745 	switch (cmd) {
   1746 	case DIOCGDINFO:
   1747 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1748 		break;
   1749 #ifdef __HAVE_OLD_DISKLABEL
   1750 	case ODIOCGDINFO:
   1751 		newlabel = *(rs->sc_dkdev.dk_label);
   1752 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1753 			return ENOTTY;
   1754 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1755 		break;
   1756 #endif
   1757 
   1758 	case DIOCGPART:
   1759 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1760 		((struct partinfo *) data)->part =
   1761 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1762 		break;
   1763 
   1764 	case DIOCWDINFO:
   1765 	case DIOCSDINFO:
   1766 #ifdef __HAVE_OLD_DISKLABEL
   1767 	case ODIOCWDINFO:
   1768 	case ODIOCSDINFO:
   1769 #endif
   1770 	{
   1771 		struct disklabel *lp;
   1772 #ifdef __HAVE_OLD_DISKLABEL
   1773 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1774 			memset(&newlabel, 0, sizeof newlabel);
   1775 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1776 			lp = &newlabel;
   1777 		} else
   1778 #endif
   1779 		lp = (struct disklabel *)data;
   1780 
   1781 		if ((error = raidlock(rs)) != 0)
   1782 			return (error);
   1783 
   1784 		rs->sc_flags |= RAIDF_LABELLING;
   1785 
   1786 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1787 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1788 		if (error == 0) {
   1789 			if (cmd == DIOCWDINFO
   1790 #ifdef __HAVE_OLD_DISKLABEL
   1791 			    || cmd == ODIOCWDINFO
   1792 #endif
   1793 			   )
   1794 				error = writedisklabel(RAIDLABELDEV(dev),
   1795 				    raidstrategy, rs->sc_dkdev.dk_label,
   1796 				    rs->sc_dkdev.dk_cpulabel);
   1797 		}
   1798 		rs->sc_flags &= ~RAIDF_LABELLING;
   1799 
   1800 		raidunlock(rs);
   1801 
   1802 		if (error)
   1803 			return (error);
   1804 		break;
   1805 	}
   1806 
   1807 	case DIOCWLABEL:
   1808 		if (*(int *) data != 0)
   1809 			rs->sc_flags |= RAIDF_WLABEL;
   1810 		else
   1811 			rs->sc_flags &= ~RAIDF_WLABEL;
   1812 		break;
   1813 
   1814 	case DIOCGDEFLABEL:
   1815 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1816 		break;
   1817 
   1818 #ifdef __HAVE_OLD_DISKLABEL
   1819 	case ODIOCGDEFLABEL:
   1820 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1821 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1822 			return ENOTTY;
   1823 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1824 		break;
   1825 #endif
   1826 
   1827 	case DIOCAWEDGE:
   1828 	case DIOCDWEDGE:
   1829 	    	dkw = (void *)data;
   1830 
   1831 		/* If the ioctl happens here, the parent is us. */
   1832 		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
   1833 		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
   1834 
   1835 	case DIOCLWEDGES:
   1836 		return dkwedge_list(&rs->sc_dkdev,
   1837 		    (struct dkwedge_list *)data, l);
   1838 	case DIOCCACHESYNC:
   1839 		return rf_sync_component_caches(raidPtr);
   1840 	default:
   1841 		retcode = ENOTTY;
   1842 	}
   1843 	return (retcode);
   1844 
   1845 }
   1846 
   1847 
   1848 /* raidinit -- complete the rest of the initialization for the
   1849    RAIDframe device.  */
   1850 
   1851 
   1852 static void
   1853 raidinit(RF_Raid_t *raidPtr)
   1854 {
   1855 	struct cfdata *cf;
   1856 	struct raid_softc *rs;
   1857 	int     unit;
   1858 
   1859 	unit = raidPtr->raidid;
   1860 
   1861 	rs = &raid_softc[unit];
   1862 
   1863 	/* XXX should check return code first... */
   1864 	rs->sc_flags |= RAIDF_INITED;
   1865 
   1866 	/* XXX doesn't check bounds. */
   1867 	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
   1868 
   1869 	/* attach the pseudo device */
   1870 	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
   1871 	cf->cf_name = raid_cd.cd_name;
   1872 	cf->cf_atname = raid_cd.cd_name;
   1873 	cf->cf_unit = unit;
   1874 	cf->cf_fstate = FSTATE_STAR;
   1875 
   1876 	rs->sc_dev = config_attach_pseudo(cf);
   1877 
   1878 	if (rs->sc_dev==NULL) {
   1879 		printf("raid%d: config_attach_pseudo failed\n",
   1880 		       raidPtr->raidid);
   1881 	}
   1882 
   1883 	/* disk_attach actually creates space for the CPU disklabel, among
   1884 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1885 	 * with disklabels. */
   1886 
   1887 	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
   1888 	disk_attach(&rs->sc_dkdev);
   1889 
   1890 	/* XXX There may be a weird interaction here between this, and
   1891 	 * protectedSectors, as used in RAIDframe.  */
   1892 
   1893 	rs->sc_size = raidPtr->totalSectors;
   1894 
   1895 	dkwedge_discover(&rs->sc_dkdev);
   1896 
   1897 	rf_set_properties(rs, raidPtr);
   1898 
   1899 }
   1900 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   1901 /* wake up the daemon & tell it to get us a spare table
   1902  * XXX
   1903  * the entries in the queues should be tagged with the raidPtr
   1904  * so that in the extremely rare case that two recons happen at once,
   1905  * we know for which device were requesting a spare table
   1906  * XXX
   1907  *
   1908  * XXX This code is not currently used. GO
   1909  */
   1910 int
   1911 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
   1912 {
   1913 	int     retcode;
   1914 
   1915 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1916 	req->next = rf_sparet_wait_queue;
   1917 	rf_sparet_wait_queue = req;
   1918 	wakeup(&rf_sparet_wait_queue);
   1919 
   1920 	/* mpsleep unlocks the mutex */
   1921 	while (!rf_sparet_resp_queue) {
   1922 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1923 		    "raidframe getsparetable", 0);
   1924 	}
   1925 	req = rf_sparet_resp_queue;
   1926 	rf_sparet_resp_queue = req->next;
   1927 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1928 
   1929 	retcode = req->fcol;
   1930 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1931 					 * alloc'd */
   1932 	return (retcode);
   1933 }
   1934 #endif
   1935 
   1936 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1937  * bp & passes it down.
   1938  * any calls originating in the kernel must use non-blocking I/O
   1939  * do some extra sanity checking to return "appropriate" error values for
   1940  * certain conditions (to make some standard utilities work)
   1941  *
   1942  * Formerly known as: rf_DoAccessKernel
   1943  */
   1944 void
   1945 raidstart(RF_Raid_t *raidPtr)
   1946 {
   1947 	RF_SectorCount_t num_blocks, pb, sum;
   1948 	RF_RaidAddr_t raid_addr;
   1949 	struct partition *pp;
   1950 	daddr_t blocknum;
   1951 	int     unit;
   1952 	struct raid_softc *rs;
   1953 	int     do_async;
   1954 	struct buf *bp;
   1955 	int rc;
   1956 
   1957 	unit = raidPtr->raidid;
   1958 	rs = &raid_softc[unit];
   1959 
   1960 	/* quick check to see if anything has died recently */
   1961 	RF_LOCK_MUTEX(raidPtr->mutex);
   1962 	if (raidPtr->numNewFailures > 0) {
   1963 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1964 		rf_update_component_labels(raidPtr,
   1965 					   RF_NORMAL_COMPONENT_UPDATE);
   1966 		RF_LOCK_MUTEX(raidPtr->mutex);
   1967 		raidPtr->numNewFailures--;
   1968 	}
   1969 
   1970 	/* Check to see if we're at the limit... */
   1971 	while (raidPtr->openings > 0) {
   1972 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1973 
   1974 		/* get the next item, if any, from the queue */
   1975 		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
   1976 			/* nothing more to do */
   1977 			return;
   1978 		}
   1979 
   1980 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1981 		 * partition.. Need to make it absolute to the underlying
   1982 		 * device.. */
   1983 
   1984 		blocknum = bp->b_blkno;
   1985 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1986 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1987 			blocknum += pp->p_offset;
   1988 		}
   1989 
   1990 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1991 			    (int) blocknum));
   1992 
   1993 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1994 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1995 
   1996 		/* *THIS* is where we adjust what block we're going to...
   1997 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1998 		raid_addr = blocknum;
   1999 
   2000 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   2001 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   2002 		sum = raid_addr + num_blocks + pb;
   2003 		if (1 || rf_debugKernelAccess) {
   2004 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   2005 				    (int) raid_addr, (int) sum, (int) num_blocks,
   2006 				    (int) pb, (int) bp->b_resid));
   2007 		}
   2008 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   2009 		    || (sum < num_blocks) || (sum < pb)) {
   2010 			bp->b_error = ENOSPC;
   2011 			bp->b_resid = bp->b_bcount;
   2012 			biodone(bp);
   2013 			RF_LOCK_MUTEX(raidPtr->mutex);
   2014 			continue;
   2015 		}
   2016 		/*
   2017 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   2018 		 */
   2019 
   2020 		if (bp->b_bcount & raidPtr->sectorMask) {
   2021 			bp->b_error = EINVAL;
   2022 			bp->b_resid = bp->b_bcount;
   2023 			biodone(bp);
   2024 			RF_LOCK_MUTEX(raidPtr->mutex);
   2025 			continue;
   2026 
   2027 		}
   2028 		db1_printf(("Calling DoAccess..\n"));
   2029 
   2030 
   2031 		RF_LOCK_MUTEX(raidPtr->mutex);
   2032 		raidPtr->openings--;
   2033 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   2034 
   2035 		/*
   2036 		 * Everything is async.
   2037 		 */
   2038 		do_async = 1;
   2039 
   2040 		disk_busy(&rs->sc_dkdev);
   2041 
   2042 		/* XXX we're still at splbio() here... do we *really*
   2043 		   need to be? */
   2044 
   2045 		/* don't ever condition on bp->b_flags & B_WRITE.
   2046 		 * always condition on B_READ instead */
   2047 
   2048 		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   2049 				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   2050 				 do_async, raid_addr, num_blocks,
   2051 				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   2052 
   2053 		if (rc) {
   2054 			bp->b_error = rc;
   2055 			bp->b_resid = bp->b_bcount;
   2056 			biodone(bp);
   2057 			/* continue loop */
   2058 		}
   2059 
   2060 		RF_LOCK_MUTEX(raidPtr->mutex);
   2061 	}
   2062 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   2063 }
   2064 
   2065 
   2066 
   2067 
   2068 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   2069 
   2070 int
   2071 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
   2072 {
   2073 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   2074 	struct buf *bp;
   2075 
   2076 	req->queue = queue;
   2077 	bp = req->bp;
   2078 
   2079 	switch (req->type) {
   2080 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   2081 		/* XXX need to do something extra here.. */
   2082 		/* I'm leaving this in, as I've never actually seen it used,
   2083 		 * and I'd like folks to report it... GO */
   2084 		printf(("WAKEUP CALLED\n"));
   2085 		queue->numOutstanding++;
   2086 
   2087 		bp->b_flags = 0;
   2088 		bp->b_private = req;
   2089 
   2090 		KernelWakeupFunc(bp);
   2091 		break;
   2092 
   2093 	case RF_IO_TYPE_READ:
   2094 	case RF_IO_TYPE_WRITE:
   2095 #if RF_ACC_TRACE > 0
   2096 		if (req->tracerec) {
   2097 			RF_ETIMER_START(req->tracerec->timer);
   2098 		}
   2099 #endif
   2100 		InitBP(bp, queue->rf_cinfo->ci_vp,
   2101 		    op, queue->rf_cinfo->ci_dev,
   2102 		    req->sectorOffset, req->numSector,
   2103 		    req->buf, KernelWakeupFunc, (void *) req,
   2104 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   2105 
   2106 		if (rf_debugKernelAccess) {
   2107 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   2108 				(long) bp->b_blkno));
   2109 		}
   2110 		queue->numOutstanding++;
   2111 		queue->last_deq_sector = req->sectorOffset;
   2112 		/* acc wouldn't have been let in if there were any pending
   2113 		 * reqs at any other priority */
   2114 		queue->curPriority = req->priority;
   2115 
   2116 		db1_printf(("Going for %c to unit %d col %d\n",
   2117 			    req->type, queue->raidPtr->raidid,
   2118 			    queue->col));
   2119 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   2120 			(int) req->sectorOffset, (int) req->numSector,
   2121 			(int) (req->numSector <<
   2122 			    queue->raidPtr->logBytesPerSector),
   2123 			(int) queue->raidPtr->logBytesPerSector));
   2124 
   2125 		/*
   2126 		 * XXX: drop lock here since this can block at
   2127 		 * least with backing SCSI devices.  Retake it
   2128 		 * to minimize fuss with calling interfaces.
   2129 		 */
   2130 
   2131 		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
   2132 		bdev_strategy(bp);
   2133 		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
   2134 		break;
   2135 
   2136 	default:
   2137 		panic("bad req->type in rf_DispatchKernelIO");
   2138 	}
   2139 	db1_printf(("Exiting from DispatchKernelIO\n"));
   2140 
   2141 	return (0);
   2142 }
   2143 /* this is the callback function associated with a I/O invoked from
   2144    kernel code.
   2145  */
   2146 static void
   2147 KernelWakeupFunc(struct buf *bp)
   2148 {
   2149 	RF_DiskQueueData_t *req = NULL;
   2150 	RF_DiskQueue_t *queue;
   2151 	int s;
   2152 
   2153 	s = splbio();
   2154 	db1_printf(("recovering the request queue:\n"));
   2155 	req = bp->b_private;
   2156 
   2157 	queue = (RF_DiskQueue_t *) req->queue;
   2158 
   2159 #if RF_ACC_TRACE > 0
   2160 	if (req->tracerec) {
   2161 		RF_ETIMER_STOP(req->tracerec->timer);
   2162 		RF_ETIMER_EVAL(req->tracerec->timer);
   2163 		RF_LOCK_MUTEX(rf_tracing_mutex);
   2164 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2165 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2166 		req->tracerec->num_phys_ios++;
   2167 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   2168 	}
   2169 #endif
   2170 
   2171 	/* XXX Ok, let's get aggressive... If b_error is set, let's go
   2172 	 * ballistic, and mark the component as hosed... */
   2173 
   2174 	if (bp->b_error != 0) {
   2175 		/* Mark the disk as dead */
   2176 		/* but only mark it once... */
   2177 		/* and only if it wouldn't leave this RAID set
   2178 		   completely broken */
   2179 		if (((queue->raidPtr->Disks[queue->col].status ==
   2180 		      rf_ds_optimal) ||
   2181 		     (queue->raidPtr->Disks[queue->col].status ==
   2182 		      rf_ds_used_spare)) &&
   2183 		     (queue->raidPtr->numFailures <
   2184 		      queue->raidPtr->Layout.map->faultsTolerated)) {
   2185 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   2186 			       queue->raidPtr->raidid,
   2187 			       queue->raidPtr->Disks[queue->col].devname);
   2188 			queue->raidPtr->Disks[queue->col].status =
   2189 			    rf_ds_failed;
   2190 			queue->raidPtr->status = rf_rs_degraded;
   2191 			queue->raidPtr->numFailures++;
   2192 			queue->raidPtr->numNewFailures++;
   2193 		} else {	/* Disk is already dead... */
   2194 			/* printf("Disk already marked as dead!\n"); */
   2195 		}
   2196 
   2197 	}
   2198 
   2199 	/* Fill in the error value */
   2200 
   2201 	req->error = bp->b_error;
   2202 
   2203 	simple_lock(&queue->raidPtr->iodone_lock);
   2204 
   2205 	/* Drop this one on the "finished" queue... */
   2206 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   2207 
   2208 	/* Let the raidio thread know there is work to be done. */
   2209 	wakeup(&(queue->raidPtr->iodone));
   2210 
   2211 	simple_unlock(&queue->raidPtr->iodone_lock);
   2212 
   2213 	splx(s);
   2214 }
   2215 
   2216 
   2217 
   2218 /*
   2219  * initialize a buf structure for doing an I/O in the kernel.
   2220  */
   2221 static void
   2222 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2223        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
   2224        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
   2225        struct proc *b_proc)
   2226 {
   2227 	/* bp->b_flags       = B_PHYS | rw_flag; */
   2228 	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
   2229 	bp->b_oflags = 0;
   2230 	bp->b_cflags = 0;
   2231 	bp->b_bcount = numSect << logBytesPerSector;
   2232 	bp->b_bufsize = bp->b_bcount;
   2233 	bp->b_error = 0;
   2234 	bp->b_dev = dev;
   2235 	bp->b_data = bf;
   2236 	bp->b_blkno = startSect;
   2237 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2238 	if (bp->b_bcount == 0) {
   2239 		panic("bp->b_bcount is zero in InitBP!!");
   2240 	}
   2241 	bp->b_proc = b_proc;
   2242 	bp->b_iodone = cbFunc;
   2243 	bp->b_private = cbArg;
   2244 }
   2245 
   2246 static void
   2247 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
   2248 		    struct disklabel *lp)
   2249 {
   2250 	memset(lp, 0, sizeof(*lp));
   2251 
   2252 	/* fabricate a label... */
   2253 	lp->d_secperunit = raidPtr->totalSectors;
   2254 	lp->d_secsize = raidPtr->bytesPerSector;
   2255 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2256 	lp->d_ntracks = 4 * raidPtr->numCol;
   2257 	lp->d_ncylinders = raidPtr->totalSectors /
   2258 		(lp->d_nsectors * lp->d_ntracks);
   2259 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2260 
   2261 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2262 	lp->d_type = DTYPE_RAID;
   2263 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2264 	lp->d_rpm = 3600;
   2265 	lp->d_interleave = 1;
   2266 	lp->d_flags = 0;
   2267 
   2268 	lp->d_partitions[RAW_PART].p_offset = 0;
   2269 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2270 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2271 	lp->d_npartitions = RAW_PART + 1;
   2272 
   2273 	lp->d_magic = DISKMAGIC;
   2274 	lp->d_magic2 = DISKMAGIC;
   2275 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2276 
   2277 }
   2278 /*
   2279  * Read the disklabel from the raid device.  If one is not present, fake one
   2280  * up.
   2281  */
   2282 static void
   2283 raidgetdisklabel(dev_t dev)
   2284 {
   2285 	int     unit = raidunit(dev);
   2286 	struct raid_softc *rs = &raid_softc[unit];
   2287 	const char   *errstring;
   2288 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2289 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2290 	RF_Raid_t *raidPtr;
   2291 
   2292 	db1_printf(("Getting the disklabel...\n"));
   2293 
   2294 	memset(clp, 0, sizeof(*clp));
   2295 
   2296 	raidPtr = raidPtrs[unit];
   2297 
   2298 	raidgetdefaultlabel(raidPtr, rs, lp);
   2299 
   2300 	/*
   2301 	 * Call the generic disklabel extraction routine.
   2302 	 */
   2303 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2304 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2305 	if (errstring)
   2306 		raidmakedisklabel(rs);
   2307 	else {
   2308 		int     i;
   2309 		struct partition *pp;
   2310 
   2311 		/*
   2312 		 * Sanity check whether the found disklabel is valid.
   2313 		 *
   2314 		 * This is necessary since total size of the raid device
   2315 		 * may vary when an interleave is changed even though exactly
   2316 		 * same components are used, and old disklabel may used
   2317 		 * if that is found.
   2318 		 */
   2319 		if (lp->d_secperunit != rs->sc_size)
   2320 			printf("raid%d: WARNING: %s: "
   2321 			    "total sector size in disklabel (%d) != "
   2322 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2323 			    lp->d_secperunit, (long) rs->sc_size);
   2324 		for (i = 0; i < lp->d_npartitions; i++) {
   2325 			pp = &lp->d_partitions[i];
   2326 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2327 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2328 				       "exceeds the size of raid (%ld)\n",
   2329 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2330 		}
   2331 	}
   2332 
   2333 }
   2334 /*
   2335  * Take care of things one might want to take care of in the event
   2336  * that a disklabel isn't present.
   2337  */
   2338 static void
   2339 raidmakedisklabel(struct raid_softc *rs)
   2340 {
   2341 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2342 	db1_printf(("Making a label..\n"));
   2343 
   2344 	/*
   2345 	 * For historical reasons, if there's no disklabel present
   2346 	 * the raw partition must be marked FS_BSDFFS.
   2347 	 */
   2348 
   2349 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2350 
   2351 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2352 
   2353 	lp->d_checksum = dkcksum(lp);
   2354 }
   2355 /*
   2356  * Wait interruptibly for an exclusive lock.
   2357  *
   2358  * XXX
   2359  * Several drivers do this; it should be abstracted and made MP-safe.
   2360  * (Hmm... where have we seen this warning before :->  GO )
   2361  */
   2362 static int
   2363 raidlock(struct raid_softc *rs)
   2364 {
   2365 	int     error;
   2366 
   2367 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2368 		rs->sc_flags |= RAIDF_WANTED;
   2369 		if ((error =
   2370 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2371 			return (error);
   2372 	}
   2373 	rs->sc_flags |= RAIDF_LOCKED;
   2374 	return (0);
   2375 }
   2376 /*
   2377  * Unlock and wake up any waiters.
   2378  */
   2379 static void
   2380 raidunlock(struct raid_softc *rs)
   2381 {
   2382 
   2383 	rs->sc_flags &= ~RAIDF_LOCKED;
   2384 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2385 		rs->sc_flags &= ~RAIDF_WANTED;
   2386 		wakeup(rs);
   2387 	}
   2388 }
   2389 
   2390 
   2391 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2392 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2393 
   2394 int
   2395 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2396 {
   2397 	RF_ComponentLabel_t clabel;
   2398 	raidread_component_label(dev, b_vp, &clabel);
   2399 	clabel.mod_counter = mod_counter;
   2400 	clabel.clean = RF_RAID_CLEAN;
   2401 	raidwrite_component_label(dev, b_vp, &clabel);
   2402 	return(0);
   2403 }
   2404 
   2405 
   2406 int
   2407 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2408 {
   2409 	RF_ComponentLabel_t clabel;
   2410 	raidread_component_label(dev, b_vp, &clabel);
   2411 	clabel.mod_counter = mod_counter;
   2412 	clabel.clean = RF_RAID_DIRTY;
   2413 	raidwrite_component_label(dev, b_vp, &clabel);
   2414 	return(0);
   2415 }
   2416 
   2417 /* ARGSUSED */
   2418 int
   2419 raidread_component_label(dev_t dev, struct vnode *b_vp,
   2420 			 RF_ComponentLabel_t *clabel)
   2421 {
   2422 	struct buf *bp;
   2423 	const struct bdevsw *bdev;
   2424 	int error;
   2425 
   2426 	/* XXX should probably ensure that we don't try to do this if
   2427 	   someone has changed rf_protected_sectors. */
   2428 
   2429 	if (b_vp == NULL) {
   2430 		/* For whatever reason, this component is not valid.
   2431 		   Don't try to read a component label from it. */
   2432 		return(EINVAL);
   2433 	}
   2434 
   2435 	/* get a block of the appropriate size... */
   2436 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2437 	bp->b_dev = dev;
   2438 
   2439 	/* get our ducks in a row for the read */
   2440 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2441 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2442 	bp->b_flags |= B_READ;
   2443  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2444 
   2445 	bdev = bdevsw_lookup(bp->b_dev);
   2446 	if (bdev == NULL)
   2447 		return (ENXIO);
   2448 	(*bdev->d_strategy)(bp);
   2449 
   2450 	error = biowait(bp);
   2451 
   2452 	if (!error) {
   2453 		memcpy(clabel, bp->b_data,
   2454 		       sizeof(RF_ComponentLabel_t));
   2455 	}
   2456 
   2457 	brelse(bp, 0);
   2458 	return(error);
   2459 }
   2460 /* ARGSUSED */
   2461 int
   2462 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
   2463 			  RF_ComponentLabel_t *clabel)
   2464 {
   2465 	struct buf *bp;
   2466 	const struct bdevsw *bdev;
   2467 	int error;
   2468 
   2469 	/* get a block of the appropriate size... */
   2470 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2471 	bp->b_dev = dev;
   2472 
   2473 	/* get our ducks in a row for the write */
   2474 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2475 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2476 	bp->b_flags |= B_WRITE;
   2477  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2478 
   2479 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2480 
   2481 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2482 
   2483 	bdev = bdevsw_lookup(bp->b_dev);
   2484 	if (bdev == NULL)
   2485 		return (ENXIO);
   2486 	(*bdev->d_strategy)(bp);
   2487 	error = biowait(bp);
   2488 	brelse(bp, 0);
   2489 	if (error) {
   2490 #if 1
   2491 		printf("Failed to write RAID component info!\n");
   2492 #endif
   2493 	}
   2494 
   2495 	return(error);
   2496 }
   2497 
   2498 void
   2499 rf_markalldirty(RF_Raid_t *raidPtr)
   2500 {
   2501 	RF_ComponentLabel_t clabel;
   2502 	int sparecol;
   2503 	int c;
   2504 	int j;
   2505 	int scol = -1;
   2506 
   2507 	raidPtr->mod_counter++;
   2508 	for (c = 0; c < raidPtr->numCol; c++) {
   2509 		/* we don't want to touch (at all) a disk that has
   2510 		   failed */
   2511 		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
   2512 			raidread_component_label(
   2513 						 raidPtr->Disks[c].dev,
   2514 						 raidPtr->raid_cinfo[c].ci_vp,
   2515 						 &clabel);
   2516 			if (clabel.status == rf_ds_spared) {
   2517 				/* XXX do something special...
   2518 				   but whatever you do, don't
   2519 				   try to access it!! */
   2520 			} else {
   2521 				raidmarkdirty(
   2522 					      raidPtr->Disks[c].dev,
   2523 					      raidPtr->raid_cinfo[c].ci_vp,
   2524 					      raidPtr->mod_counter);
   2525 			}
   2526 		}
   2527 	}
   2528 
   2529 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2530 		sparecol = raidPtr->numCol + c;
   2531 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2532 			/*
   2533 
   2534 			   we claim this disk is "optimal" if it's
   2535 			   rf_ds_used_spare, as that means it should be
   2536 			   directly substitutable for the disk it replaced.
   2537 			   We note that too...
   2538 
   2539 			 */
   2540 
   2541 			for(j=0;j<raidPtr->numCol;j++) {
   2542 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2543 					scol = j;
   2544 					break;
   2545 				}
   2546 			}
   2547 
   2548 			raidread_component_label(
   2549 				 raidPtr->Disks[sparecol].dev,
   2550 				 raidPtr->raid_cinfo[sparecol].ci_vp,
   2551 				 &clabel);
   2552 			/* make sure status is noted */
   2553 
   2554 			raid_init_component_label(raidPtr, &clabel);
   2555 
   2556 			clabel.row = 0;
   2557 			clabel.column = scol;
   2558 			/* Note: we *don't* change status from rf_ds_used_spare
   2559 			   to rf_ds_optimal */
   2560 			/* clabel.status = rf_ds_optimal; */
   2561 
   2562 			raidmarkdirty(raidPtr->Disks[sparecol].dev,
   2563 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2564 				      raidPtr->mod_counter);
   2565 		}
   2566 	}
   2567 }
   2568 
   2569 
   2570 void
   2571 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
   2572 {
   2573 	RF_ComponentLabel_t clabel;
   2574 	int sparecol;
   2575 	int c;
   2576 	int j;
   2577 	int scol;
   2578 
   2579 	scol = -1;
   2580 
   2581 	/* XXX should do extra checks to make sure things really are clean,
   2582 	   rather than blindly setting the clean bit... */
   2583 
   2584 	raidPtr->mod_counter++;
   2585 
   2586 	for (c = 0; c < raidPtr->numCol; c++) {
   2587 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   2588 			raidread_component_label(
   2589 						 raidPtr->Disks[c].dev,
   2590 						 raidPtr->raid_cinfo[c].ci_vp,
   2591 						 &clabel);
   2592 			/* make sure status is noted */
   2593 			clabel.status = rf_ds_optimal;
   2594 
   2595 			/* bump the counter */
   2596 			clabel.mod_counter = raidPtr->mod_counter;
   2597 
   2598 			/* note what unit we are configured as */
   2599 			clabel.last_unit = raidPtr->raidid;
   2600 
   2601 			raidwrite_component_label(
   2602 						  raidPtr->Disks[c].dev,
   2603 						  raidPtr->raid_cinfo[c].ci_vp,
   2604 						  &clabel);
   2605 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2606 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2607 					raidmarkclean(
   2608 						      raidPtr->Disks[c].dev,
   2609 						      raidPtr->raid_cinfo[c].ci_vp,
   2610 						      raidPtr->mod_counter);
   2611 				}
   2612 			}
   2613 		}
   2614 		/* else we don't touch it.. */
   2615 	}
   2616 
   2617 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2618 		sparecol = raidPtr->numCol + c;
   2619 		/* Need to ensure that the reconstruct actually completed! */
   2620 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2621 			/*
   2622 
   2623 			   we claim this disk is "optimal" if it's
   2624 			   rf_ds_used_spare, as that means it should be
   2625 			   directly substitutable for the disk it replaced.
   2626 			   We note that too...
   2627 
   2628 			 */
   2629 
   2630 			for(j=0;j<raidPtr->numCol;j++) {
   2631 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2632 					scol = j;
   2633 					break;
   2634 				}
   2635 			}
   2636 
   2637 			/* XXX shouldn't *really* need this... */
   2638 			raidread_component_label(
   2639 				      raidPtr->Disks[sparecol].dev,
   2640 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2641 				      &clabel);
   2642 			/* make sure status is noted */
   2643 
   2644 			raid_init_component_label(raidPtr, &clabel);
   2645 
   2646 			clabel.mod_counter = raidPtr->mod_counter;
   2647 			clabel.column = scol;
   2648 			clabel.status = rf_ds_optimal;
   2649 			clabel.last_unit = raidPtr->raidid;
   2650 
   2651 			raidwrite_component_label(
   2652 				      raidPtr->Disks[sparecol].dev,
   2653 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2654 				      &clabel);
   2655 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2656 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2657 					raidmarkclean( raidPtr->Disks[sparecol].dev,
   2658 						       raidPtr->raid_cinfo[sparecol].ci_vp,
   2659 						       raidPtr->mod_counter);
   2660 				}
   2661 			}
   2662 		}
   2663 	}
   2664 }
   2665 
   2666 void
   2667 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2668 {
   2669 
   2670 	if (vp != NULL) {
   2671 		if (auto_configured == 1) {
   2672 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2673 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2674 			vput(vp);
   2675 
   2676 		} else {
   2677 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
   2678 		}
   2679 	}
   2680 }
   2681 
   2682 
   2683 void
   2684 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2685 {
   2686 	int r,c;
   2687 	struct vnode *vp;
   2688 	int acd;
   2689 
   2690 
   2691 	/* We take this opportunity to close the vnodes like we should.. */
   2692 
   2693 	for (c = 0; c < raidPtr->numCol; c++) {
   2694 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2695 		acd = raidPtr->Disks[c].auto_configured;
   2696 		rf_close_component(raidPtr, vp, acd);
   2697 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2698 		raidPtr->Disks[c].auto_configured = 0;
   2699 	}
   2700 
   2701 	for (r = 0; r < raidPtr->numSpare; r++) {
   2702 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2703 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2704 		rf_close_component(raidPtr, vp, acd);
   2705 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2706 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2707 	}
   2708 }
   2709 
   2710 
   2711 void
   2712 rf_ReconThread(struct rf_recon_req *req)
   2713 {
   2714 	int     s;
   2715 	RF_Raid_t *raidPtr;
   2716 
   2717 	s = splbio();
   2718 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2719 	raidPtr->recon_in_progress = 1;
   2720 
   2721 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2722 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2723 
   2724 	RF_Free(req, sizeof(*req));
   2725 
   2726 	raidPtr->recon_in_progress = 0;
   2727 	splx(s);
   2728 
   2729 	/* That's all... */
   2730 	kthread_exit(0);	/* does not return */
   2731 }
   2732 
   2733 void
   2734 rf_RewriteParityThread(RF_Raid_t *raidPtr)
   2735 {
   2736 	int retcode;
   2737 	int s;
   2738 
   2739 	raidPtr->parity_rewrite_stripes_done = 0;
   2740 	raidPtr->parity_rewrite_in_progress = 1;
   2741 	s = splbio();
   2742 	retcode = rf_RewriteParity(raidPtr);
   2743 	splx(s);
   2744 	if (retcode) {
   2745 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2746 	} else {
   2747 		/* set the clean bit!  If we shutdown correctly,
   2748 		   the clean bit on each component label will get
   2749 		   set */
   2750 		raidPtr->parity_good = RF_RAID_CLEAN;
   2751 	}
   2752 	raidPtr->parity_rewrite_in_progress = 0;
   2753 
   2754 	/* Anyone waiting for us to stop?  If so, inform them... */
   2755 	if (raidPtr->waitShutdown) {
   2756 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2757 	}
   2758 
   2759 	/* That's all... */
   2760 	kthread_exit(0);	/* does not return */
   2761 }
   2762 
   2763 
   2764 void
   2765 rf_CopybackThread(RF_Raid_t *raidPtr)
   2766 {
   2767 	int s;
   2768 
   2769 	raidPtr->copyback_in_progress = 1;
   2770 	s = splbio();
   2771 	rf_CopybackReconstructedData(raidPtr);
   2772 	splx(s);
   2773 	raidPtr->copyback_in_progress = 0;
   2774 
   2775 	/* That's all... */
   2776 	kthread_exit(0);	/* does not return */
   2777 }
   2778 
   2779 
   2780 void
   2781 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
   2782 {
   2783 	int s;
   2784 	RF_Raid_t *raidPtr;
   2785 
   2786 	s = splbio();
   2787 	raidPtr = req->raidPtr;
   2788 	raidPtr->recon_in_progress = 1;
   2789 	rf_ReconstructInPlace(raidPtr, req->col);
   2790 	RF_Free(req, sizeof(*req));
   2791 	raidPtr->recon_in_progress = 0;
   2792 	splx(s);
   2793 
   2794 	/* That's all... */
   2795 	kthread_exit(0);	/* does not return */
   2796 }
   2797 
   2798 static RF_AutoConfig_t *
   2799 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
   2800     const char *cname, RF_SectorCount_t size)
   2801 {
   2802 	int good_one = 0;
   2803 	RF_ComponentLabel_t *clabel;
   2804 	RF_AutoConfig_t *ac;
   2805 
   2806 	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
   2807 	if (clabel == NULL) {
   2808 oomem:
   2809 		    while(ac_list) {
   2810 			    ac = ac_list;
   2811 			    if (ac->clabel)
   2812 				    free(ac->clabel, M_RAIDFRAME);
   2813 			    ac_list = ac_list->next;
   2814 			    free(ac, M_RAIDFRAME);
   2815 		    }
   2816 		    printf("RAID auto config: out of memory!\n");
   2817 		    return NULL; /* XXX probably should panic? */
   2818 	}
   2819 
   2820 	if (!raidread_component_label(dev, vp, clabel)) {
   2821 		    /* Got the label.  Does it look reasonable? */
   2822 		    if (rf_reasonable_label(clabel) &&
   2823 			(clabel->partitionSize <= size)) {
   2824 #ifdef DEBUG
   2825 			    printf("Component on: %s: %llu\n",
   2826 				cname, (unsigned long long)size);
   2827 			    rf_print_component_label(clabel);
   2828 #endif
   2829 			    /* if it's reasonable, add it, else ignore it. */
   2830 			    ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
   2831 				M_NOWAIT);
   2832 			    if (ac == NULL) {
   2833 				    free(clabel, M_RAIDFRAME);
   2834 				    goto oomem;
   2835 			    }
   2836 			    strlcpy(ac->devname, cname, sizeof(ac->devname));
   2837 			    ac->dev = dev;
   2838 			    ac->vp = vp;
   2839 			    ac->clabel = clabel;
   2840 			    ac->next = ac_list;
   2841 			    ac_list = ac;
   2842 			    good_one = 1;
   2843 		    }
   2844 	}
   2845 	if (!good_one) {
   2846 		/* cleanup */
   2847 		free(clabel, M_RAIDFRAME);
   2848 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2849 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2850 		vput(vp);
   2851 	}
   2852 	return ac_list;
   2853 }
   2854 
   2855 RF_AutoConfig_t *
   2856 rf_find_raid_components()
   2857 {
   2858 	struct vnode *vp;
   2859 	struct disklabel label;
   2860 	struct device *dv;
   2861 	dev_t dev;
   2862 	int bmajor, bminor, wedge;
   2863 	int error;
   2864 	int i;
   2865 	RF_AutoConfig_t *ac_list;
   2866 
   2867 
   2868 	/* initialize the AutoConfig list */
   2869 	ac_list = NULL;
   2870 
   2871 	/* we begin by trolling through *all* the devices on the system */
   2872 
   2873 	for (dv = alldevs.tqh_first; dv != NULL;
   2874 	     dv = dv->dv_list.tqe_next) {
   2875 
   2876 		/* we are only interested in disks... */
   2877 		if (device_class(dv) != DV_DISK)
   2878 			continue;
   2879 
   2880 		/* we don't care about floppies... */
   2881 		if (device_is_a(dv, "fd")) {
   2882 			continue;
   2883 		}
   2884 
   2885 		/* we don't care about CD's... */
   2886 		if (device_is_a(dv, "cd")) {
   2887 			continue;
   2888 		}
   2889 
   2890 		/* we don't care about md's... */
   2891 		if (device_is_a(dv, "md")) {
   2892 			continue;
   2893 		}
   2894 
   2895 		/* hdfd is the Atari/Hades floppy driver */
   2896 		if (device_is_a(dv, "hdfd")) {
   2897 			continue;
   2898 		}
   2899 
   2900 		/* fdisa is the Atari/Milan floppy driver */
   2901 		if (device_is_a(dv, "fdisa")) {
   2902 			continue;
   2903 		}
   2904 
   2905 		/* need to find the device_name_to_block_device_major stuff */
   2906 		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
   2907 
   2908 		/* get a vnode for the raw partition of this disk */
   2909 
   2910 		wedge = device_is_a(dv, "dk");
   2911 		bminor = minor(device_unit(dv));
   2912 		dev = wedge ? makedev(bmajor, bminor) :
   2913 		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
   2914 		if (bdevvp(dev, &vp))
   2915 			panic("RAID can't alloc vnode");
   2916 
   2917 		error = VOP_OPEN(vp, FREAD, NOCRED);
   2918 
   2919 		if (error) {
   2920 			/* "Who cares."  Continue looking
   2921 			   for something that exists*/
   2922 			vput(vp);
   2923 			continue;
   2924 		}
   2925 
   2926 		if (wedge) {
   2927 			struct dkwedge_info dkw;
   2928 			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
   2929 			    NOCRED);
   2930 			if (error) {
   2931 				printf("RAIDframe: can't get wedge info for "
   2932 				    "dev %s (%d)\n", device_xname(dv), error);
   2933 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2934 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2935 				vput(vp);
   2936 				continue;
   2937 			}
   2938 
   2939 			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
   2940 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2941 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2942 				vput(vp);
   2943 				continue;
   2944 			}
   2945 
   2946 			ac_list = rf_get_component(ac_list, dev, vp,
   2947 			    device_xname(dv), dkw.dkw_size);
   2948 			continue;
   2949 		}
   2950 
   2951 		/* Ok, the disk exists.  Go get the disklabel. */
   2952 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
   2953 		if (error) {
   2954 			/*
   2955 			 * XXX can't happen - open() would
   2956 			 * have errored out (or faked up one)
   2957 			 */
   2958 			if (error != ENOTTY)
   2959 				printf("RAIDframe: can't get label for dev "
   2960 				    "%s (%d)\n", device_xname(dv), error);
   2961 		}
   2962 
   2963 		/* don't need this any more.  We'll allocate it again
   2964 		   a little later if we really do... */
   2965 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2966 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2967 		vput(vp);
   2968 
   2969 		if (error)
   2970 			continue;
   2971 
   2972 		for (i = 0; i < label.d_npartitions; i++) {
   2973 			char cname[sizeof(ac_list->devname)];
   2974 
   2975 			/* We only support partitions marked as RAID */
   2976 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2977 				continue;
   2978 
   2979 			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
   2980 			if (bdevvp(dev, &vp))
   2981 				panic("RAID can't alloc vnode");
   2982 
   2983 			error = VOP_OPEN(vp, FREAD, NOCRED);
   2984 			if (error) {
   2985 				/* Whatever... */
   2986 				vput(vp);
   2987 				continue;
   2988 			}
   2989 			snprintf(cname, sizeof(cname), "%s%c",
   2990 			    device_xname(dv), 'a' + i);
   2991 			ac_list = rf_get_component(ac_list, dev, vp, cname,
   2992 				label.d_partitions[i].p_size);
   2993 		}
   2994 	}
   2995 	return ac_list;
   2996 }
   2997 
   2998 
   2999 static int
   3000 rf_reasonable_label(RF_ComponentLabel_t *clabel)
   3001 {
   3002 
   3003 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   3004 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   3005 	    ((clabel->clean == RF_RAID_CLEAN) ||
   3006 	     (clabel->clean == RF_RAID_DIRTY)) &&
   3007 	    clabel->row >=0 &&
   3008 	    clabel->column >= 0 &&
   3009 	    clabel->num_rows > 0 &&
   3010 	    clabel->num_columns > 0 &&
   3011 	    clabel->row < clabel->num_rows &&
   3012 	    clabel->column < clabel->num_columns &&
   3013 	    clabel->blockSize > 0 &&
   3014 	    clabel->numBlocks > 0) {
   3015 		/* label looks reasonable enough... */
   3016 		return(1);
   3017 	}
   3018 	return(0);
   3019 }
   3020 
   3021 
#ifdef DEBUG
/*
 * Print the contents of a component label to the console.
 * Only compiled into DEBUG kernels.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *clean_str = clabel->clean ? "Yes" : "No";
	const char *autoconf_str = clabel->autoconfigure ? "Yes" : "No";
	const char *root_str = clabel->root_partition ? "Yes" : "No";

	/* position and shape of the set */
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);

	/* identity and state */
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf("   Clean: %s Status: %d\n", clean_str, clabel->status);

	/* layout */
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);

	/* autoconfiguration settings */
	printf("   Autoconfig: %s\n", autoconf_str);
	printf("   Contains root partition: %s\n", root_str);
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
   3049 
   3050 RF_ConfigSet_t *
   3051 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   3052 {
   3053 	RF_AutoConfig_t *ac;
   3054 	RF_ConfigSet_t *config_sets;
   3055 	RF_ConfigSet_t *cset;
   3056 	RF_AutoConfig_t *ac_next;
   3057 
   3058 
   3059 	config_sets = NULL;
   3060 
   3061 	/* Go through the AutoConfig list, and figure out which components
   3062 	   belong to what sets.  */
   3063 	ac = ac_list;
   3064 	while(ac!=NULL) {
   3065 		/* we're going to putz with ac->next, so save it here
   3066 		   for use at the end of the loop */
   3067 		ac_next = ac->next;
   3068 
   3069 		if (config_sets == NULL) {
   3070 			/* will need at least this one... */
   3071 			config_sets = (RF_ConfigSet_t *)
   3072 				malloc(sizeof(RF_ConfigSet_t),
   3073 				       M_RAIDFRAME, M_NOWAIT);
   3074 			if (config_sets == NULL) {
   3075 				panic("rf_create_auto_sets: No memory!");
   3076 			}
   3077 			/* this one is easy :) */
   3078 			config_sets->ac = ac;
   3079 			config_sets->next = NULL;
   3080 			config_sets->rootable = 0;
   3081 			ac->next = NULL;
   3082 		} else {
   3083 			/* which set does this component fit into? */
   3084 			cset = config_sets;
   3085 			while(cset!=NULL) {
   3086 				if (rf_does_it_fit(cset, ac)) {
   3087 					/* looks like it matches... */
   3088 					ac->next = cset->ac;
   3089 					cset->ac = ac;
   3090 					break;
   3091 				}
   3092 				cset = cset->next;
   3093 			}
   3094 			if (cset==NULL) {
   3095 				/* didn't find a match above... new set..*/
   3096 				cset = (RF_ConfigSet_t *)
   3097 					malloc(sizeof(RF_ConfigSet_t),
   3098 					       M_RAIDFRAME, M_NOWAIT);
   3099 				if (cset == NULL) {
   3100 					panic("rf_create_auto_sets: No memory!");
   3101 				}
   3102 				cset->ac = ac;
   3103 				ac->next = NULL;
   3104 				cset->next = config_sets;
   3105 				cset->rootable = 0;
   3106 				config_sets = cset;
   3107 			}
   3108 		}
   3109 		ac = ac_next;
   3110 	}
   3111 
   3112 
   3113 	return(config_sets);
   3114 }
   3115 
   3116 static int
   3117 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3118 {
   3119 	RF_ComponentLabel_t *clabel1, *clabel2;
   3120 
   3121 	/* If this one matches the *first* one in the set, that's good
   3122 	   enough, since the other members of the set would have been
   3123 	   through here too... */
   3124 	/* note that we are not checking partitionSize here..
   3125 
   3126 	   Note that we are also not checking the mod_counters here.
   3127 	   If everything else matches execpt the mod_counter, that's
   3128 	   good enough for this test.  We will deal with the mod_counters
   3129 	   a little later in the autoconfiguration process.
   3130 
   3131 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3132 
   3133 	   The reason we don't check for this is that failed disks
   3134 	   will have lower modification counts.  If those disks are
   3135 	   not added to the set they used to belong to, then they will
   3136 	   form their own set, which may result in 2 different sets,
   3137 	   for example, competing to be configured at raid0, and
   3138 	   perhaps competing to be the root filesystem set.  If the
   3139 	   wrong ones get configured, or both attempt to become /,
   3140 	   weird behaviour and or serious lossage will occur.  Thus we
   3141 	   need to bring them into the fold here, and kick them out at
   3142 	   a later point.
   3143 
   3144 	*/
   3145 
   3146 	clabel1 = cset->ac->clabel;
   3147 	clabel2 = ac->clabel;
   3148 	if ((clabel1->version == clabel2->version) &&
   3149 	    (clabel1->serial_number == clabel2->serial_number) &&
   3150 	    (clabel1->num_rows == clabel2->num_rows) &&
   3151 	    (clabel1->num_columns == clabel2->num_columns) &&
   3152 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3153 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3154 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3155 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3156 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3157 	    (clabel1->blockSize == clabel2->blockSize) &&
   3158 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   3159 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3160 	    (clabel1->root_partition == clabel2->root_partition) &&
   3161 	    (clabel1->last_unit == clabel2->last_unit) &&
   3162 	    (clabel1->config_order == clabel2->config_order)) {
   3163 		/* if it get's here, it almost *has* to be a match */
   3164 	} else {
   3165 		/* it's not consistent with somebody in the set..
   3166 		   punt */
   3167 		return(0);
   3168 	}
   3169 	/* all was fine.. it must fit... */
   3170 	return(1);
   3171 }
   3172 
   3173 int
   3174 rf_have_enough_components(RF_ConfigSet_t *cset)
   3175 {
   3176 	RF_AutoConfig_t *ac;
   3177 	RF_AutoConfig_t *auto_config;
   3178 	RF_ComponentLabel_t *clabel;
   3179 	int c;
   3180 	int num_cols;
   3181 	int num_missing;
   3182 	int mod_counter;
   3183 	int mod_counter_found;
   3184 	int even_pair_failed;
   3185 	char parity_type;
   3186 
   3187 
   3188 	/* check to see that we have enough 'live' components
   3189 	   of this set.  If so, we can configure it if necessary */
   3190 
   3191 	num_cols = cset->ac->clabel->num_columns;
   3192 	parity_type = cset->ac->clabel->parityConfig;
   3193 
   3194 	/* XXX Check for duplicate components!?!?!? */
   3195 
   3196 	/* Determine what the mod_counter is supposed to be for this set. */
   3197 
   3198 	mod_counter_found = 0;
   3199 	mod_counter = 0;
   3200 	ac = cset->ac;
   3201 	while(ac!=NULL) {
   3202 		if (mod_counter_found==0) {
   3203 			mod_counter = ac->clabel->mod_counter;
   3204 			mod_counter_found = 1;
   3205 		} else {
   3206 			if (ac->clabel->mod_counter > mod_counter) {
   3207 				mod_counter = ac->clabel->mod_counter;
   3208 			}
   3209 		}
   3210 		ac = ac->next;
   3211 	}
   3212 
   3213 	num_missing = 0;
   3214 	auto_config = cset->ac;
   3215 
   3216 	even_pair_failed = 0;
   3217 	for(c=0; c<num_cols; c++) {
   3218 		ac = auto_config;
   3219 		while(ac!=NULL) {
   3220 			if ((ac->clabel->column == c) &&
   3221 			    (ac->clabel->mod_counter == mod_counter)) {
   3222 				/* it's this one... */
   3223 #ifdef DEBUG
   3224 				printf("Found: %s at %d\n",
   3225 				       ac->devname,c);
   3226 #endif
   3227 				break;
   3228 			}
   3229 			ac=ac->next;
   3230 		}
   3231 		if (ac==NULL) {
   3232 				/* Didn't find one here! */
   3233 				/* special case for RAID 1, especially
   3234 				   where there are more than 2
   3235 				   components (where RAIDframe treats
   3236 				   things a little differently :( ) */
   3237 			if (parity_type == '1') {
   3238 				if (c%2 == 0) { /* even component */
   3239 					even_pair_failed = 1;
   3240 				} else { /* odd component.  If
   3241 					    we're failed, and
   3242 					    so is the even
   3243 					    component, it's
   3244 					    "Good Night, Charlie" */
   3245 					if (even_pair_failed == 1) {
   3246 						return(0);
   3247 					}
   3248 				}
   3249 			} else {
   3250 				/* normal accounting */
   3251 				num_missing++;
   3252 			}
   3253 		}
   3254 		if ((parity_type == '1') && (c%2 == 1)) {
   3255 				/* Just did an even component, and we didn't
   3256 				   bail.. reset the even_pair_failed flag,
   3257 				   and go on to the next component.... */
   3258 			even_pair_failed = 0;
   3259 		}
   3260 	}
   3261 
   3262 	clabel = cset->ac->clabel;
   3263 
   3264 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3265 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3266 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3267 		/* XXX this needs to be made *much* more general */
   3268 		/* Too many failures */
   3269 		return(0);
   3270 	}
   3271 	/* otherwise, all is well, and we've got enough to take a kick
   3272 	   at autoconfiguring this set */
   3273 	return(1);
   3274 }
   3275 
   3276 void
   3277 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
   3278 			RF_Raid_t *raidPtr)
   3279 {
   3280 	RF_ComponentLabel_t *clabel;
   3281 	int i;
   3282 
   3283 	clabel = ac->clabel;
   3284 
   3285 	/* 1. Fill in the common stuff */
   3286 	config->numRow = clabel->num_rows = 1;
   3287 	config->numCol = clabel->num_columns;
   3288 	config->numSpare = 0; /* XXX should this be set here? */
   3289 	config->sectPerSU = clabel->sectPerSU;
   3290 	config->SUsPerPU = clabel->SUsPerPU;
   3291 	config->SUsPerRU = clabel->SUsPerRU;
   3292 	config->parityConfig = clabel->parityConfig;
   3293 	/* XXX... */
   3294 	strcpy(config->diskQueueType,"fifo");
   3295 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3296 	config->layoutSpecificSize = 0; /* XXX ?? */
   3297 
   3298 	while(ac!=NULL) {
   3299 		/* row/col values will be in range due to the checks
   3300 		   in reasonable_label() */
   3301 		strcpy(config->devnames[0][ac->clabel->column],
   3302 		       ac->devname);
   3303 		ac = ac->next;
   3304 	}
   3305 
   3306 	for(i=0;i<RF_MAXDBGV;i++) {
   3307 		config->debugVars[i][0] = 0;
   3308 	}
   3309 }
   3310 
   3311 int
   3312 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3313 {
   3314 	RF_ComponentLabel_t clabel;
   3315 	struct vnode *vp;
   3316 	dev_t dev;
   3317 	int column;
   3318 	int sparecol;
   3319 
   3320 	raidPtr->autoconfigure = new_value;
   3321 
   3322 	for(column=0; column<raidPtr->numCol; column++) {
   3323 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3324 			dev = raidPtr->Disks[column].dev;
   3325 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3326 			raidread_component_label(dev, vp, &clabel);
   3327 			clabel.autoconfigure = new_value;
   3328 			raidwrite_component_label(dev, vp, &clabel);
   3329 		}
   3330 	}
   3331 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3332 		sparecol = raidPtr->numCol + column;
   3333 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3334 			dev = raidPtr->Disks[sparecol].dev;
   3335 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3336 			raidread_component_label(dev, vp, &clabel);
   3337 			clabel.autoconfigure = new_value;
   3338 			raidwrite_component_label(dev, vp, &clabel);
   3339 		}
   3340 	}
   3341 	return(new_value);
   3342 }
   3343 
   3344 int
   3345 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3346 {
   3347 	RF_ComponentLabel_t clabel;
   3348 	struct vnode *vp;
   3349 	dev_t dev;
   3350 	int column;
   3351 	int sparecol;
   3352 
   3353 	raidPtr->root_partition = new_value;
   3354 	for(column=0; column<raidPtr->numCol; column++) {
   3355 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3356 			dev = raidPtr->Disks[column].dev;
   3357 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3358 			raidread_component_label(dev, vp, &clabel);
   3359 			clabel.root_partition = new_value;
   3360 			raidwrite_component_label(dev, vp, &clabel);
   3361 		}
   3362 	}
   3363 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3364 		sparecol = raidPtr->numCol + column;
   3365 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3366 			dev = raidPtr->Disks[sparecol].dev;
   3367 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3368 			raidread_component_label(dev, vp, &clabel);
   3369 			clabel.root_partition = new_value;
   3370 			raidwrite_component_label(dev, vp, &clabel);
   3371 		}
   3372 	}
   3373 	return(new_value);
   3374 }
   3375 
   3376 void
   3377 rf_release_all_vps(RF_ConfigSet_t *cset)
   3378 {
   3379 	RF_AutoConfig_t *ac;
   3380 
   3381 	ac = cset->ac;
   3382 	while(ac!=NULL) {
   3383 		/* Close the vp, and give it back */
   3384 		if (ac->vp) {
   3385 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3386 			VOP_CLOSE(ac->vp, FREAD, NOCRED);
   3387 			vput(ac->vp);
   3388 			ac->vp = NULL;
   3389 		}
   3390 		ac = ac->next;
   3391 	}
   3392 }
   3393 
   3394 
   3395 void
   3396 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3397 {
   3398 	RF_AutoConfig_t *ac;
   3399 	RF_AutoConfig_t *next_ac;
   3400 
   3401 	ac = cset->ac;
   3402 	while(ac!=NULL) {
   3403 		next_ac = ac->next;
   3404 		/* nuke the label */
   3405 		free(ac->clabel, M_RAIDFRAME);
   3406 		/* cleanup the config structure */
   3407 		free(ac, M_RAIDFRAME);
   3408 		/* "next.." */
   3409 		ac = next_ac;
   3410 	}
   3411 	/* and, finally, nuke the config set */
   3412 	free(cset, M_RAIDFRAME);
   3413 }
   3414 
   3415 
   3416 void
   3417 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
   3418 {
   3419 	/* current version number */
   3420 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3421 	clabel->serial_number = raidPtr->serial_number;
   3422 	clabel->mod_counter = raidPtr->mod_counter;
   3423 	clabel->num_rows = 1;
   3424 	clabel->num_columns = raidPtr->numCol;
   3425 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3426 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3427 
   3428 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3429 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3430 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3431 
   3432 	clabel->blockSize = raidPtr->bytesPerSector;
   3433 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3434 
   3435 	/* XXX not portable */
   3436 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3437 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3438 	clabel->autoconfigure = raidPtr->autoconfigure;
   3439 	clabel->root_partition = raidPtr->root_partition;
   3440 	clabel->last_unit = raidPtr->raidid;
   3441 	clabel->config_order = raidPtr->config_order;
   3442 }
   3443 
   3444 int
   3445 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
   3446 {
   3447 	RF_Raid_t *raidPtr;
   3448 	RF_Config_t *config;
   3449 	int raidID;
   3450 	int retcode;
   3451 
   3452 #ifdef DEBUG
   3453 	printf("RAID autoconfigure\n");
   3454 #endif
   3455 
   3456 	retcode = 0;
   3457 	*unit = -1;
   3458 
   3459 	/* 1. Create a config structure */
   3460 
   3461 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3462 				       M_RAIDFRAME,
   3463 				       M_NOWAIT);
   3464 	if (config==NULL) {
   3465 		printf("Out of mem!?!?\n");
   3466 				/* XXX do something more intelligent here. */
   3467 		return(1);
   3468 	}
   3469 
   3470 	memset(config, 0, sizeof(RF_Config_t));
   3471 
   3472 	/*
   3473 	   2. Figure out what RAID ID this one is supposed to live at
   3474 	   See if we can get the same RAID dev that it was configured
   3475 	   on last time..
   3476 	*/
   3477 
   3478 	raidID = cset->ac->clabel->last_unit;
   3479 	if ((raidID < 0) || (raidID >= numraid)) {
   3480 		/* let's not wander off into lala land. */
   3481 		raidID = numraid - 1;
   3482 	}
   3483 	if (raidPtrs[raidID]->valid != 0) {
   3484 
   3485 		/*
   3486 		   Nope... Go looking for an alternative...
   3487 		   Start high so we don't immediately use raid0 if that's
   3488 		   not taken.
   3489 		*/
   3490 
   3491 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3492 			if (raidPtrs[raidID]->valid == 0) {
   3493 				/* can use this one! */
   3494 				break;
   3495 			}
   3496 		}
   3497 	}
   3498 
   3499 	if (raidID < 0) {
   3500 		/* punt... */
   3501 		printf("Unable to auto configure this set!\n");
   3502 		printf("(Out of RAID devs!)\n");
   3503 		free(config, M_RAIDFRAME);
   3504 		return(1);
   3505 	}
   3506 
   3507 #ifdef DEBUG
   3508 	printf("Configuring raid%d:\n",raidID);
   3509 #endif
   3510 
   3511 	raidPtr = raidPtrs[raidID];
   3512 
   3513 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3514 	raidPtr->raidid = raidID;
   3515 	raidPtr->openings = RAIDOUTSTANDING;
   3516 
   3517 	/* 3. Build the configuration structure */
   3518 	rf_create_configuration(cset->ac, config, raidPtr);
   3519 
   3520 	/* 4. Do the configuration */
   3521 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3522 
   3523 	if (retcode == 0) {
   3524 
   3525 		raidinit(raidPtrs[raidID]);
   3526 
   3527 		rf_markalldirty(raidPtrs[raidID]);
   3528 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3529 		if (cset->ac->clabel->root_partition==1) {
   3530 			/* everything configured just fine.  Make a note
   3531 			   that this set is eligible to be root. */
   3532 			cset->rootable = 1;
   3533 			/* XXX do this here? */
   3534 			raidPtrs[raidID]->root_partition = 1;
   3535 		}
   3536 	}
   3537 
   3538 	/* 5. Cleanup */
   3539 	free(config, M_RAIDFRAME);
   3540 
   3541 	*unit = raidID;
   3542 	return(retcode);
   3543 }
   3544 
   3545 void
   3546 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
   3547 {
   3548 	struct buf *bp;
   3549 
   3550 	bp = (struct buf *)desc->bp;
   3551 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3552 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
   3553 }
   3554 
   3555 void
   3556 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
   3557 	     size_t xmin, size_t xmax)
   3558 {
   3559 	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
   3560 	pool_sethiwat(p, xmax);
   3561 	pool_prime(p, xmin);
   3562 	pool_setlowat(p, xmin);
   3563 }
   3564 
   3565 /*
   3566  * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
   3567  * if there is IO pending and if that IO could possibly be done for a
   3568  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
   3569  * otherwise.
   3570  *
   3571  */
   3572 
   3573 int
   3574 rf_buf_queue_check(int raidid)
   3575 {
   3576 	if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) &&
   3577 	    raidPtrs[raidid]->openings > 0) {
   3578 		/* there is work to do */
   3579 		return 0;
   3580 	}
   3581 	/* default is nothing to do */
   3582 	return 1;
   3583 }
   3584 
   3585 int
   3586 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
   3587 {
   3588 	struct partinfo dpart;
   3589 	struct dkwedge_info dkw;
   3590 	int error;
   3591 
   3592 	error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred);
   3593 	if (error == 0) {
   3594 		diskPtr->blockSize = dpart.disklab->d_secsize;
   3595 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
   3596 		diskPtr->partitionSize = dpart.part->p_size;
   3597 		return 0;
   3598 	}
   3599 
   3600 	error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred);
   3601 	if (error == 0) {
   3602 		diskPtr->blockSize = 512;	/* XXX */
   3603 		diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
   3604 		diskPtr->partitionSize = dkw.dkw_size;
   3605 		return 0;
   3606 	}
   3607 	return error;
   3608 }
   3609 
/*
 * Autoconfiguration match routine: unconditionally reports a match.
 * None of the arguments are examined.
 */
static int
raid_match(struct device *self, struct cfdata *cfdata,
    void *aux)
{
	/* nothing to probe for */
	return 1;
}
   3616 
/*
 * Autoconfiguration attach routine: intentionally does nothing.
 * None of the arguments are examined.
 */
static void
raid_attach(struct device *parent, struct device *self,
    void *aux)
{
	/* no attach-time work */
}
   3623 
   3624 
   3625 static int
   3626 raid_detach(struct device *self, int flags)
   3627 {
   3628 	struct raid_softc *rs = (struct raid_softc *)self;
   3629 
   3630 	if (rs->sc_flags & RAIDF_INITED)
   3631 		return EBUSY;
   3632 
   3633 	return 0;
   3634 }
   3635 
   3636 static void
   3637 rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
   3638 {
   3639 	prop_dictionary_t disk_info, odisk_info, geom;
   3640 	disk_info = prop_dictionary_create();
   3641 	geom = prop_dictionary_create();
   3642 	prop_dictionary_set_uint64(geom, "sectors-per-unit",
   3643 				   raidPtr->totalSectors);
   3644 	prop_dictionary_set_uint32(geom, "sector-size",
   3645 				   raidPtr->bytesPerSector);
   3646 
   3647 	prop_dictionary_set_uint16(geom, "sectors-per-track",
   3648 				   raidPtr->Layout.dataSectorsPerStripe);
   3649 	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
   3650 				   4 * raidPtr->numCol);
   3651 
   3652 	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
   3653 	   raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
   3654 	   (4 * raidPtr->numCol)));
   3655 
   3656 	prop_dictionary_set(disk_info, "geometry", geom);
   3657 	prop_object_release(geom);
   3658 	prop_dictionary_set(device_properties(rs->sc_dev),
   3659 			    "disk-info", disk_info);
   3660 	odisk_info = rs->sc_dkdev.dk_info;
   3661 	rs->sc_dkdev.dk_info = disk_info;
   3662 	if (odisk_info)
   3663 		prop_object_release(odisk_info);
   3664 }
   3665 
   3666 /*
   3667  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
   3668  * We end up returning whatever error was returned by the first cache flush
   3669  * that fails.
   3670  */
   3671 
   3672 static int
   3673 rf_sync_component_caches(RF_Raid_t *raidPtr)
   3674 {
   3675 	int c, sparecol;
   3676 	int e,error;
   3677 	int force = 1;
   3678 
   3679 	error = 0;
   3680 	for (c = 0; c < raidPtr->numCol; c++) {
   3681 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   3682 			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
   3683 					  &force, FWRITE, NOCRED);
   3684 			if (e) {
   3685 				if (e != ENODEV)
   3686 					printf("raid%d: cache flush to component %s failed.\n",
   3687 					       raidPtr->raidid, raidPtr->Disks[c].devname);
   3688 				if (error == 0) {
   3689 					error = e;
   3690 				}
   3691 			}
   3692 		}
   3693 	}
   3694 
   3695 	for( c = 0; c < raidPtr->numSpare ; c++) {
   3696 		sparecol = raidPtr->numCol + c;
   3697 		/* Need to ensure that the reconstruct actually completed! */
   3698 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3699 			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
   3700 					  DIOCCACHESYNC, &force, FWRITE, NOCRED);
   3701 			if (e) {
   3702 				if (e != ENODEV)
   3703 					printf("raid%d: cache flush to component %s failed.\n",
   3704 					       raidPtr->raidid, raidPtr->Disks[sparecol].devname);
   3705 				if (error == 0) {
   3706 					error = e;
   3707 				}
   3708 			}
   3709 		}
   3710 	}
   3711 	return error;
   3712 }
   3713