      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.250.4.6.2.1 2014/11/20 15:52:10 sborrill Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * Copyright (c) 1990, 1993
     33  *      The Regents of the University of California.  All rights reserved.
     34  *
     35  * This code is derived from software contributed to Berkeley by
     36  * the Systems Programming Group of the University of Utah Computer
     37  * Science Department.
     38  *
     39  * Redistribution and use in source and binary forms, with or without
     40  * modification, are permitted provided that the following conditions
     41  * are met:
     42  * 1. Redistributions of source code must retain the above copyright
     43  *    notice, this list of conditions and the following disclaimer.
     44  * 2. Redistributions in binary form must reproduce the above copyright
     45  *    notice, this list of conditions and the following disclaimer in the
     46  *    documentation and/or other materials provided with the distribution.
     47  * 3. Neither the name of the University nor the names of its contributors
     48  *    may be used to endorse or promote products derived from this software
     49  *    without specific prior written permission.
     50  *
     51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     61  * SUCH DAMAGE.
     62  *
     63  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     64  *
     65  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     66  */
     67 
     68 /*
     69  * Copyright (c) 1988 University of Utah.
     70  *
     71  * This code is derived from software contributed to Berkeley by
     72  * the Systems Programming Group of the University of Utah Computer
     73  * Science Department.
     74  *
     75  * Redistribution and use in source and binary forms, with or without
     76  * modification, are permitted provided that the following conditions
     77  * are met:
     78  * 1. Redistributions of source code must retain the above copyright
     79  *    notice, this list of conditions and the following disclaimer.
     80  * 2. Redistributions in binary form must reproduce the above copyright
     81  *    notice, this list of conditions and the following disclaimer in the
     82  *    documentation and/or other materials provided with the distribution.
     83  * 3. All advertising materials mentioning features or use of this software
     84  *    must display the following acknowledgement:
     85  *      This product includes software developed by the University of
     86  *      California, Berkeley and its contributors.
     87  * 4. Neither the name of the University nor the names of its contributors
     88  *    may be used to endorse or promote products derived from this software
     89  *    without specific prior written permission.
     90  *
     91  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     92  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     93  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     94  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     95  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     96  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     97  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     98  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     99  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    100  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    101  * SUCH DAMAGE.
    102  *
    103  * from: Utah $Hdr: cd.c 1.6 90/11/28$
    104  *
    105  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
    106  */
    107 
    108 /*
    109  * Copyright (c) 1995 Carnegie-Mellon University.
    110  * All rights reserved.
    111  *
    112  * Authors: Mark Holland, Jim Zelenka
    113  *
    114  * Permission to use, copy, modify and distribute this software and
    115  * its documentation is hereby granted, provided that both the copyright
    116  * notice and this permission notice appear in all copies of the
    117  * software, derivative works or modified versions, and any portions
    118  * thereof, and that both notices appear in supporting documentation.
    119  *
    120  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
    121  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
    122  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
    123  *
    124  * Carnegie Mellon requests users of this software to return to
    125  *
    126  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    127  *  School of Computer Science
    128  *  Carnegie Mellon University
    129  *  Pittsburgh PA 15213-3890
    130  *
    131  * any improvements or extensions that they make and grant Carnegie the
    132  * rights to redistribute these changes.
    133  */
    134 
    135 /***********************************************************
    136  *
    137  * rf_kintf.c -- the kernel interface routines for RAIDframe
    138  *
    139  ***********************************************************/
    140 
    141 #include <sys/cdefs.h>
    142 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.250.4.6.2.1 2014/11/20 15:52:10 sborrill Exp $");
    143 
    144 #include <sys/param.h>
    145 #include <sys/errno.h>
    146 #include <sys/pool.h>
    147 #include <sys/proc.h>
    148 #include <sys/queue.h>
    149 #include <sys/disk.h>
    150 #include <sys/device.h>
    151 #include <sys/stat.h>
    152 #include <sys/ioctl.h>
    153 #include <sys/fcntl.h>
    154 #include <sys/systm.h>
    155 #include <sys/vnode.h>
    156 #include <sys/disklabel.h>
    157 #include <sys/conf.h>
    158 #include <sys/buf.h>
    159 #include <sys/bufq.h>
    160 #include <sys/user.h>
    161 #include <sys/reboot.h>
    162 #include <sys/kauth.h>
    163 
    164 #include <prop/proplib.h>
    165 
    166 #include <dev/raidframe/raidframevar.h>
    167 #include <dev/raidframe/raidframeio.h>
    168 #include <dev/raidframe/rf_paritymap.h>
    169 #include "raid.h"
    170 #include "opt_raid_autoconfig.h"
    171 #include "rf_raid.h"
    172 #include "rf_copyback.h"
    173 #include "rf_dag.h"
    174 #include "rf_dagflags.h"
    175 #include "rf_desc.h"
    176 #include "rf_diskqueue.h"
    177 #include "rf_etimer.h"
    178 #include "rf_general.h"
    179 #include "rf_kintf.h"
    180 #include "rf_options.h"
    181 #include "rf_driver.h"
    182 #include "rf_parityscan.h"
    183 #include "rf_threadstuff.h"
    184 
    185 #ifdef DEBUG
    186 int     rf_kdebug_level = 0;
    187 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    188 #else				/* DEBUG */
    189 #define db1_printf(a) { }
    190 #endif				/* DEBUG */
    191 
    192 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    193 
    194 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
    195 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    196 
    197 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    198 						 * spare table */
    199 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    200 						 * installation process */
    201 #endif
    202 
    203 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    204 
    205 /* prototypes */
    206 static void KernelWakeupFunc(struct buf *);
    207 static void InitBP(struct buf *, struct vnode *, unsigned,
    208     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    209     void *, int, struct proc *);
    210 static void raidinit(RF_Raid_t *);
    211 
    212 void raidattach(int);
    213 static int raid_match(struct device *, struct cfdata *, void *);
    214 static void raid_attach(struct device *, struct device *, void *);
    215 static int raid_detach(struct device *, int);
    216 
    217 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    218     daddr_t, daddr_t);
    219 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    220     daddr_t, daddr_t, int);
    221 
    222 static int raidwrite_component_label(dev_t, struct vnode *,
    223     RF_ComponentLabel_t *);
    224 static int raidread_component_label(dev_t, struct vnode *,
    225     RF_ComponentLabel_t *);
    226 
    227 
    228 dev_type_open(raidopen);
    229 dev_type_close(raidclose);
    230 dev_type_read(raidread);
    231 dev_type_write(raidwrite);
    232 dev_type_ioctl(raidioctl);
    233 dev_type_strategy(raidstrategy);
    234 dev_type_dump(raiddump);
    235 dev_type_size(raidsize);
    236 
    237 const struct bdevsw raid_bdevsw = {
    238 	raidopen, raidclose, raidstrategy, raidioctl,
    239 	raiddump, raidsize, D_DISK
    240 };
    241 
    242 const struct cdevsw raid_cdevsw = {
    243 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    244 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
    245 };
    246 
    247 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
    248 
    249 /* XXX Not sure if the following should be replacing the raidPtrs above,
    250    or if it should be used in conjunction with that...
    251 */
    252 
    253 struct raid_softc {
    254 	struct device *sc_dev;
    255 	int     sc_flags;	/* flags */
    256 	int     sc_cflags;	/* configuration flags */
    257 	uint64_t sc_size;	/* size of the raid device */
    258 	char    sc_xname[20];	/* XXX external name */
    259 	struct disk sc_dkdev;	/* generic disk device info */
    260 	struct bufq_state *buf_queue;	/* used for the device queue */
    261 };
    262 /* sc_flags */
    263 #define RAIDF_INITED	0x01	/* unit has been initialized */
    264 #define RAIDF_WLABEL	0x02	/* label area is writable */
    265 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    266 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    267 #define RAIDF_LOCKED	0x80	/* unit is locked */
    268 
    269 #define	raidunit(x)	DISKUNIT(x)
    270 int numraid = 0;
    271 
    272 extern struct cfdriver raid_cd;
    273 CFATTACH_DECL_NEW(raid, sizeof(struct raid_softc),
    274     raid_match, raid_attach, raid_detach, NULL);
    275 
    276 /*
    277  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    278  * Be aware that large numbers can allow the driver to consume a lot of
    279  * kernel memory, especially on writes, and in degraded mode reads.
    280  *
    281  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    282  * a single 64K write will typically require 64K for the old data,
    283  * 64K for the old parity, and 64K for the new parity, for a total
    284  * of 192K (if the parity buffer is not re-used immediately).
     285  * Even if it is used immediately, that's still 128K, which when multiplied
    286  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    287  *
    288  * Now in degraded mode, for example, a 64K read on the above setup may
    289  * require data reconstruction, which will require *all* of the 4 remaining
    290  * disks to participate -- 4 * 32K/disk == 128K again.
    291  */
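
/*
 * Worked example with the default RAIDOUTSTANDING of 6 and the 64K
 * writes described above: 6 * 192K = 1152K of old-data/parity buffers,
 * on top of 6 * 64K = 384K of incoming data.
 */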
    292 
    293 #ifndef RAIDOUTSTANDING
    294 #define RAIDOUTSTANDING   6
    295 #endif
    296 
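/*
 * dev_t of the raw partition of the given raid unit, as used for
 * disklabel access (e.g. the RAW_PART partition of raid0 for unit 0).
 */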
    297 #define RAIDLABELDEV(dev)	\
    298 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    299 
    300 /* declared here, and made public, for the benefit of KVM stuff.. */
    301 struct raid_softc *raid_softc;
    302 
    303 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    304 				     struct disklabel *);
    305 static void raidgetdisklabel(dev_t);
    306 static void raidmakedisklabel(struct raid_softc *);
    307 
    308 static int raidlock(struct raid_softc *);
    309 static void raidunlock(struct raid_softc *);
    310 
    311 static void rf_markalldirty(RF_Raid_t *);
    312 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
    313 
    314 void rf_ReconThread(struct rf_recon_req *);
    315 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    316 void rf_CopybackThread(RF_Raid_t *raidPtr);
    317 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    318 int rf_autoconfig(struct device *self);
    319 void rf_buildroothack(RF_ConfigSet_t *);
    320 
    321 RF_AutoConfig_t *rf_find_raid_components(void);
    322 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    323 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    324 static int rf_reasonable_label(RF_ComponentLabel_t *);
    325 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    326 int rf_set_autoconfig(RF_Raid_t *, int);
    327 int rf_set_rootpartition(RF_Raid_t *, int);
    328 void rf_release_all_vps(RF_ConfigSet_t *);
    329 void rf_cleanup_config_set(RF_ConfigSet_t *);
    330 int rf_have_enough_components(RF_ConfigSet_t *);
    331 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    332 
    333 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    334 				  allow autoconfig to take place.
    335 				  Note that this is overridden by having
    336 				  RAID_AUTOCONFIG as an option in the
    337 				  kernel config file.  */
    338 
    339 struct RF_Pools_s rf_pools;
    340 
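/*
 * Pseudo-device attach routine, called with the number of raid units
 * configured in the kernel: boot the RAIDframe engine, allocate the
 * per-unit soft state and descriptors, and register a finalizer that
 * will autoconfigure RAID sets once all real hardware has attached.
 */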
    341 void
    342 raidattach(int num)
    343 {
    344 	int raidID;
    345 	int i, rc;
    346 
    347 #ifdef DEBUG
    348 	printf("raidattach: Asked for %d units\n", num);
    349 #endif
    350 
    351 	if (num <= 0) {
    352 #ifdef DIAGNOSTIC
    353 		panic("raidattach: count <= 0");
    354 #endif
    355 		return;
    356 	}
    357 	/* This is where all the initialization stuff gets done. */
    358 
    359 	numraid = num;
    360 
    361 	/* Make some space for requested number of units... */
    362 
    363 	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
    364 	if (raidPtrs == NULL) {
    365 		panic("raidPtrs is NULL!!");
    366 	}
    367 
    368 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
    369 	rf_mutex_init(&rf_sparet_wait_mutex);
    370 
    371 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    372 #endif
    373 
    374 	for (i = 0; i < num; i++)
    375 		raidPtrs[i] = NULL;
    376 	rc = rf_BootRaidframe();
    377 	if (rc == 0)
    378 		aprint_normal("Kernelized RAIDframe activated\n");
    379 	else
    380 		panic("Serious error booting RAID!!");
    381 
	/* Put together some data structures like the CCD device does.  This
    383 	 * lets us lock the device and what-not when it gets opened. */
    384 
    385 	raid_softc = (struct raid_softc *)
    386 		malloc(num * sizeof(struct raid_softc),
    387 		       M_RAIDFRAME, M_NOWAIT);
    388 	if (raid_softc == NULL) {
    389 		aprint_error("WARNING: no memory for RAIDframe driver\n");
    390 		return;
    391 	}
    392 
    393 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    394 
    395 	for (raidID = 0; raidID < num; raidID++) {
    396 		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
    397 
    398 		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
    399 			  (RF_Raid_t *));
    400 		if (raidPtrs[raidID] == NULL) {
    401 			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
    402 			numraid = raidID;
    403 			return;
    404 		}
    405 	}
    406 
    407 	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
    408 		aprint_error("raidattach: config_cfattach_attach failed?\n");
    409 	}
    410 
    411 #ifdef RAID_AUTOCONFIG
    412 	raidautoconfig = 1;
    413 #endif
    414 
    415 	/*
    416 	 * Register a finalizer which will be used to auto-config RAID
    417 	 * sets once all real hardware devices have been found.
    418 	 */
    419 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
    420 		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
    421 }
    422 
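/*
 * config_finalize() callback: runs once after all real devices have
 * attached.  Locate the RAID components present in the system, sort
 * them into configuration sets, and hand the sets to
 * rf_buildroothack() for evaluation and configuration.
 */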
    423 int
    424 rf_autoconfig(struct device *self)
    425 {
    426 	RF_AutoConfig_t *ac_list;
    427 	RF_ConfigSet_t *config_sets;
    428 
    429 	if (raidautoconfig == 0)
    430 		return (0);
    431 
    432 	/* XXX This code can only be run once. */
    433 	raidautoconfig = 0;
    434 
    435 	/* 1. locate all RAID components on the system */
    436 #ifdef DEBUG
    437 	printf("Searching for RAID components...\n");
    438 #endif
    439 	ac_list = rf_find_raid_components();
    440 
    441 	/* 2. Sort them into their respective sets. */
    442 	config_sets = rf_create_auto_sets(ac_list);
    443 
    444 	/*
     445 	 * 3. Evaluate each set and configure the valid ones.
    446 	 * This gets done in rf_buildroothack().
    447 	 */
    448 	rf_buildroothack(config_sets);
    449 
    450 	return 1;
    451 }
    452 
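/*
 * Configure every set that has enough components and is marked for
 * autoconfiguration, then, unless the root device was specified
 * explicitly, try to point booted_device at a configured set that is
 * root-eligible (or that contains the device we booted from).
 */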
    453 void
    454 rf_buildroothack(RF_ConfigSet_t *config_sets)
    455 {
    456 	RF_ConfigSet_t *cset;
    457 	RF_ConfigSet_t *next_cset;
    458 	int retcode;
    459 	int raidID;
    460 	int rootID;
    461 	int col;
    462 	int num_root;
    463 	char *devname;
    464 
    465 	rootID = 0;
    466 	num_root = 0;
    467 	cset = config_sets;
    468 	while(cset != NULL ) {
    469 		next_cset = cset->next;
    470 		if (rf_have_enough_components(cset) &&
    471 		    cset->ac->clabel->autoconfigure==1) {
    472 			retcode = rf_auto_config_set(cset,&raidID);
    473 			if (!retcode) {
    474 #ifdef DEBUG
    475 				printf("raid%d: configured ok\n", raidID);
    476 #endif
    477 				if (cset->rootable) {
    478 					rootID = raidID;
    479 					num_root++;
    480 				}
    481 			} else {
    482 				/* The autoconfig didn't work :( */
    483 #ifdef DEBUG
    484 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    485 #endif
    486 				rf_release_all_vps(cset);
    487 			}
    488 		} else {
    489 			/* we're not autoconfiguring this set...
    490 			   release the associated resources */
    491 			rf_release_all_vps(cset);
    492 		}
    493 		/* cleanup */
    494 		rf_cleanup_config_set(cset);
    495 		cset = next_cset;
    496 	}
    497 
    498 	/* if the user has specified what the root device should be
    499 	   then we don't touch booted_device or boothowto... */
    500 
    501 	if (rootspec != NULL)
    502 		return;
    503 
    504 	/* we found something bootable... */
    505 
    506 	if (num_root == 1) {
    507 		booted_device = raid_softc[rootID].sc_dev;
    508 	} else if (num_root > 1) {
    509 
    510 		/*
    511 		 * Maybe the MD code can help. If it cannot, then
    512 		 * setroot() will discover that we have no
    513 		 * booted_device and will ask the user if nothing was
    514 		 * hardwired in the kernel config file
    515 		 */
    516 
    517 		if (booted_device == NULL)
    518 			cpu_rootconf();
    519 		if (booted_device == NULL)
    520 			return;
    521 
    522 		num_root = 0;
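		/*
		 * Scan every valid, root-eligible set and see whether the
		 * device we actually booted from is one of its components.
		 */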
    523 		for (raidID = 0; raidID < numraid; raidID++) {
    524 			if (raidPtrs[raidID]->valid == 0)
    525 				continue;
    526 
    527 			if (raidPtrs[raidID]->root_partition == 0)
    528 				continue;
    529 
    530 			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
    531 				devname = raidPtrs[raidID]->Disks[col].devname;
    532 				devname += sizeof("/dev/") - 1;
    533 				if (strncmp(devname, device_xname(booted_device),
    534 					    strlen(device_xname(booted_device))) != 0)
    535 					continue;
    536 #ifdef DEBUG
    537 				printf("raid%d includes boot device %s\n",
    538 				       raidID, devname);
    539 #endif
    540 				num_root++;
    541 				rootID = raidID;
    542 			}
    543 		}
    544 
    545 		if (num_root == 1) {
    546 			booted_device = raid_softc[rootID].sc_dev;
    547 		} else {
    548 			/* we can't guess.. require the user to answer... */
    549 			boothowto |= RB_ASKNAME;
    550 		}
    551 	}
    552 }
    553 
    554 
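/*
 * d_psize entry point: return the size, in DEV_BSIZE blocks, of the
 * given partition, or -1 if it is not a swap partition or the unit is
 * not configured.
 */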
    555 int
    556 raidsize(dev_t dev)
    557 {
    558 	struct raid_softc *rs;
    559 	struct disklabel *lp;
    560 	int     part, unit, omask, size;
    561 
    562 	unit = raidunit(dev);
    563 	if (unit >= numraid)
    564 		return (-1);
    565 	rs = &raid_softc[unit];
    566 
    567 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    568 		return (-1);
    569 
    570 	part = DISKPART(dev);
    571 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    572 	lp = rs->sc_dkdev.dk_label;
    573 
    574 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
    575 		return (-1);
    576 
    577 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    578 		size = -1;
    579 	else
    580 		size = lp->d_partitions[part].p_size *
    581 		    (lp->d_secsize / DEV_BSIZE);
    582 
    583 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
    584 		return (-1);
    585 
    586 	return (size);
    587 
    588 }
    589 
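/*
 * d_dump entry point: write a crash dump through the RAID device.
 * Only RAID 1 sets are supported; the dump is sent to a single live
 * component (or used spare), selected below.
 */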
    590 int
    591 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
    592 {
    593 	int     unit = raidunit(dev);
    594 	struct raid_softc *rs;
    595 	const struct bdevsw *bdev;
    596 	struct disklabel *lp;
    597 	RF_Raid_t *raidPtr;
    598 	daddr_t offset;
    599 	int     part, c, sparecol, j, scol, dumpto;
    600 	int     error = 0;
    601 
    602 	if (unit >= numraid)
    603 		return (ENXIO);
    604 
    605 	rs = &raid_softc[unit];
    606 	raidPtr = raidPtrs[unit];
    607 
    608 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    609 		return ENXIO;
    610 
    611 	/* we only support dumping to RAID 1 sets */
    612 	if (raidPtr->Layout.numDataCol != 1 ||
    613 	    raidPtr->Layout.numParityCol != 1)
    614 		return EINVAL;
    615 
    616 
    617 	if ((error = raidlock(rs)) != 0)
    618 		return error;
    619 
    620 	if (size % DEV_BSIZE != 0) {
    621 		error = EINVAL;
    622 		goto out;
    623 	}
    624 
    625 	if (blkno + size / DEV_BSIZE > rs->sc_size) {
    626 		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
    627 		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
    628 		    size / DEV_BSIZE, rs->sc_size);
    629 		error = EINVAL;
    630 		goto out;
    631 	}
    632 
    633 	part = DISKPART(dev);
    634 	lp = rs->sc_dkdev.dk_label;
    635 	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
    636 
    637 	/* figure out what device is alive.. */
    638 
    639 	/*
    640 	   Look for a component to dump to.  The preference for the
    641 	   component to dump to is as follows:
    642 	   1) the master
    643 	   2) a used_spare of the master
    644 	   3) the slave
    645 	   4) a used_spare of the slave
    646 	*/
    647 
    648 	dumpto = -1;
    649 	for (c = 0; c < raidPtr->numCol; c++) {
    650 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
    651 			/* this might be the one */
    652 			dumpto = c;
    653 			break;
    654 		}
    655 	}
    656 
    657 	/*
    658 	   At this point we have possibly selected a live master or a
     659 	   live slave.  If we didn't find a live master or a live slave,
     660 	   we now check to see if there is a spared master (or a spared
     661 	   slave) instead.
    662 	*/
    663 
    664 	for (c = 0; c < raidPtr->numSpare; c++) {
    665 		sparecol = raidPtr->numCol + c;
    666 		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
    667 			/* How about this one? */
    668 			scol = -1;
    669 			for(j=0;j<raidPtr->numCol;j++) {
    670 				if (raidPtr->Disks[j].spareCol == sparecol) {
    671 					scol = j;
    672 					break;
    673 				}
    674 			}
    675 			if (scol == 0) {
    676 				/*
    677 				   We must have found a spared master!
    678 				   We'll take that over anything else
    679 				   found so far.  (We couldn't have
    680 				   found a real master before, since
    681 				   this is a used spare, and it's
    682 				   saying that it's replacing the
    683 				   master.)  On reboot (with
    684 				   autoconfiguration turned on)
    685 				   sparecol will become the 1st
    686 				   component (component0) of this set.
    687 				*/
    688 				dumpto = sparecol;
    689 				break;
    690 			} else if (scol != -1) {
    691 				/*
    692 				   Must be a spared slave.  We'll dump
     693 				   to that if we haven't found anything
    694 				   else so far.
    695 				*/
    696 				if (dumpto == -1)
    697 					dumpto = sparecol;
    698 			}
    699 		}
    700 	}
    701 
    702 	if (dumpto == -1) {
    703 		/* we couldn't find any live components to dump to!?!?
    704 		 */
    705 		error = EINVAL;
    706 		goto out;
    707 	}
    708 
    709 	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
    710 
    711 	/*
    712 	   Note that blkno is relative to this particular partition.
    713 	   By adding the offset of this partition in the RAID
    714 	   set, and also adding RF_PROTECTED_SECTORS, we get a
    715 	   value that is relative to the partition used for the
    716 	   underlying component.
    717 	*/
    718 
    719 	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
    720 				blkno + offset, va, size);
    721 
    722 out:
    723 	raidunlock(rs);
    724 
    725 	return error;
    726 }
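/*
 * d_open entry point: lock the unit, read the disklabel on the first
 * open of a configured set, check that the requested partition exists,
 * and record the open in the appropriate open mask.  The first open
 * also marks the components dirty via rf_markalldirty().
 */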
    727 /* ARGSUSED */
    728 int
    729 raidopen(dev_t dev, int flags, int fmt,
    730     struct lwp *l)
    731 {
    732 	int     unit = raidunit(dev);
    733 	struct raid_softc *rs;
    734 	struct disklabel *lp;
    735 	int     part, pmask;
    736 	int     error = 0;
    737 
    738 	if (unit >= numraid)
    739 		return (ENXIO);
    740 	rs = &raid_softc[unit];
    741 
    742 	if ((error = raidlock(rs)) != 0)
    743 		return (error);
    744 	lp = rs->sc_dkdev.dk_label;
    745 
    746 	part = DISKPART(dev);
    747 
    748 	/*
    749 	 * If there are wedges, and this is not RAW_PART, then we
    750 	 * need to fail.
    751 	 */
    752 	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
    753 		error = EBUSY;
    754 		goto bad;
    755 	}
    756 	pmask = (1 << part);
    757 
    758 	if ((rs->sc_flags & RAIDF_INITED) &&
    759 	    (rs->sc_dkdev.dk_openmask == 0))
    760 		raidgetdisklabel(dev);
    761 
    762 	/* make sure that this partition exists */
    763 
    764 	if (part != RAW_PART) {
    765 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    766 		    ((part >= lp->d_npartitions) ||
    767 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    768 			error = ENXIO;
    769 			goto bad;
    770 		}
    771 	}
    772 	/* Prevent this unit from being unconfigured while open. */
    773 	switch (fmt) {
    774 	case S_IFCHR:
    775 		rs->sc_dkdev.dk_copenmask |= pmask;
    776 		break;
    777 
    778 	case S_IFBLK:
    779 		rs->sc_dkdev.dk_bopenmask |= pmask;
    780 		break;
    781 	}
    782 
    783 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    784 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    785 		/* First one... mark things as dirty... Note that we *MUST*
    786 		 have done a configure before this.  I DO NOT WANT TO BE
    787 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    788 		 THAT THEY BELONG TOGETHER!!!!! */
    789 		/* XXX should check to see if we're only open for reading
    790 		   here... If so, we needn't do this, but then need some
    791 		   other way of keeping track of what's happened.. */
    792 
    793 		rf_markalldirty( raidPtrs[unit] );
    794 	}
    795 
    796 
    797 	rs->sc_dkdev.dk_openmask =
    798 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    799 
    800 bad:
    801 	raidunlock(rs);
    802 
    803 	return (error);
    804 
    805 
    806 }
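/*
 * d_close entry point: clear the open mask for this partition and, on
 * the last close of a configured unit, write out updated component
 * labels; if the system is shutting down, also shut down and detach
 * the RAID set.
 */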
    807 /* ARGSUSED */
    808 int
    809 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
    810 {
    811 	int     unit = raidunit(dev);
    812 	struct cfdata *cf;
    813 	struct raid_softc *rs;
    814 	int     error = 0;
    815 	int     part;
    816 
    817 	if (unit >= numraid)
    818 		return (ENXIO);
    819 	rs = &raid_softc[unit];
    820 
    821 	if ((error = raidlock(rs)) != 0)
    822 		return (error);
    823 
    824 	part = DISKPART(dev);
    825 
    826 	/* ...that much closer to allowing unconfiguration... */
    827 	switch (fmt) {
    828 	case S_IFCHR:
    829 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    830 		break;
    831 
    832 	case S_IFBLK:
    833 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    834 		break;
    835 	}
    836 	rs->sc_dkdev.dk_openmask =
    837 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    838 
    839 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    840 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
     841 		/* Last close of a still-configured device: mark things
     842 		   as clean.  (Had RAIDF_INITED not been set, device
     843 		   shutdown would already have taken care of setting the
     844 		   clean bits.) */
    845 
    846 		rf_update_component_labels(raidPtrs[unit],
    847 						 RF_FINAL_COMPONENT_UPDATE);
    848 		if (doing_shutdown) {
    849 			/* last one, and we're going down, so
    850 			   lights out for this RAID set too. */
    851 			error = rf_Shutdown(raidPtrs[unit]);
    852 
    853 			/* It's no longer initialized... */
    854 			rs->sc_flags &= ~RAIDF_INITED;
    855 
    856 			/* detach the device */
    857 
    858 			cf = device_cfdata(rs->sc_dev);
    859 			error = config_detach(rs->sc_dev, DETACH_QUIET);
    860 			free(cf, M_RAIDFRAME);
    861 
    862 			/* Detach the disk. */
    863 			disk_detach(&rs->sc_dkdev);
    864 			disk_destroy(&rs->sc_dkdev);
    865 		}
    866 	}
    867 
    868 	raidunlock(rs);
    869 	return (0);
    870 
    871 }
    872 
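/*
 * d_strategy entry point: bounds-check the request, put it on the
 * unit's buffer queue and wake whatever is sleeping on the unit's
 * iodone channel so the I/O is issued at the next convenient time.
 * Errors are reported by setting b_error and calling biodone().
 */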
    873 void
    874 raidstrategy(struct buf *bp)
    875 {
    876 	int s;
    877 
    878 	unsigned int raidID = raidunit(bp->b_dev);
    879 	RF_Raid_t *raidPtr;
    880 	struct raid_softc *rs = &raid_softc[raidID];
    881 	int     wlabel;
    882 
    883 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    884 		bp->b_error = ENXIO;
    885 		goto done;
    886 	}
    887 	if (raidID >= numraid || !raidPtrs[raidID]) {
    888 		bp->b_error = ENODEV;
    889 		goto done;
    890 	}
    891 	raidPtr = raidPtrs[raidID];
    892 	if (!raidPtr->valid) {
    893 		bp->b_error = ENODEV;
    894 		goto done;
    895 	}
    896 	if (bp->b_bcount == 0) {
    897 		db1_printf(("b_bcount is zero..\n"));
    898 		goto done;
    899 	}
    900 
    901 	/*
    902 	 * Do bounds checking and adjust transfer.  If there's an
    903 	 * error, the bounds check will flag that for us.
    904 	 */
    905 
    906 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    907 	if (DISKPART(bp->b_dev) == RAW_PART) {
    908 		uint64_t size; /* device size in DEV_BSIZE unit */
    909 
    910 		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
    911 			size = raidPtr->totalSectors <<
    912 			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
    913 		} else {
    914 			size = raidPtr->totalSectors >>
    915 			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
    916 		}
    917 		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
    918 			goto done;
    919 		}
    920 	} else {
    921 		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
    922 			db1_printf(("Bounds check failed!!:%d %d\n",
    923 				(int) bp->b_blkno, (int) wlabel));
    924 			goto done;
    925 		}
    926 	}
    927 	s = splbio();
    928 
    929 	bp->b_resid = 0;
    930 
    931 	/* stuff it onto our queue */
    932 	BUFQ_PUT(rs->buf_queue, bp);
    933 
     934 	/* schedule the IO to happen at the next convenient time */
    935 	wakeup(&(raidPtrs[raidID]->iodone));
    936 
    937 	splx(s);
    938 	return;
    939 
    940 done:
    941 	bp->b_resid = bp->b_bcount;
    942 	biodone(bp);
    943 }
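/*
 * Character-device read and write entry points: thin wrappers around
 * physio(), which in turn issues the transfer through raidstrategy().
 */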
    944 /* ARGSUSED */
    945 int
    946 raidread(dev_t dev, struct uio *uio, int flags)
    947 {
    948 	int     unit = raidunit(dev);
    949 	struct raid_softc *rs;
    950 
    951 	if (unit >= numraid)
    952 		return (ENXIO);
    953 	rs = &raid_softc[unit];
    954 
    955 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    956 		return (ENXIO);
    957 
    958 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    959 
    960 }
    961 /* ARGSUSED */
    962 int
    963 raidwrite(dev_t dev, struct uio *uio, int flags)
    964 {
    965 	int     unit = raidunit(dev);
    966 	struct raid_softc *rs;
    967 
    968 	if (unit >= numraid)
    969 		return (ENXIO);
    970 	rs = &raid_softc[unit];
    971 
    972 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    973 		return (ENXIO);
    974 
    975 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    976 
    977 }
    978 
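/*
 * d_ioctl entry point for both the block and character devices:
 * handles the standard disk ioctls (disklabel, wedge and cache
 * operations) as well as the RAIDFRAME_* commands used by raidctl(8)
 * for configuration, reconstruction and status reporting.
 */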
    979 int
    980 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
    981 {
    982 	int     unit = raidunit(dev);
    983 	int     error = 0;
    984 	int     part, pmask;
    985 	struct cfdata *cf;
    986 	struct raid_softc *rs;
    987 	RF_Config_t *k_cfg, *u_cfg;
    988 	RF_Raid_t *raidPtr;
    989 	RF_RaidDisk_t *diskPtr;
    990 	RF_AccTotals_t *totals;
    991 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    992 	u_char *specific_buf;
    993 	int retcode = 0;
    994 	int column;
    995 /*	int raidid; */
    996 	struct rf_recon_req *rrcopy, *rr;
    997 	RF_ComponentLabel_t *clabel;
    998 	RF_ComponentLabel_t *ci_label;
    999 	RF_ComponentLabel_t **clabel_ptr;
   1000 	RF_SingleComponent_t *sparePtr,*componentPtr;
   1001 	RF_SingleComponent_t component;
   1002 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
   1003 	int i, j, d;
   1004 #ifdef __HAVE_OLD_DISKLABEL
   1005 	struct disklabel newlabel;
   1006 #endif
   1007 	struct dkwedge_info *dkw;
   1008 
   1009 	if (unit >= numraid)
   1010 		return (ENXIO);
   1011 	rs = &raid_softc[unit];
   1012 	raidPtr = raidPtrs[unit];
   1013 
   1014 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
   1015 		(int) DISKPART(dev), (int) unit, (int) cmd));
   1016 
   1017 	/* Must be open for writes for these commands... */
   1018 	switch (cmd) {
   1019 #ifdef DIOCGSECTORSIZE
   1020 	case DIOCGSECTORSIZE:
   1021 		*(u_int *)data = raidPtr->bytesPerSector;
   1022 		return 0;
   1023 	case DIOCGMEDIASIZE:
   1024 		*(off_t *)data =
   1025 		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
   1026 		return 0;
   1027 #endif
   1028 	case DIOCSDINFO:
   1029 	case DIOCWDINFO:
   1030 #ifdef __HAVE_OLD_DISKLABEL
   1031 	case ODIOCWDINFO:
   1032 	case ODIOCSDINFO:
   1033 #endif
   1034 	case DIOCWLABEL:
   1035 	case DIOCAWEDGE:
   1036 	case DIOCDWEDGE:
   1037 		if ((flag & FWRITE) == 0)
   1038 			return (EBADF);
   1039 	}
   1040 
   1041 	/* Must be initialized for these... */
   1042 	switch (cmd) {
   1043 	case DIOCGDINFO:
   1044 	case DIOCSDINFO:
   1045 	case DIOCWDINFO:
   1046 #ifdef __HAVE_OLD_DISKLABEL
   1047 	case ODIOCGDINFO:
   1048 	case ODIOCWDINFO:
   1049 	case ODIOCSDINFO:
   1050 	case ODIOCGDEFLABEL:
   1051 #endif
   1052 	case DIOCGPART:
   1053 	case DIOCWLABEL:
   1054 	case DIOCGDEFLABEL:
   1055 	case DIOCAWEDGE:
   1056 	case DIOCDWEDGE:
   1057 	case DIOCLWEDGES:
   1058 	case DIOCCACHESYNC:
   1059 	case RAIDFRAME_SHUTDOWN:
   1060 	case RAIDFRAME_REWRITEPARITY:
   1061 	case RAIDFRAME_GET_INFO:
   1062 	case RAIDFRAME_RESET_ACCTOTALS:
   1063 	case RAIDFRAME_GET_ACCTOTALS:
   1064 	case RAIDFRAME_KEEP_ACCTOTALS:
   1065 	case RAIDFRAME_GET_SIZE:
   1066 	case RAIDFRAME_FAIL_DISK:
   1067 	case RAIDFRAME_COPYBACK:
   1068 	case RAIDFRAME_CHECK_RECON_STATUS:
   1069 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1070 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1071 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1072 	case RAIDFRAME_ADD_HOT_SPARE:
   1073 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1074 	case RAIDFRAME_INIT_LABELS:
   1075 	case RAIDFRAME_REBUILD_IN_PLACE:
   1076 	case RAIDFRAME_CHECK_PARITY:
   1077 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1078 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1079 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1080 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1081 	case RAIDFRAME_SET_AUTOCONFIG:
   1082 	case RAIDFRAME_SET_ROOT:
   1083 	case RAIDFRAME_DELETE_COMPONENT:
   1084 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1085 	case RAIDFRAME_PARITYMAP_STATUS:
   1086 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
   1087 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
   1088 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
   1089 		if ((rs->sc_flags & RAIDF_INITED) == 0)
   1090 			return (ENXIO);
   1091 	}
   1092 
   1093 	switch (cmd) {
   1094 
   1095 		/* configure the system */
   1096 	case RAIDFRAME_CONFIGURE:
   1097 
   1098 		if (raidPtr->valid) {
   1099 			/* There is a valid RAID set running on this unit! */
   1100 			printf("raid%d: Device already configured!\n",unit);
   1101 			return(EINVAL);
   1102 		}
   1103 
   1104 		/* copy-in the configuration information */
   1105 		/* data points to a pointer to the configuration structure */
   1106 
   1107 		u_cfg = *((RF_Config_t **) data);
   1108 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
   1109 		if (k_cfg == NULL) {
   1110 			return (ENOMEM);
   1111 		}
   1112 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
   1113 		if (retcode) {
   1114 			RF_Free(k_cfg, sizeof(RF_Config_t));
   1115 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
   1116 				retcode));
   1117 			return (retcode);
   1118 		}
   1119 		/* allocate a buffer for the layout-specific data, and copy it
   1120 		 * in */
   1121 		if (k_cfg->layoutSpecificSize) {
   1122 			if (k_cfg->layoutSpecificSize > 10000) {
   1123 				/* sanity check */
   1124 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1125 				return (EINVAL);
   1126 			}
   1127 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
   1128 			    (u_char *));
   1129 			if (specific_buf == NULL) {
   1130 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1131 				return (ENOMEM);
   1132 			}
   1133 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1134 			    k_cfg->layoutSpecificSize);
   1135 			if (retcode) {
   1136 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1137 				RF_Free(specific_buf,
   1138 					k_cfg->layoutSpecificSize);
   1139 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
   1140 					retcode));
   1141 				return (retcode);
   1142 			}
   1143 		} else
   1144 			specific_buf = NULL;
   1145 		k_cfg->layoutSpecific = specific_buf;
   1146 
   1147 		/* should do some kind of sanity check on the configuration.
   1148 		 * Store the sum of all the bytes in the last byte? */
   1149 
   1150 		/* configure the system */
   1151 
   1152 		/*
   1153 		 * Clear the entire RAID descriptor, just to make sure
   1154 		 *  there is no stale data left in the case of a
   1155 		 *  reconfiguration
   1156 		 */
   1157 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
   1158 		raidPtr->raidid = unit;
   1159 
   1160 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
   1161 
   1162 		if (retcode == 0) {
   1163 
   1164 			/* allow this many simultaneous IO's to
   1165 			   this RAID device */
   1166 			raidPtr->openings = RAIDOUTSTANDING;
   1167 
   1168 			raidinit(raidPtr);
   1169 			rf_markalldirty(raidPtr);
   1170 		}
   1171 		/* free the buffers.  No return code here. */
   1172 		if (k_cfg->layoutSpecificSize) {
   1173 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1174 		}
   1175 		RF_Free(k_cfg, sizeof(RF_Config_t));
   1176 
   1177 		return (retcode);
   1178 
   1179 		/* shutdown the system */
   1180 	case RAIDFRAME_SHUTDOWN:
   1181 
   1182 		if ((error = raidlock(rs)) != 0)
   1183 			return (error);
   1184 
   1185 		/*
   1186 		 * If somebody has a partition mounted, we shouldn't
   1187 		 * shutdown.
   1188 		 */
   1189 
   1190 		part = DISKPART(dev);
   1191 		pmask = (1 << part);
   1192 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
   1193 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
   1194 			(rs->sc_dkdev.dk_copenmask & pmask))) {
   1195 			raidunlock(rs);
   1196 			return (EBUSY);
   1197 		}
   1198 
   1199 		retcode = rf_Shutdown(raidPtr);
   1200 
   1201 		/* It's no longer initialized... */
   1202 		rs->sc_flags &= ~RAIDF_INITED;
   1203 
   1204 		/* free the pseudo device attach bits */
   1205 
   1206 		cf = device_cfdata(rs->sc_dev);
   1207 		/* XXX this causes us to not return any errors
   1208 		   from the above call to rf_Shutdown() */
   1209 		retcode = config_detach(rs->sc_dev, DETACH_QUIET);
   1210 		free(cf, M_RAIDFRAME);
   1211 
   1212 		/* Detach the disk. */
   1213 		disk_detach(&rs->sc_dkdev);
   1214 		disk_destroy(&rs->sc_dkdev);
   1215 
   1216 		raidunlock(rs);
   1217 
   1218 		return (retcode);
   1219 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1220 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1221 		/* need to read the component label for the disk indicated
   1222 		   by row,column in clabel */
   1223 
   1224 		/*
   1225 		 * Perhaps there should be an option to skip the in-core
   1226 		 * copy and hit the disk, as with disklabel(8).
   1227 		 */
   1228 		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
   1229 
   1230 		retcode = copyin( *clabel_ptr, clabel,
   1231 				  sizeof(RF_ComponentLabel_t));
   1232 
    1233 		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
    1234 			return(retcode);
    1235 		}
   1236 
   1237 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1238 
   1239 		column = clabel->column;
   1240 
    1241 		if ((column < 0) || (column >= raidPtr->numCol +
    1242 				     raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
    1243 			return(EINVAL);
    1244 		}
   1245 
   1246 		RF_Free(clabel, sizeof(*clabel));
   1247 
   1248 		clabel = raidget_component_label(raidPtr, column);
   1249 
   1250 		if (retcode == 0) {
   1251 			retcode = copyout(clabel, *clabel_ptr,
   1252 					  sizeof(RF_ComponentLabel_t));
   1253 		}
   1254 		return (retcode);
   1255 
   1256 #if 0
   1257 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1258 		clabel = (RF_ComponentLabel_t *) data;
   1259 
   1260 		/* XXX check the label for valid stuff... */
   1261 		/* Note that some things *should not* get modified --
   1262 		   the user should be re-initing the labels instead of
   1263 		   trying to patch things.
   1264 		   */
   1265 
   1266 		raidid = raidPtr->raidid;
   1267 #ifdef DEBUG
   1268 		printf("raid%d: Got component label:\n", raidid);
   1269 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1270 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1271 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1272 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1273 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1274 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1275 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1276 #endif
   1277 		clabel->row = 0;
   1278 		column = clabel->column;
   1279 
   1280 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1281 			return(EINVAL);
   1282 		}
   1283 
   1284 		/* XXX this isn't allowed to do anything for now :-) */
   1285 
   1286 		/* XXX and before it is, we need to fill in the rest
   1287 		   of the fields!?!?!?! */
   1288 		memcpy(raidget_component_label(raidPtr, column),
   1289 		    clabel, sizeof(*clabel));
   1290 		raidflush_component_label(raidPtr, column);
   1291 		return (0);
   1292 #endif
   1293 
   1294 	case RAIDFRAME_INIT_LABELS:
   1295 		clabel = (RF_ComponentLabel_t *) data;
   1296 		/*
   1297 		   we only want the serial number from
   1298 		   the above.  We get all the rest of the information
   1299 		   from the config that was used to create this RAID
   1300 		   set.
   1301 		   */
   1302 
   1303 		raidPtr->serial_number = clabel->serial_number;
   1304 
   1305 		for(column=0;column<raidPtr->numCol;column++) {
   1306 			diskPtr = &raidPtr->Disks[column];
   1307 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1308 				ci_label = raidget_component_label(raidPtr,
   1309 				    column);
   1310 				/* Zeroing this is important. */
   1311 				memset(ci_label, 0, sizeof(*ci_label));
   1312 				raid_init_component_label(raidPtr, ci_label);
   1313 				ci_label->serial_number =
   1314 				    raidPtr->serial_number;
    1315 				ci_label->row = 0; /* we don't pretend to support more */
   1316 				ci_label->partitionSize =
   1317 				    diskPtr->partitionSize;
   1318 				ci_label->column = column;
   1319 				raidflush_component_label(raidPtr, column);
   1320 			}
   1321 			/* XXXjld what about the spares? */
   1322 		}
   1323 
   1324 		return (retcode);
   1325 	case RAIDFRAME_SET_AUTOCONFIG:
   1326 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1327 		printf("raid%d: New autoconfig value is: %d\n",
   1328 		       raidPtr->raidid, d);
   1329 		*(int *) data = d;
   1330 		return (retcode);
   1331 
   1332 	case RAIDFRAME_SET_ROOT:
   1333 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1334 		printf("raid%d: New rootpartition value is: %d\n",
   1335 		       raidPtr->raidid, d);
   1336 		*(int *) data = d;
   1337 		return (retcode);
   1338 
   1339 		/* initialize all parity */
   1340 	case RAIDFRAME_REWRITEPARITY:
   1341 
   1342 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1343 			/* Parity for RAID 0 is trivially correct */
   1344 			raidPtr->parity_good = RF_RAID_CLEAN;
   1345 			return(0);
   1346 		}
   1347 
   1348 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1349 			/* Re-write is already in progress! */
   1350 			return(EINVAL);
   1351 		}
   1352 
   1353 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1354 					   rf_RewriteParityThread,
   1355 					   raidPtr,"raid_parity");
   1356 		return (retcode);
   1357 
   1358 
   1359 	case RAIDFRAME_ADD_HOT_SPARE:
   1360 		sparePtr = (RF_SingleComponent_t *) data;
   1361 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
   1362 		retcode = rf_add_hot_spare(raidPtr, &component);
   1363 		return(retcode);
   1364 
   1365 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1366 		return(retcode);
   1367 
   1368 	case RAIDFRAME_DELETE_COMPONENT:
   1369 		componentPtr = (RF_SingleComponent_t *)data;
   1370 		memcpy( &component, componentPtr,
   1371 			sizeof(RF_SingleComponent_t));
   1372 		retcode = rf_delete_component(raidPtr, &component);
   1373 		return(retcode);
   1374 
   1375 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1376 		componentPtr = (RF_SingleComponent_t *)data;
   1377 		memcpy( &component, componentPtr,
   1378 			sizeof(RF_SingleComponent_t));
   1379 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1380 		return(retcode);
   1381 
   1382 	case RAIDFRAME_REBUILD_IN_PLACE:
   1383 
   1384 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1385 			/* Can't do this on a RAID 0!! */
   1386 			return(EINVAL);
   1387 		}
   1388 
   1389 		if (raidPtr->recon_in_progress == 1) {
   1390 			/* a reconstruct is already in progress! */
   1391 			return(EINVAL);
   1392 		}
   1393 
   1394 		componentPtr = (RF_SingleComponent_t *) data;
   1395 		memcpy( &component, componentPtr,
   1396 			sizeof(RF_SingleComponent_t));
   1397 		component.row = 0; /* we don't support any more */
   1398 		column = component.column;
   1399 
   1400 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1401 			return(EINVAL);
   1402 		}
   1403 
   1404 		RF_LOCK_MUTEX(raidPtr->mutex);
   1405 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1406 		    (raidPtr->numFailures > 0)) {
   1407 			/* XXX 0 above shouldn't be constant!!! */
   1408 			/* some component other than this has failed.
   1409 			   Let's not make things worse than they already
   1410 			   are... */
   1411 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1412 			       raidPtr->raidid);
   1413 			printf("raid%d:     Col: %d   Too many failures.\n",
   1414 			       raidPtr->raidid, column);
   1415 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1416 			return (EINVAL);
   1417 		}
   1418 		if (raidPtr->Disks[column].status ==
   1419 		    rf_ds_reconstructing) {
   1420 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1421 			       raidPtr->raidid);
   1422 			printf("raid%d:    Col: %d   Reconstruction already occuring!\n", raidPtr->raidid, column);
   1423 
   1424 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1425 			return (EINVAL);
   1426 		}
   1427 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1428 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1429 			return (EINVAL);
   1430 		}
   1431 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1432 
   1433 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1434 		if (rrcopy == NULL)
   1435 			return(ENOMEM);
   1436 
   1437 		rrcopy->raidPtr = (void *) raidPtr;
   1438 		rrcopy->col = column;
   1439 
   1440 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1441 					   rf_ReconstructInPlaceThread,
   1442 					   rrcopy,"raid_reconip");
   1443 		return(retcode);
   1444 
   1445 	case RAIDFRAME_GET_INFO:
   1446 		if (!raidPtr->valid)
   1447 			return (ENODEV);
   1448 		ucfgp = (RF_DeviceConfig_t **) data;
   1449 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1450 			  (RF_DeviceConfig_t *));
   1451 		if (d_cfg == NULL)
   1452 			return (ENOMEM);
   1453 		d_cfg->rows = 1; /* there is only 1 row now */
   1454 		d_cfg->cols = raidPtr->numCol;
   1455 		d_cfg->ndevs = raidPtr->numCol;
   1456 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1457 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1458 			return (ENOMEM);
   1459 		}
   1460 		d_cfg->nspares = raidPtr->numSpare;
   1461 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1462 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1463 			return (ENOMEM);
   1464 		}
   1465 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1466 		d = 0;
   1467 		for (j = 0; j < d_cfg->cols; j++) {
   1468 			d_cfg->devs[d] = raidPtr->Disks[j];
   1469 			d++;
   1470 		}
   1471 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1472 			d_cfg->spares[i] = raidPtr->Disks[j];
   1473 			if (d_cfg->spares[i].status == rf_ds_rebuilding_spare) {
   1474 				/* XXX: raidctl(8) expects to see this as a used spare */
   1475 				d_cfg->spares[i].status = rf_ds_used_spare;
   1476 			}
   1477 		}
   1478 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1479 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1480 
   1481 		return (retcode);
   1482 
   1483 	case RAIDFRAME_CHECK_PARITY:
   1484 		*(int *) data = raidPtr->parity_good;
   1485 		return (0);
   1486 
   1487 	case RAIDFRAME_PARITYMAP_STATUS:
   1488 		rf_paritymap_status(raidPtr->parity_map,
   1489 		    (struct rf_pmstat *)data);
   1490 		return 0;
   1491 
   1492 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
   1493 		if (raidPtr->parity_map == NULL)
   1494 			return ENOENT; /* ??? */
   1495 		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
   1496 			(struct rf_pmparams *)data, 1))
   1497 			return EINVAL;
   1498 		return 0;
   1499 
   1500 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
   1501 		*(int *) data = rf_paritymap_get_disable(raidPtr);
   1502 		return 0;
   1503 
   1504 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
   1505 		rf_paritymap_set_disable(raidPtr, *(int *)data);
   1506 		/* XXX should errors be passed up? */
   1507 		return 0;
   1508 
   1509 	case RAIDFRAME_RESET_ACCTOTALS:
   1510 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1511 		return (0);
   1512 
   1513 	case RAIDFRAME_GET_ACCTOTALS:
   1514 		totals = (RF_AccTotals_t *) data;
   1515 		*totals = raidPtr->acc_totals;
   1516 		return (0);
   1517 
   1518 	case RAIDFRAME_KEEP_ACCTOTALS:
   1519 		raidPtr->keep_acc_totals = *(int *)data;
   1520 		return (0);
   1521 
   1522 	case RAIDFRAME_GET_SIZE:
   1523 		*(int *) data = raidPtr->totalSectors;
   1524 		return (0);
   1525 
   1526 		/* fail a disk & optionally start reconstruction */
   1527 	case RAIDFRAME_FAIL_DISK:
   1528 
   1529 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1530 			/* Can't do this on a RAID 0!! */
   1531 			return(EINVAL);
   1532 		}
   1533 
   1534 		rr = (struct rf_recon_req *) data;
   1535 		rr->row = 0;
   1536 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1537 			return (EINVAL);
   1538 
   1539 
   1540 		RF_LOCK_MUTEX(raidPtr->mutex);
   1541 		if (raidPtr->status == rf_rs_reconstructing) {
   1542 			/* you can't fail a disk while we're reconstructing! */
   1543 			/* XXX wrong for RAID6 */
   1544 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1545 			return (EINVAL);
   1546 		}
   1547 		if ((raidPtr->Disks[rr->col].status ==
   1548 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1549 			/* some other component has failed.  Let's not make
   1550 			   things worse. XXX wrong for RAID6 */
   1551 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1552 			return (EINVAL);
   1553 		}
   1554 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1555 			/* Can't fail a spared disk! */
   1556 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1557 			return (EINVAL);
   1558 		}
   1559 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1560 
   1561 		/* make a copy of the recon request so that we don't rely on
   1562 		 * the user's buffer */
   1563 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1564 		if (rrcopy == NULL)
   1565 			return(ENOMEM);
   1566 		memcpy(rrcopy, rr, sizeof(*rr));
   1567 		rrcopy->raidPtr = (void *) raidPtr;
   1568 
   1569 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1570 					   rf_ReconThread,
   1571 					   rrcopy,"raid_recon");
   1572 		return (0);
   1573 
   1574 		/* invoke a copyback operation after recon on whatever disk
   1575 		 * needs it, if any */
   1576 	case RAIDFRAME_COPYBACK:
   1577 
   1578 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1579 			/* This makes no sense on a RAID 0!! */
   1580 			return(EINVAL);
   1581 		}
   1582 
   1583 		if (raidPtr->copyback_in_progress == 1) {
   1584 			/* Copyback is already in progress! */
   1585 			return(EINVAL);
   1586 		}
   1587 
   1588 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1589 					   rf_CopybackThread,
   1590 					   raidPtr,"raid_copyback");
   1591 		return (retcode);
   1592 
   1593 		/* return the percentage completion of reconstruction */
   1594 	case RAIDFRAME_CHECK_RECON_STATUS:
   1595 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1596 			/* This makes no sense on a RAID 0, so tell the
   1597 			   user it's done. */
   1598 			*(int *) data = 100;
   1599 			return(0);
   1600 		}
   1601 		if (raidPtr->status != rf_rs_reconstructing)
   1602 			*(int *) data = 100;
   1603 		else {
   1604 			if (raidPtr->reconControl->numRUsTotal > 0) {
   1605 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
   1606 			} else {
   1607 				*(int *) data = 0;
   1608 			}
   1609 		}
   1610 		return (0);
   1611 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1612 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1613 		if (raidPtr->status != rf_rs_reconstructing) {
   1614 			progressInfo.remaining = 0;
   1615 			progressInfo.completed = 100;
   1616 			progressInfo.total = 100;
   1617 		} else {
   1618 			progressInfo.total =
   1619 				raidPtr->reconControl->numRUsTotal;
   1620 			progressInfo.completed =
   1621 				raidPtr->reconControl->numRUsComplete;
   1622 			progressInfo.remaining = progressInfo.total -
   1623 				progressInfo.completed;
   1624 		}
   1625 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1626 				  sizeof(RF_ProgressInfo_t));
   1627 		return (retcode);
   1628 
   1629 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1630 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1631 			/* This makes no sense on a RAID 0, so tell the
   1632 			   user it's done. */
   1633 			*(int *) data = 100;
   1634 			return(0);
   1635 		}
   1636 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1637 			*(int *) data = 100 *
   1638 				raidPtr->parity_rewrite_stripes_done /
   1639 				raidPtr->Layout.numStripe;
   1640 		} else {
   1641 			*(int *) data = 100;
   1642 		}
   1643 		return (0);
   1644 
   1645 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1646 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1647 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1648 			progressInfo.total = raidPtr->Layout.numStripe;
   1649 			progressInfo.completed =
   1650 				raidPtr->parity_rewrite_stripes_done;
   1651 			progressInfo.remaining = progressInfo.total -
   1652 				progressInfo.completed;
   1653 		} else {
   1654 			progressInfo.remaining = 0;
   1655 			progressInfo.completed = 100;
   1656 			progressInfo.total = 100;
   1657 		}
   1658 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1659 				  sizeof(RF_ProgressInfo_t));
   1660 		return (retcode);
   1661 
   1662 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1663 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1664 			/* This makes no sense on a RAID 0 */
   1665 			*(int *) data = 100;
   1666 			return(0);
   1667 		}
   1668 		if (raidPtr->copyback_in_progress == 1) {
   1669 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1670 				raidPtr->Layout.numStripe;
   1671 		} else {
   1672 			*(int *) data = 100;
   1673 		}
   1674 		return (0);
   1675 
   1676 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1677 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1678 		if (raidPtr->copyback_in_progress == 1) {
   1679 			progressInfo.total = raidPtr->Layout.numStripe;
   1680 			progressInfo.completed =
   1681 				raidPtr->copyback_stripes_done;
   1682 			progressInfo.remaining = progressInfo.total -
   1683 				progressInfo.completed;
   1684 		} else {
   1685 			progressInfo.remaining = 0;
   1686 			progressInfo.completed = 100;
   1687 			progressInfo.total = 100;
   1688 		}
   1689 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1690 				  sizeof(RF_ProgressInfo_t));
   1691 		return (retcode);
   1692 
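		/*
		 * Illustrative sketch (not part of this driver): the *_EXT
		 * status ioctls above copy the progress structure out through
		 * a pointer passed by reference, as the copyout() of
		 * *progressInfoPtr implies.  A hypothetical userland caller
		 * (fd and any headers are assumptions) might do:
		 *
		 *	RF_ProgressInfo_t info, *infop = &info;
		 *
		 *	if (ioctl(fd, RAIDFRAME_CHECK_RECON_STATUS_EXT,
		 *	    &infop) == 0 && info.total > 0)
		 *		printf("recon: %ju%% done\n",
		 *		    (uintmax_t)info.completed * 100 /
		 *		    info.total);
		 */
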
    1693 		/* the sparetable daemon calls this to wait for the kernel to
    1694 		 * need a spare table.  This ioctl does not return until a
    1695 		 * spare table is needed.
    1696 		 * XXX calling mpsleep here in the ioctl code is almost
    1697 		 * certainly wrong and evil.  I should either compute the
    1698 		 * spare table in the kernel, or have a different interface
    1699 		 * (a different character device) for delivering the table. XXX */
   1700 #if 0
   1701 	case RAIDFRAME_SPARET_WAIT:
   1702 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1703 		while (!rf_sparet_wait_queue)
   1704 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1705 		waitreq = rf_sparet_wait_queue;
   1706 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1707 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1708 
   1709 		/* structure assignment */
   1710 		*((RF_SparetWait_t *) data) = *waitreq;
   1711 
   1712 		RF_Free(waitreq, sizeof(*waitreq));
   1713 		return (0);
   1714 
   1715 		/* wakes up a process waiting on SPARET_WAIT and puts an error
    1716 		 * code in it that will cause the daemon to exit */
   1717 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1718 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1719 		waitreq->fcol = -1;
   1720 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1721 		waitreq->next = rf_sparet_wait_queue;
   1722 		rf_sparet_wait_queue = waitreq;
   1723 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1724 		wakeup(&rf_sparet_wait_queue);
   1725 		return (0);
   1726 
   1727 		/* used by the spare table daemon to deliver a spare table
   1728 		 * into the kernel */
   1729 	case RAIDFRAME_SEND_SPARET:
   1730 
   1731 		/* install the spare table */
   1732 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1733 
   1734 		/* respond to the requestor.  the return status of the spare
   1735 		 * table installation is passed in the "fcol" field */
   1736 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1737 		waitreq->fcol = retcode;
   1738 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1739 		waitreq->next = rf_sparet_resp_queue;
   1740 		rf_sparet_resp_queue = waitreq;
   1741 		wakeup(&rf_sparet_resp_queue);
   1742 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1743 
   1744 		return (retcode);
   1745 #endif
   1746 
   1747 	default:
   1748 		break; /* fall through to the os-specific code below */
   1749 
   1750 	}
   1751 
   1752 	if (!raidPtr->valid)
   1753 		return (EINVAL);
   1754 
   1755 	/*
   1756 	 * Add support for "regular" device ioctls here.
   1757 	 */
   1758 
   1759 	switch (cmd) {
   1760 	case DIOCGDINFO:
   1761 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1762 		break;
   1763 #ifdef __HAVE_OLD_DISKLABEL
   1764 	case ODIOCGDINFO:
   1765 		newlabel = *(rs->sc_dkdev.dk_label);
   1766 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1767 			return ENOTTY;
   1768 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1769 		break;
   1770 #endif
   1771 
   1772 	case DIOCGPART:
   1773 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1774 		((struct partinfo *) data)->part =
   1775 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1776 		break;
   1777 
   1778 	case DIOCWDINFO:
   1779 	case DIOCSDINFO:
   1780 #ifdef __HAVE_OLD_DISKLABEL
   1781 	case ODIOCWDINFO:
   1782 	case ODIOCSDINFO:
   1783 #endif
   1784 	{
   1785 		struct disklabel *lp;
   1786 #ifdef __HAVE_OLD_DISKLABEL
   1787 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1788 			memset(&newlabel, 0, sizeof newlabel);
   1789 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1790 			lp = &newlabel;
   1791 		} else
   1792 #endif
   1793 		lp = (struct disklabel *)data;
   1794 
   1795 		if ((error = raidlock(rs)) != 0)
   1796 			return (error);
   1797 
   1798 		rs->sc_flags |= RAIDF_LABELLING;
   1799 
   1800 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1801 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1802 		if (error == 0) {
   1803 			if (cmd == DIOCWDINFO
   1804 #ifdef __HAVE_OLD_DISKLABEL
   1805 			    || cmd == ODIOCWDINFO
   1806 #endif
   1807 			   )
   1808 				error = writedisklabel(RAIDLABELDEV(dev),
   1809 				    raidstrategy, rs->sc_dkdev.dk_label,
   1810 				    rs->sc_dkdev.dk_cpulabel);
   1811 		}
   1812 		rs->sc_flags &= ~RAIDF_LABELLING;
   1813 
   1814 		raidunlock(rs);
   1815 
   1816 		if (error)
   1817 			return (error);
   1818 		break;
   1819 	}
   1820 
   1821 	case DIOCWLABEL:
   1822 		if (*(int *) data != 0)
   1823 			rs->sc_flags |= RAIDF_WLABEL;
   1824 		else
   1825 			rs->sc_flags &= ~RAIDF_WLABEL;
   1826 		break;
   1827 
   1828 	case DIOCGDEFLABEL:
   1829 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1830 		break;
   1831 
   1832 #ifdef __HAVE_OLD_DISKLABEL
   1833 	case ODIOCGDEFLABEL:
   1834 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1835 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1836 			return ENOTTY;
   1837 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1838 		break;
   1839 #endif
   1840 
   1841 	case DIOCAWEDGE:
   1842 	case DIOCDWEDGE:
   1843 	    	dkw = (void *)data;
   1844 
   1845 		/* If the ioctl happens here, the parent is us. */
   1846 		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
   1847 		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
   1848 
   1849 	case DIOCLWEDGES:
   1850 		return dkwedge_list(&rs->sc_dkdev,
   1851 		    (struct dkwedge_list *)data, l);
   1852 	case DIOCCACHESYNC:
   1853 		return rf_sync_component_caches(raidPtr);
   1854 	default:
   1855 		retcode = ENOTTY;
   1856 	}
   1857 	return (retcode);
   1858 
   1859 }
   1860 
   1861 
   1862 /* raidinit -- complete the rest of the initialization for the
   1863    RAIDframe device.  */
   1864 
   1865 
   1866 static void
   1867 raidinit(RF_Raid_t *raidPtr)
   1868 {
   1869 	struct cfdata *cf;
   1870 	struct raid_softc *rs;
   1871 	int     unit;
   1872 
   1873 	unit = raidPtr->raidid;
   1874 
   1875 	rs = &raid_softc[unit];
   1876 
   1877 	/* XXX should check return code first... */
   1878 	rs->sc_flags |= RAIDF_INITED;
   1879 
   1880 	/* XXX doesn't check bounds. */
   1881 	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
   1882 
   1883 	/* attach the pseudo device */
   1884 	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
   1885 	cf->cf_name = raid_cd.cd_name;
   1886 	cf->cf_atname = raid_cd.cd_name;
   1887 	cf->cf_unit = unit;
   1888 	cf->cf_fstate = FSTATE_STAR;
   1889 
   1890 	rs->sc_dev = config_attach_pseudo(cf);
   1891 
   1892 	if (rs->sc_dev==NULL) {
   1893 		printf("raid%d: config_attach_pseudo failed\n",
   1894 		       raidPtr->raidid);
   1895 	}
   1896 
   1897 	/* disk_attach actually creates space for the CPU disklabel, among
   1898 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1899 	 * with disklabels. */
   1900 
   1901 	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
   1902 	disk_attach(&rs->sc_dkdev);
   1903 
   1904 	/* XXX There may be a weird interaction here between this, and
   1905 	 * protectedSectors, as used in RAIDframe.  */
   1906 
   1907 	rs->sc_size = raidPtr->totalSectors;
   1908 
   1909 	dkwedge_discover(&rs->sc_dkdev);
   1910 
   1911 	rf_set_properties(rs, raidPtr);
   1912 
   1913 }
   1914 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   1915 /* wake up the daemon & tell it to get us a spare table
   1916  * XXX
   1917  * the entries in the queues should be tagged with the raidPtr
   1918  * so that in the extremely rare case that two recons happen at once,
    1919  * we know for which device we're requesting a spare table
   1920  * XXX
   1921  *
   1922  * XXX This code is not currently used. GO
   1923  */
   1924 int
   1925 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
   1926 {
   1927 	int     retcode;
   1928 
   1929 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1930 	req->next = rf_sparet_wait_queue;
   1931 	rf_sparet_wait_queue = req;
   1932 	wakeup(&rf_sparet_wait_queue);
   1933 
   1934 	/* mpsleep unlocks the mutex */
   1935 	while (!rf_sparet_resp_queue) {
   1936 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1937 		    "raidframe getsparetable", 0);
   1938 	}
   1939 	req = rf_sparet_resp_queue;
   1940 	rf_sparet_resp_queue = req->next;
   1941 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1942 
   1943 	retcode = req->fcol;
   1944 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1945 					 * alloc'd */
   1946 	return (retcode);
   1947 }
   1948 #endif
   1949 
   1950 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1951  * bp & passes it down.
    1952  * Any calls originating in the kernel must use non-blocking I/O.
    1953  * Do some extra sanity checking to return "appropriate" error values for
    1954  * certain conditions (to make some standard utilities work).
   1955  *
   1956  * Formerly known as: rf_DoAccessKernel
   1957  */
   1958 void
   1959 raidstart(RF_Raid_t *raidPtr)
   1960 {
   1961 	RF_SectorCount_t num_blocks, pb, sum;
   1962 	RF_RaidAddr_t raid_addr;
   1963 	struct partition *pp;
   1964 	daddr_t blocknum;
   1965 	int     unit;
   1966 	struct raid_softc *rs;
   1967 	int     do_async;
   1968 	struct buf *bp;
   1969 	int rc;
   1970 
   1971 	unit = raidPtr->raidid;
   1972 	rs = &raid_softc[unit];
   1973 
   1974 	/* quick check to see if anything has died recently */
   1975 	RF_LOCK_MUTEX(raidPtr->mutex);
   1976 	if (raidPtr->numNewFailures > 0) {
   1977 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1978 		rf_update_component_labels(raidPtr,
   1979 					   RF_NORMAL_COMPONENT_UPDATE);
   1980 		RF_LOCK_MUTEX(raidPtr->mutex);
   1981 		raidPtr->numNewFailures--;
   1982 	}
   1983 
   1984 	/* Check to see if we're at the limit... */
   1985 	while (raidPtr->openings > 0) {
   1986 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1987 
   1988 		/* get the next item, if any, from the queue */
   1989 		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
   1990 			/* nothing more to do */
   1991 			return;
   1992 		}
   1993 
   1994 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1995 		 * partition.. Need to make it absolute to the underlying
   1996 		 * device.. */
   1997 
   1998 		blocknum = bp->b_blkno;
   1999 		if (DISKPART(bp->b_dev) != RAW_PART) {
   2000 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   2001 			blocknum += pp->p_offset;
   2002 		}
   2003 
   2004 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   2005 			    (int) blocknum));
   2006 
   2007 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   2008 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   2009 
   2010 		/* *THIS* is where we adjust what block we're going to...
   2011 		 * but DO NOT TOUCH bp->b_blkno!!! */
   2012 		raid_addr = blocknum;
   2013 
   2014 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   2015 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   2016 		sum = raid_addr + num_blocks + pb;
   2017 		if (1 || rf_debugKernelAccess) {
   2018 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   2019 				    (int) raid_addr, (int) sum, (int) num_blocks,
   2020 				    (int) pb, (int) bp->b_resid));
   2021 		}
   2022 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   2023 		    || (sum < num_blocks) || (sum < pb)) {
   2024 			bp->b_error = ENOSPC;
   2025 			bp->b_resid = bp->b_bcount;
   2026 			biodone(bp);
   2027 			RF_LOCK_MUTEX(raidPtr->mutex);
   2028 			continue;
   2029 		}
   2030 		/*
   2031 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   2032 		 */
   2033 
   2034 		if (bp->b_bcount & raidPtr->sectorMask) {
   2035 			bp->b_error = EINVAL;
   2036 			bp->b_resid = bp->b_bcount;
   2037 			biodone(bp);
   2038 			RF_LOCK_MUTEX(raidPtr->mutex);
   2039 			continue;
   2040 
   2041 		}
   2042 		db1_printf(("Calling DoAccess..\n"));
   2043 
   2044 
   2045 		RF_LOCK_MUTEX(raidPtr->mutex);
   2046 		raidPtr->openings--;
   2047 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   2048 
   2049 		/*
   2050 		 * Everything is async.
   2051 		 */
   2052 		do_async = 1;
   2053 
   2054 		disk_busy(&rs->sc_dkdev);
   2055 
   2056 		/* XXX we're still at splbio() here... do we *really*
   2057 		   need to be? */
   2058 
   2059 		/* don't ever condition on bp->b_flags & B_WRITE.
   2060 		 * always condition on B_READ instead */
   2061 
   2062 		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   2063 				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   2064 				 do_async, raid_addr, num_blocks,
   2065 				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   2066 
   2067 		if (rc) {
   2068 			bp->b_error = rc;
   2069 			bp->b_resid = bp->b_bcount;
   2070 			biodone(bp);
   2071 			/* continue loop */
   2072 		}
   2073 
   2074 		RF_LOCK_MUTEX(raidPtr->mutex);
   2075 	}
   2076 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   2077 }
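/*
 * Worked example of the address arithmetic in raidstart() above (numbers
 * invented for illustration): with 512-byte sectors, logBytesPerSector is 9
 * and sectorMask is 511.  A 64KB request at b_blkno 128 on a partition whose
 * p_offset is 63 yields:
 *
 *	raid_addr  = 128 + 63              = 191
 *	num_blocks = 65536 >> 9            = 128
 *	pb         = (65536 & 511) ? 1 : 0 = 0
 *	sum        = 191 + 128 + 0         = 319  (must not exceed totalSectors)
 */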
   2078 
   2079 
   2080 
   2081 
   2082 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   2083 
   2084 int
   2085 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
   2086 {
   2087 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   2088 	struct buf *bp;
   2089 
   2090 	req->queue = queue;
   2091 
   2092 #if DIAGNOSTIC
   2093 	if (queue->raidPtr->raidid >= numraid) {
   2094 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
   2095 		    numraid);
   2096 		panic("Invalid Unit number in rf_DispatchKernelIO");
   2097 	}
   2098 #endif
   2099 
   2100 	bp = req->bp;
   2101 
   2102 	switch (req->type) {
   2103 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   2104 		/* XXX need to do something extra here.. */
   2105 		/* I'm leaving this in, as I've never actually seen it used,
   2106 		 * and I'd like folks to report it... GO */
    2107 		printf("WAKEUP CALLED\n");
   2108 		queue->numOutstanding++;
   2109 
   2110 		bp->b_flags = 0;
   2111 		bp->b_private = req;
   2112 
   2113 		KernelWakeupFunc(bp);
   2114 		break;
   2115 
   2116 	case RF_IO_TYPE_READ:
   2117 	case RF_IO_TYPE_WRITE:
   2118 #if RF_ACC_TRACE > 0
   2119 		if (req->tracerec) {
   2120 			RF_ETIMER_START(req->tracerec->timer);
   2121 		}
   2122 #endif
   2123 		InitBP(bp, queue->rf_cinfo->ci_vp,
   2124 		    op, queue->rf_cinfo->ci_dev,
   2125 		    req->sectorOffset, req->numSector,
   2126 		    req->buf, KernelWakeupFunc, (void *) req,
   2127 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   2128 
   2129 		if (rf_debugKernelAccess) {
   2130 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   2131 				(long) bp->b_blkno));
   2132 		}
   2133 		queue->numOutstanding++;
   2134 		queue->last_deq_sector = req->sectorOffset;
   2135 		/* acc wouldn't have been let in if there were any pending
   2136 		 * reqs at any other priority */
   2137 		queue->curPriority = req->priority;
   2138 
   2139 		db1_printf(("Going for %c to unit %d col %d\n",
   2140 			    req->type, queue->raidPtr->raidid,
   2141 			    queue->col));
   2142 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   2143 			(int) req->sectorOffset, (int) req->numSector,
   2144 			(int) (req->numSector <<
   2145 			    queue->raidPtr->logBytesPerSector),
   2146 			(int) queue->raidPtr->logBytesPerSector));
   2147 
   2148 		/*
   2149 		 * XXX: drop lock here since this can block at
   2150 		 * least with backing SCSI devices.  Retake it
   2151 		 * to minimize fuss with calling interfaces.
   2152 		 */
   2153 
   2154 		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
   2155 		bdev_strategy(bp);
   2156 		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
   2157 		break;
   2158 
   2159 	default:
   2160 		panic("bad req->type in rf_DispatchKernelIO");
   2161 	}
   2162 	db1_printf(("Exiting from DispatchKernelIO\n"));
   2163 
   2164 	return (0);
   2165 }
    2166 /* this is the callback function associated with an I/O invoked from
   2167    kernel code.
   2168  */
   2169 static void
   2170 KernelWakeupFunc(struct buf *bp)
   2171 {
   2172 	RF_DiskQueueData_t *req = NULL;
   2173 	RF_DiskQueue_t *queue;
   2174 	int s;
   2175 
   2176 	s = splbio();
   2177 	db1_printf(("recovering the request queue:\n"));
   2178 	req = bp->b_private;
   2179 
   2180 	queue = (RF_DiskQueue_t *) req->queue;
   2181 
   2182 #if RF_ACC_TRACE > 0
   2183 	if (req->tracerec) {
   2184 		RF_ETIMER_STOP(req->tracerec->timer);
   2185 		RF_ETIMER_EVAL(req->tracerec->timer);
   2186 		RF_LOCK_MUTEX(rf_tracing_mutex);
   2187 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2188 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2189 		req->tracerec->num_phys_ios++;
   2190 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   2191 	}
   2192 #endif
   2193 
   2194 	/* XXX Ok, let's get aggressive... If b_error is set, let's go
   2195 	 * ballistic, and mark the component as hosed... */
   2196 
   2197 	if (bp->b_error != 0) {
   2198 		/* Mark the disk as dead */
   2199 		/* but only mark it once... */
   2200 		/* and only if it wouldn't leave this RAID set
   2201 		   completely broken */
   2202 		if (((queue->raidPtr->Disks[queue->col].status ==
   2203 		      rf_ds_optimal) ||
   2204 		     (queue->raidPtr->Disks[queue->col].status ==
   2205 		      rf_ds_used_spare)) &&
   2206 		     (queue->raidPtr->numFailures <
   2207 		      queue->raidPtr->Layout.map->faultsTolerated)) {
   2208 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   2209 			       queue->raidPtr->raidid,
   2210 			       queue->raidPtr->Disks[queue->col].devname);
   2211 			queue->raidPtr->Disks[queue->col].status =
   2212 			    rf_ds_failed;
   2213 			queue->raidPtr->status = rf_rs_degraded;
   2214 			queue->raidPtr->numFailures++;
   2215 			queue->raidPtr->numNewFailures++;
   2216 		} else {	/* Disk is already dead... */
   2217 			/* printf("Disk already marked as dead!\n"); */
   2218 		}
   2219 
   2220 	}
   2221 
   2222 	/* Fill in the error value */
   2223 
   2224 	req->error = bp->b_error;
   2225 
   2226 	simple_lock(&queue->raidPtr->iodone_lock);
   2227 
   2228 	/* Drop this one on the "finished" queue... */
   2229 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   2230 
   2231 	/* Let the raidio thread know there is work to be done. */
   2232 	wakeup(&(queue->raidPtr->iodone));
   2233 
   2234 	simple_unlock(&queue->raidPtr->iodone_lock);
   2235 
   2236 	splx(s);
   2237 }
   2238 
   2239 
   2240 
   2241 /*
   2242  * initialize a buf structure for doing an I/O in the kernel.
   2243  */
   2244 static void
   2245 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2246        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
   2247        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
   2248        struct proc *b_proc)
   2249 {
   2250 	/* bp->b_flags       = B_PHYS | rw_flag; */
   2251 	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
   2252 	bp->b_oflags = 0;
   2253 	bp->b_cflags = 0;
   2254 	bp->b_bcount = numSect << logBytesPerSector;
   2255 	bp->b_bufsize = bp->b_bcount;
   2256 	bp->b_error = 0;
   2257 	bp->b_dev = dev;
   2258 	bp->b_data = bf;
   2259 	bp->b_blkno = startSect;
   2260 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2261 	if (bp->b_bcount == 0) {
   2262 		panic("bp->b_bcount is zero in InitBP!!");
   2263 	}
   2264 	bp->b_proc = b_proc;
   2265 	bp->b_iodone = cbFunc;
   2266 	bp->b_private = cbArg;
   2267 }
   2268 
   2269 static void
   2270 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
   2271 		    struct disklabel *lp)
   2272 {
   2273 	memset(lp, 0, sizeof(*lp));
   2274 
   2275 	/* fabricate a label... */
   2276 	lp->d_secperunit = raidPtr->totalSectors;
   2277 	lp->d_secsize = raidPtr->bytesPerSector;
   2278 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2279 	lp->d_ntracks = 4 * raidPtr->numCol;
   2280 	lp->d_ncylinders = raidPtr->totalSectors /
   2281 		(lp->d_nsectors * lp->d_ntracks);
   2282 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2283 
   2284 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2285 	lp->d_type = DTYPE_RAID;
   2286 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2287 	lp->d_rpm = 3600;
   2288 	lp->d_interleave = 1;
   2289 	lp->d_flags = 0;
   2290 
   2291 	lp->d_partitions[RAW_PART].p_offset = 0;
   2292 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2293 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2294 	lp->d_npartitions = RAW_PART + 1;
   2295 
   2296 	lp->d_magic = DISKMAGIC;
   2297 	lp->d_magic2 = DISKMAGIC;
   2298 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2299 
   2300 }
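/*
 * Example of the fabricated geometry above (figures invented for
 * illustration): for a 4-column set with dataSectorsPerStripe == 96 and
 * totalSectors == 15360000,
 *
 *	d_nsectors   = 96
 *	d_ntracks    = 4 * 4            = 16
 *	d_secpercyl  = 16 * 96          = 1536
 *	d_ncylinders = 15360000 / 1536  = 10000
 *
 * The geometry is purely synthetic; only d_secperunit and d_secsize reflect
 * the real size of the RAID device.
 */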
   2301 /*
   2302  * Read the disklabel from the raid device.  If one is not present, fake one
   2303  * up.
   2304  */
   2305 static void
   2306 raidgetdisklabel(dev_t dev)
   2307 {
   2308 	int     unit = raidunit(dev);
   2309 	struct raid_softc *rs = &raid_softc[unit];
   2310 	const char   *errstring;
   2311 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2312 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2313 	RF_Raid_t *raidPtr;
   2314 
   2315 	db1_printf(("Getting the disklabel...\n"));
   2316 
   2317 	memset(clp, 0, sizeof(*clp));
   2318 
   2319 	raidPtr = raidPtrs[unit];
   2320 
   2321 	raidgetdefaultlabel(raidPtr, rs, lp);
   2322 
   2323 	/*
   2324 	 * Call the generic disklabel extraction routine.
   2325 	 */
   2326 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2327 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2328 	if (errstring)
   2329 		raidmakedisklabel(rs);
   2330 	else {
   2331 		int     i;
   2332 		struct partition *pp;
   2333 
   2334 		/*
   2335 		 * Sanity check whether the found disklabel is valid.
   2336 		 *
    2337 		 * This is necessary since the total size of the raid device
    2338 		 * may vary when the interleave is changed even though exactly
    2339 		 * the same components are used, and an old disklabel may be
    2340 		 * used if one is found.
   2341 		 */
   2342 		if (lp->d_secperunit != rs->sc_size)
   2343 			printf("raid%d: WARNING: %s: "
   2344 			    "total sector size in disklabel (%" PRIu32 ") != "
   2345 			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
   2346 			    lp->d_secperunit, rs->sc_size);
   2347 		for (i = 0; i < lp->d_npartitions; i++) {
   2348 			pp = &lp->d_partitions[i];
   2349 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2350 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2351 				       "exceeds the size of raid (%" PRIu64 ")\n",
   2352 				       unit, rs->sc_xname, 'a' + i, rs->sc_size);
   2353 		}
   2354 	}
   2355 
   2356 }
   2357 /*
   2358  * Take care of things one might want to take care of in the event
   2359  * that a disklabel isn't present.
   2360  */
   2361 static void
   2362 raidmakedisklabel(struct raid_softc *rs)
   2363 {
   2364 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2365 	db1_printf(("Making a label..\n"));
   2366 
   2367 	/*
   2368 	 * For historical reasons, if there's no disklabel present
   2369 	 * the raw partition must be marked FS_BSDFFS.
   2370 	 */
   2371 
   2372 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2373 
   2374 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2375 
   2376 	lp->d_checksum = dkcksum(lp);
   2377 }
   2378 /*
   2379  * Wait interruptibly for an exclusive lock.
   2380  *
   2381  * XXX
   2382  * Several drivers do this; it should be abstracted and made MP-safe.
   2383  * (Hmm... where have we seen this warning before :->  GO )
   2384  */
   2385 static int
   2386 raidlock(struct raid_softc *rs)
   2387 {
   2388 	int     error;
   2389 
   2390 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2391 		rs->sc_flags |= RAIDF_WANTED;
   2392 		if ((error =
   2393 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2394 			return (error);
   2395 	}
   2396 	rs->sc_flags |= RAIDF_LOCKED;
   2397 	return (0);
   2398 }
   2399 /*
   2400  * Unlock and wake up any waiters.
   2401  */
   2402 static void
   2403 raidunlock(struct raid_softc *rs)
   2404 {
   2405 
   2406 	rs->sc_flags &= ~RAIDF_LOCKED;
   2407 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2408 		rs->sc_flags &= ~RAIDF_WANTED;
   2409 		wakeup(rs);
   2410 	}
   2411 }
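
/*
 * Typical use of the lock pair above, modelled on the DIOCSDINFO/DIOCWDINFO
 * handler earlier in this file (minimal sketch, label handling elided):
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return (error);
 *	rs->sc_flags |= RAIDF_LABELLING;
 *	... update the disklabel ...
 *	rs->sc_flags &= ~RAIDF_LABELLING;
 *	raidunlock(rs);
 */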
   2412 
   2413 
   2414 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2415 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2416 #define RF_PARITY_MAP_OFFSET \
   2417 	(RF_COMPONENT_INFO_OFFSET + RF_COMPONENT_INFO_SIZE)
   2418 #define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
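
/*
 * On-disk layout implied by the constants above, as byte offsets within each
 * component (used by raidread/raidwrite_component_area() below):
 *
 *	component label:  16384 .. 17407  (RF_COMPONENT_INFO_SIZE == 1024)
 *	parity map:       17408 ..        (RF_PARITYMAP_NBYTE bytes)
 *
 * The byte offsets are converted to DEV_BSIZE blocks before the I/O is
 * issued, e.g. 17408 / 512 == block 34 for the parity map.
 */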
   2419 
   2420 int
   2421 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2422 {
   2423 	RF_ComponentLabel_t *clabel;
   2424 
   2425 	clabel = raidget_component_label(raidPtr, col);
   2426 	clabel->clean = RF_RAID_CLEAN;
   2427 	raidflush_component_label(raidPtr, col);
   2428 	return(0);
   2429 }
   2430 
   2431 
   2432 int
   2433 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2434 {
   2435 	RF_ComponentLabel_t *clabel;
   2436 
   2437 	clabel = raidget_component_label(raidPtr, col);
   2438 	clabel->clean = RF_RAID_DIRTY;
   2439 	raidflush_component_label(raidPtr, col);
   2440 	return(0);
   2441 }
   2442 
   2443 int
   2444 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2445 {
   2446 	return raidread_component_label(raidPtr->Disks[col].dev,
   2447 	    raidPtr->raid_cinfo[col].ci_vp,
   2448 	    &raidPtr->raid_cinfo[col].ci_label);
   2449 }
   2450 
   2451 RF_ComponentLabel_t *
   2452 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2453 {
   2454 	return &raidPtr->raid_cinfo[col].ci_label;
   2455 }
   2456 
   2457 int
   2458 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
   2459 {
   2460 	RF_ComponentLabel_t *label;
   2461 
   2462 	label = &raidPtr->raid_cinfo[col].ci_label;
   2463 	label->mod_counter = raidPtr->mod_counter;
   2464 #ifndef RF_NO_PARITY_MAP
   2465 	label->parity_map_modcount = label->mod_counter;
   2466 #endif
   2467 	return raidwrite_component_label(raidPtr->Disks[col].dev,
   2468 	    raidPtr->raid_cinfo[col].ci_vp, label);
   2469 }
   2470 
   2471 
   2472 static int
   2473 raidread_component_label(dev_t dev, struct vnode *b_vp,
   2474     RF_ComponentLabel_t *clabel)
   2475 {
   2476 	return raidread_component_area(dev, b_vp, clabel,
   2477 	    sizeof(RF_ComponentLabel_t),
   2478 	    RF_COMPONENT_INFO_OFFSET, RF_COMPONENT_INFO_SIZE);
   2479 }
   2480 
   2481 /* ARGSUSED */
   2482 static int
   2483 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
   2484     size_t msize, daddr_t offset, daddr_t dsize)
   2485 {
   2486 	struct buf *bp;
   2487 	const struct bdevsw *bdev;
   2488 	int error;
   2489 
   2490 	/* XXX should probably ensure that we don't try to do this if
   2491 	   someone has changed rf_protected_sectors. */
   2492 
   2493 	if (b_vp == NULL) {
   2494 		/* For whatever reason, this component is not valid.
   2495 		   Don't try to read a component label from it. */
   2496 		return(EINVAL);
   2497 	}
   2498 
   2499 	/* get a block of the appropriate size... */
   2500 	bp = geteblk((int)dsize);
   2501 	bp->b_dev = dev;
   2502 
   2503 	/* get our ducks in a row for the read */
   2504 	bp->b_blkno = offset / DEV_BSIZE;
   2505 	bp->b_bcount = dsize;
   2506 	bp->b_flags |= B_READ;
   2507  	bp->b_resid = dsize;
   2508 
   2509 	bdev = bdevsw_lookup(bp->b_dev);
    2510 	if (bdev == NULL) {
         		brelse(bp, 0);
    2511 		return (ENXIO);
         	}
   2512 	(*bdev->d_strategy)(bp);
   2513 
   2514 	error = biowait(bp);
   2515 
   2516 	if (!error) {
   2517 		memcpy(data, bp->b_data, msize);
   2518 	}
   2519 
   2520 	brelse(bp, 0);
   2521 	return(error);
   2522 }
   2523 
   2524 
   2525 static int
   2526 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
   2527 	RF_ComponentLabel_t *clabel)
   2528 {
   2529 	return raidwrite_component_area(dev, b_vp, clabel,
   2530 	    sizeof(RF_ComponentLabel_t),
   2531 	    RF_COMPONENT_INFO_OFFSET, RF_COMPONENT_INFO_SIZE, 0);
   2532 }
   2533 
   2534 /* ARGSUSED */
   2535 static int
   2536 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
   2537     size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
   2538 {
   2539 	struct buf *bp;
   2540 	const struct bdevsw *bdev;
   2541 	int error;
   2542 
   2543 	/* get a block of the appropriate size... */
   2544 	bp = geteblk((int)dsize);
   2545 	bp->b_dev = dev;
   2546 
   2547 	/* get our ducks in a row for the write */
   2548 	bp->b_blkno = offset / DEV_BSIZE;
   2549 	bp->b_bcount = dsize;
   2550 	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
   2551  	bp->b_resid = dsize;
   2552 
   2553 	memset(bp->b_data, 0, dsize);
   2554 	memcpy(bp->b_data, data, msize);
   2555 
   2556 	bdev = bdevsw_lookup(bp->b_dev);
    2557 	if (bdev == NULL) {
         		brelse(bp, 0);
    2558 		return (ENXIO);
         	}
   2559 	(*bdev->d_strategy)(bp);
   2560 	if (asyncp)
   2561 		return 0;
   2562 	error = biowait(bp);
   2563 	brelse(bp, 0);
   2564 	if (error) {
   2565 #if 1
   2566 		printf("Failed to write RAID component info!\n");
   2567 #endif
   2568 	}
   2569 
   2570 	return(error);
   2571 }
   2572 
   2573 void
   2574 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
   2575 {
   2576 	int c;
   2577 
   2578 	for (c = 0; c < raidPtr->numCol; c++) {
   2579 		/* Skip dead disks. */
   2580 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
   2581 			continue;
   2582 		/* XXXjld: what if an error occurs here? */
   2583 		raidwrite_component_area(raidPtr->Disks[c].dev,
   2584 		    raidPtr->raid_cinfo[c].ci_vp, map,
   2585 		    RF_PARITYMAP_NBYTE,
   2586 		    RF_PARITY_MAP_OFFSET, RF_PARITY_MAP_SIZE, 0);
   2587 	}
   2588 }
   2589 
   2590 void
   2591 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
   2592 {
   2593 	struct rf_paritymap_ondisk tmp;
   2594 	int c,first;
   2595 
   2596 	first=1;
   2597 	for (c = 0; c < raidPtr->numCol; c++) {
   2598 		/* Skip dead disks. */
   2599 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
   2600 			continue;
   2601 		raidread_component_area(raidPtr->Disks[c].dev,
   2602 		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
   2603 		    RF_PARITYMAP_NBYTE,
   2604 		    RF_PARITY_MAP_OFFSET, RF_PARITY_MAP_SIZE);
   2605 		if (first) {
   2606 			memcpy(map, &tmp, sizeof(*map));
   2607 			first = 0;
   2608 		} else {
   2609 			rf_paritymap_merge(map, &tmp);
   2610 		}
   2611 	}
   2612 }
   2613 
   2614 void
   2615 rf_markalldirty(RF_Raid_t *raidPtr)
   2616 {
   2617 	RF_ComponentLabel_t *clabel;
   2618 	int sparecol;
   2619 	int c;
   2620 	int j;
   2621 	int scol = -1;
   2622 
   2623 	raidPtr->mod_counter++;
   2624 	for (c = 0; c < raidPtr->numCol; c++) {
   2625 		/* we don't want to touch (at all) a disk that has
   2626 		   failed */
   2627 		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
   2628 			clabel = raidget_component_label(raidPtr, c);
   2629 			if (clabel->status == rf_ds_spared) {
   2630 				/* XXX do something special...
   2631 				   but whatever you do, don't
   2632 				   try to access it!! */
   2633 			} else {
   2634 				raidmarkdirty(raidPtr, c);
   2635 			}
   2636 		}
   2637 	}
   2638 
   2639 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2640 		sparecol = raidPtr->numCol + c;
   2641 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2642 			/*
   2643 
   2644 			   we claim this disk is "optimal" if it's
   2645 			   rf_ds_used_spare, as that means it should be
   2646 			   directly substitutable for the disk it replaced.
   2647 			   We note that too...
   2648 
   2649 			 */
   2650 
   2651 			for(j=0;j<raidPtr->numCol;j++) {
   2652 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2653 					scol = j;
   2654 					break;
   2655 				}
   2656 			}
   2657 
   2658 			clabel = raidget_component_label(raidPtr, sparecol);
   2659 			/* make sure status is noted */
   2660 
   2661 			raid_init_component_label(raidPtr, clabel);
   2662 
   2663 			clabel->row = 0;
   2664 			clabel->column = scol;
   2665 			/* Note: we *don't* change status from rf_ds_used_spare
   2666 			   to rf_ds_optimal */
   2667 			/* clabel.status = rf_ds_optimal; */
   2668 
   2669 			raidmarkdirty(raidPtr, sparecol);
   2670 		}
   2671 	}
   2672 }
   2673 
   2674 
   2675 void
   2676 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
   2677 {
   2678 	RF_ComponentLabel_t *clabel;
   2679 	int sparecol;
   2680 	int c;
   2681 	int j;
   2682 	int scol;
   2683 
   2684 	scol = -1;
   2685 
   2686 	/* XXX should do extra checks to make sure things really are clean,
   2687 	   rather than blindly setting the clean bit... */
   2688 
   2689 	raidPtr->mod_counter++;
   2690 
   2691 	for (c = 0; c < raidPtr->numCol; c++) {
   2692 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   2693 			clabel = raidget_component_label(raidPtr, c);
   2694 			/* make sure status is noted */
   2695 			clabel->status = rf_ds_optimal;
   2696 
   2697 			/* note what unit we are configured as */
   2698 			clabel->last_unit = raidPtr->raidid;
   2699 
   2700 			raidflush_component_label(raidPtr, c);
   2701 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2702 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2703 					raidmarkclean(raidPtr, c);
   2704 				}
   2705 			}
   2706 		}
   2707 		/* else we don't touch it.. */
   2708 	}
   2709 
   2710 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2711 		sparecol = raidPtr->numCol + c;
   2712 		/* Need to ensure that the reconstruct actually completed! */
   2713 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2714 			/*
   2715 
   2716 			   we claim this disk is "optimal" if it's
   2717 			   rf_ds_used_spare, as that means it should be
   2718 			   directly substitutable for the disk it replaced.
   2719 			   We note that too...
   2720 
   2721 			 */
   2722 
   2723 			for(j=0;j<raidPtr->numCol;j++) {
   2724 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2725 					scol = j;
   2726 					break;
   2727 				}
   2728 			}
   2729 
   2730 			/* XXX shouldn't *really* need this... */
   2731 			clabel = raidget_component_label(raidPtr, sparecol);
   2732 			/* make sure status is noted */
   2733 
   2734 			raid_init_component_label(raidPtr, clabel);
   2735 
   2736 			clabel->column = scol;
   2737 			clabel->status = rf_ds_optimal;
   2738 			clabel->last_unit = raidPtr->raidid;
   2739 
   2740 			raidflush_component_label(raidPtr, sparecol);
   2741 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2742 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2743 					raidmarkclean(raidPtr, sparecol);
   2744 				}
   2745 			}
   2746 		}
   2747 	}
   2748 }
   2749 
   2750 void
   2751 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2752 {
   2753 
   2754 	if (vp != NULL) {
   2755 		if (auto_configured == 1) {
   2756 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2757 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2758 			vput(vp);
   2759 
   2760 		} else {
   2761 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
   2762 		}
   2763 	}
   2764 }
   2765 
   2766 
   2767 void
   2768 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2769 {
   2770 	int r,c;
   2771 	struct vnode *vp;
   2772 	int acd;
   2773 
   2774 
   2775 	/* We take this opportunity to close the vnodes like we should.. */
   2776 
   2777 	for (c = 0; c < raidPtr->numCol; c++) {
   2778 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2779 		acd = raidPtr->Disks[c].auto_configured;
   2780 		rf_close_component(raidPtr, vp, acd);
   2781 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2782 		raidPtr->Disks[c].auto_configured = 0;
   2783 	}
   2784 
   2785 	for (r = 0; r < raidPtr->numSpare; r++) {
   2786 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2787 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2788 		rf_close_component(raidPtr, vp, acd);
   2789 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2790 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2791 	}
   2792 }
   2793 
   2794 
   2795 void
   2796 rf_ReconThread(struct rf_recon_req *req)
   2797 {
   2798 	int     s;
   2799 	RF_Raid_t *raidPtr;
   2800 
   2801 	s = splbio();
   2802 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2803 	raidPtr->recon_in_progress = 1;
   2804 
   2805 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2806 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2807 
   2808 	RF_Free(req, sizeof(*req));
   2809 
   2810 	raidPtr->recon_in_progress = 0;
   2811 	splx(s);
   2812 
   2813 	/* That's all... */
   2814 	kthread_exit(0);	/* does not return */
   2815 }
   2816 
   2817 void
   2818 rf_RewriteParityThread(RF_Raid_t *raidPtr)
   2819 {
   2820 	int retcode;
   2821 	int s;
   2822 
   2823 	raidPtr->parity_rewrite_stripes_done = 0;
   2824 	raidPtr->parity_rewrite_in_progress = 1;
   2825 	s = splbio();
   2826 	retcode = rf_RewriteParity(raidPtr);
   2827 	splx(s);
   2828 	if (retcode) {
   2829 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2830 	} else {
   2831 		/* set the clean bit!  If we shutdown correctly,
   2832 		   the clean bit on each component label will get
   2833 		   set */
   2834 		raidPtr->parity_good = RF_RAID_CLEAN;
   2835 	}
   2836 	raidPtr->parity_rewrite_in_progress = 0;
   2837 
   2838 	/* Anyone waiting for us to stop?  If so, inform them... */
   2839 	if (raidPtr->waitShutdown) {
   2840 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2841 	}
   2842 
   2843 	/* That's all... */
   2844 	kthread_exit(0);	/* does not return */
   2845 }
   2846 
   2847 
   2848 void
   2849 rf_CopybackThread(RF_Raid_t *raidPtr)
   2850 {
   2851 	int s;
   2852 
   2853 	raidPtr->copyback_in_progress = 1;
   2854 	s = splbio();
   2855 	rf_CopybackReconstructedData(raidPtr);
   2856 	splx(s);
   2857 	raidPtr->copyback_in_progress = 0;
   2858 
   2859 	/* That's all... */
   2860 	kthread_exit(0);	/* does not return */
   2861 }
   2862 
   2863 
   2864 void
   2865 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
   2866 {
   2867 	int s;
   2868 	RF_Raid_t *raidPtr;
   2869 
   2870 	s = splbio();
   2871 	raidPtr = req->raidPtr;
   2872 	raidPtr->recon_in_progress = 1;
   2873 	rf_ReconstructInPlace(raidPtr, req->col);
   2874 	RF_Free(req, sizeof(*req));
   2875 	raidPtr->recon_in_progress = 0;
   2876 	splx(s);
   2877 
   2878 	/* That's all... */
   2879 	kthread_exit(0);	/* does not return */
   2880 }
   2881 
   2882 static RF_AutoConfig_t *
   2883 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
   2884     const char *cname, RF_SectorCount_t size)
   2885 {
   2886 	int good_one = 0;
   2887 	RF_ComponentLabel_t *clabel;
   2888 	RF_AutoConfig_t *ac;
   2889 
   2890 	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
   2891 	if (clabel == NULL) {
   2892 oomem:
   2893 		    while(ac_list) {
   2894 			    ac = ac_list;
   2895 			    if (ac->clabel)
   2896 				    free(ac->clabel, M_RAIDFRAME);
   2897 			    ac_list = ac_list->next;
   2898 			    free(ac, M_RAIDFRAME);
   2899 		    }
   2900 		    printf("RAID auto config: out of memory!\n");
   2901 		    return NULL; /* XXX probably should panic? */
   2902 	}
   2903 
   2904 	if (!raidread_component_label(dev, vp, clabel)) {
   2905 		    /* Got the label.  Does it look reasonable? */
   2906 		    if (rf_reasonable_label(clabel) &&
   2907 			(clabel->partitionSize <= size)) {
   2908 #ifdef DEBUG
   2909 			    printf("Component on: %s: %llu\n",
   2910 				cname, (unsigned long long)size);
   2911 			    rf_print_component_label(clabel);
   2912 #endif
   2913 			    /* if it's reasonable, add it, else ignore it. */
   2914 			    ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
   2915 				M_NOWAIT);
   2916 			    if (ac == NULL) {
   2917 				    free(clabel, M_RAIDFRAME);
   2918 				    goto oomem;
   2919 			    }
   2920 			    strlcpy(ac->devname, cname, sizeof(ac->devname));
   2921 			    ac->dev = dev;
   2922 			    ac->vp = vp;
   2923 			    ac->clabel = clabel;
   2924 			    ac->next = ac_list;
   2925 			    ac_list = ac;
   2926 			    good_one = 1;
   2927 		    }
   2928 	}
   2929 	if (!good_one) {
   2930 		/* cleanup */
   2931 		free(clabel, M_RAIDFRAME);
   2932 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2933 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2934 		vput(vp);
   2935 	}
   2936 	return ac_list;
   2937 }
   2938 
   2939 RF_AutoConfig_t *
   2940 rf_find_raid_components()
   2941 {
   2942 	struct vnode *vp;
   2943 	struct disklabel label;
   2944 	struct device *dv;
   2945 	dev_t dev;
   2946 	int bmajor, bminor, wedge;
   2947 	int error;
   2948 	int i;
   2949 	RF_AutoConfig_t *ac_list;
   2950 
   2951 
   2952 	/* initialize the AutoConfig list */
   2953 	ac_list = NULL;
   2954 
   2955 	/* we begin by trolling through *all* the devices on the system */
   2956 
   2957 	for (dv = alldevs.tqh_first; dv != NULL;
   2958 	     dv = dv->dv_list.tqe_next) {
   2959 
   2960 		/* we are only interested in disks... */
   2961 		if (device_class(dv) != DV_DISK)
   2962 			continue;
   2963 
   2964 		/* we don't care about floppies... */
   2965 		if (device_is_a(dv, "fd")) {
   2966 			continue;
   2967 		}
   2968 
   2969 		/* we don't care about CD's... */
   2970 		if (device_is_a(dv, "cd")) {
   2971 			continue;
   2972 		}
   2973 
   2974 		/* we don't care about md's... */
   2975 		if (device_is_a(dv, "md")) {
   2976 			continue;
   2977 		}
   2978 
   2979 		/* hdfd is the Atari/Hades floppy driver */
   2980 		if (device_is_a(dv, "hdfd")) {
   2981 			continue;
   2982 		}
   2983 
   2984 		/* fdisa is the Atari/Milan floppy driver */
   2985 		if (device_is_a(dv, "fdisa")) {
   2986 			continue;
   2987 		}
   2988 
   2989 		/* need to find the device_name_to_block_device_major stuff */
   2990 		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
   2991 
   2992 		/* get a vnode for the raw partition of this disk */
   2993 
   2994 		wedge = device_is_a(dv, "dk");
   2995 		bminor = minor(device_unit(dv));
   2996 		dev = wedge ? makedev(bmajor, bminor) :
   2997 		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
   2998 		if (bdevvp(dev, &vp))
   2999 			panic("RAID can't alloc vnode");
   3000 
   3001 		error = VOP_OPEN(vp, FREAD, NOCRED);
   3002 
   3003 		if (error) {
   3004 			/* "Who cares."  Continue looking
   3005 			   for something that exists*/
   3006 			vput(vp);
   3007 			continue;
   3008 		}
   3009 
   3010 		if (wedge) {
   3011 			struct dkwedge_info dkw;
   3012 			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
   3013 			    NOCRED);
   3014 			if (error) {
   3015 				printf("RAIDframe: can't get wedge info for "
   3016 				    "dev %s (%d)\n", device_xname(dv), error);
   3017 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   3018 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   3019 				vput(vp);
   3020 				continue;
   3021 			}
   3022 
   3023 			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
   3024 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   3025 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   3026 				vput(vp);
   3027 				continue;
   3028 			}
   3029 
   3030 			ac_list = rf_get_component(ac_list, dev, vp,
   3031 			    device_xname(dv), dkw.dkw_size);
   3032 			continue;
   3033 		}
   3034 
   3035 		/* Ok, the disk exists.  Go get the disklabel. */
   3036 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
   3037 		if (error) {
   3038 			/*
   3039 			 * XXX can't happen - open() would
   3040 			 * have errored out (or faked up one)
   3041 			 */
   3042 			if (error != ENOTTY)
   3043 				printf("RAIDframe: can't get label for dev "
   3044 				    "%s (%d)\n", device_xname(dv), error);
   3045 		}
   3046 
   3047 		/* don't need this any more.  We'll allocate it again
   3048 		   a little later if we really do... */
   3049 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   3050 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   3051 		vput(vp);
   3052 
   3053 		if (error)
   3054 			continue;
   3055 
   3056 		for (i = 0; i < label.d_npartitions; i++) {
   3057 			char cname[sizeof(ac_list->devname)];
   3058 
   3059 			/* We only support partitions marked as RAID */
   3060 			if (label.d_partitions[i].p_fstype != FS_RAID)
   3061 				continue;
   3062 
   3063 			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
   3064 			if (bdevvp(dev, &vp))
   3065 				panic("RAID can't alloc vnode");
   3066 
   3067 			error = VOP_OPEN(vp, FREAD, NOCRED);
   3068 			if (error) {
   3069 				/* Whatever... */
   3070 				vput(vp);
   3071 				continue;
   3072 			}
   3073 			snprintf(cname, sizeof(cname), "%s%c",
   3074 			    device_xname(dv), 'a' + i);
   3075 			ac_list = rf_get_component(ac_list, dev, vp, cname,
   3076 				label.d_partitions[i].p_size);
   3077 		}
   3078 	}
   3079 	return ac_list;
   3080 }
   3081 
   3082 
   3083 static int
   3084 rf_reasonable_label(RF_ComponentLabel_t *clabel)
   3085 {
   3086 
   3087 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   3088 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   3089 	    ((clabel->clean == RF_RAID_CLEAN) ||
   3090 	     (clabel->clean == RF_RAID_DIRTY)) &&
   3091 	    clabel->row >=0 &&
   3092 	    clabel->column >= 0 &&
   3093 	    clabel->num_rows > 0 &&
   3094 	    clabel->num_columns > 0 &&
   3095 	    clabel->row < clabel->num_rows &&
   3096 	    clabel->column < clabel->num_columns &&
   3097 	    clabel->blockSize > 0 &&
   3098 	    clabel->numBlocks > 0) {
   3099 		/* label looks reasonable enough... */
   3100 		return(1);
   3101 	}
   3102 	return(0);
   3103 }
   3104 
   3105 
   3106 #ifdef DEBUG
   3107 void
   3108 rf_print_component_label(RF_ComponentLabel_t *clabel)
   3109 {
   3110 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   3111 	       clabel->row, clabel->column,
   3112 	       clabel->num_rows, clabel->num_columns);
   3113 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   3114 	       clabel->version, clabel->serial_number,
   3115 	       clabel->mod_counter);
   3116 	printf("   Clean: %s Status: %d\n",
   3117 	       clabel->clean ? "Yes" : "No", clabel->status );
   3118 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   3119 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   3120 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   3121 	       (char) clabel->parityConfig, clabel->blockSize,
   3122 	       clabel->numBlocks);
   3123 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   3124 	printf("   Contains root partition: %s\n",
   3125 	       clabel->root_partition ? "Yes" : "No" );
   3126 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   3127 #if 0
   3128 	   printf("   Config order: %d\n", clabel->config_order);
   3129 #endif
   3130 
   3131 }
   3132 #endif
   3133 
   3134 RF_ConfigSet_t *
   3135 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   3136 {
   3137 	RF_AutoConfig_t *ac;
   3138 	RF_ConfigSet_t *config_sets;
   3139 	RF_ConfigSet_t *cset;
   3140 	RF_AutoConfig_t *ac_next;
   3141 
   3142 
   3143 	config_sets = NULL;
   3144 
   3145 	/* Go through the AutoConfig list, and figure out which components
   3146 	   belong to what sets.  */
   3147 	ac = ac_list;
   3148 	while(ac!=NULL) {
   3149 		/* we're going to putz with ac->next, so save it here
   3150 		   for use at the end of the loop */
   3151 		ac_next = ac->next;
   3152 
   3153 		if (config_sets == NULL) {
   3154 			/* will need at least this one... */
   3155 			config_sets = (RF_ConfigSet_t *)
   3156 				malloc(sizeof(RF_ConfigSet_t),
   3157 				       M_RAIDFRAME, M_NOWAIT);
   3158 			if (config_sets == NULL) {
   3159 				panic("rf_create_auto_sets: No memory!");
   3160 			}
   3161 			/* this one is easy :) */
   3162 			config_sets->ac = ac;
   3163 			config_sets->next = NULL;
   3164 			config_sets->rootable = 0;
   3165 			ac->next = NULL;
   3166 		} else {
   3167 			/* which set does this component fit into? */
   3168 			cset = config_sets;
   3169 			while(cset!=NULL) {
   3170 				if (rf_does_it_fit(cset, ac)) {
   3171 					/* looks like it matches... */
   3172 					ac->next = cset->ac;
   3173 					cset->ac = ac;
   3174 					break;
   3175 				}
   3176 				cset = cset->next;
   3177 			}
   3178 			if (cset==NULL) {
   3179 				/* didn't find a match above... new set..*/
   3180 				cset = (RF_ConfigSet_t *)
   3181 					malloc(sizeof(RF_ConfigSet_t),
   3182 					       M_RAIDFRAME, M_NOWAIT);
   3183 				if (cset == NULL) {
   3184 					panic("rf_create_auto_sets: No memory!");
   3185 				}
   3186 				cset->ac = ac;
   3187 				ac->next = NULL;
   3188 				cset->next = config_sets;
   3189 				cset->rootable = 0;
   3190 				config_sets = cset;
   3191 			}
   3192 		}
   3193 		ac = ac_next;
   3194 	}
   3195 
   3196 
   3197 	return(config_sets);
   3198 }
   3199 
   3200 static int
   3201 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3202 {
   3203 	RF_ComponentLabel_t *clabel1, *clabel2;
   3204 
   3205 	/* If this one matches the *first* one in the set, that's good
   3206 	   enough, since the other members of the set would have been
   3207 	   through here too... */
   3208 	/* note that we are not checking partitionSize here..
   3209 
   3210 	   Note that we are also not checking the mod_counters here.
    3211 	   If everything else matches except the mod_counter, that's
   3212 	   good enough for this test.  We will deal with the mod_counters
   3213 	   a little later in the autoconfiguration process.
   3214 
   3215 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3216 
   3217 	   The reason we don't check for this is that failed disks
   3218 	   will have lower modification counts.  If those disks are
   3219 	   not added to the set they used to belong to, then they will
   3220 	   form their own set, which may result in 2 different sets,
   3221 	   for example, competing to be configured at raid0, and
   3222 	   perhaps competing to be the root filesystem set.  If the
   3223 	   wrong ones get configured, or both attempt to become /,
    3224 	   weird behaviour and/or serious lossage will occur.  Thus we
   3225 	   need to bring them into the fold here, and kick them out at
   3226 	   a later point.
   3227 
   3228 	*/
   3229 
   3230 	clabel1 = cset->ac->clabel;
   3231 	clabel2 = ac->clabel;
   3232 	if ((clabel1->version == clabel2->version) &&
   3233 	    (clabel1->serial_number == clabel2->serial_number) &&
   3234 	    (clabel1->num_rows == clabel2->num_rows) &&
   3235 	    (clabel1->num_columns == clabel2->num_columns) &&
   3236 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3237 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3238 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3239 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3240 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3241 	    (clabel1->blockSize == clabel2->blockSize) &&
   3242 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   3243 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3244 	    (clabel1->root_partition == clabel2->root_partition) &&
   3245 	    (clabel1->last_unit == clabel2->last_unit) &&
   3246 	    (clabel1->config_order == clabel2->config_order)) {
    3247 		/* if it gets here, it almost *has* to be a match */
   3248 	} else {
   3249 		/* it's not consistent with somebody in the set..
   3250 		   punt */
   3251 		return(0);
   3252 	}
   3253 	/* all was fine.. it must fit... */
   3254 	return(1);
   3255 }
   3256 
   3257 int
   3258 rf_have_enough_components(RF_ConfigSet_t *cset)
   3259 {
   3260 	RF_AutoConfig_t *ac;
   3261 	RF_AutoConfig_t *auto_config;
   3262 	RF_ComponentLabel_t *clabel;
   3263 	int c;
   3264 	int num_cols;
   3265 	int num_missing;
   3266 	int mod_counter;
   3267 	int mod_counter_found;
   3268 	int even_pair_failed;
   3269 	char parity_type;
   3270 
   3271 
   3272 	/* check to see that we have enough 'live' components
   3273 	   of this set.  If so, we can configure it if necessary */
   3274 
   3275 	num_cols = cset->ac->clabel->num_columns;
   3276 	parity_type = cset->ac->clabel->parityConfig;
   3277 
   3278 	/* XXX Check for duplicate components!?!?!? */
   3279 
   3280 	/* Determine what the mod_counter is supposed to be for this set. */
   3281 
   3282 	mod_counter_found = 0;
   3283 	mod_counter = 0;
   3284 	ac = cset->ac;
   3285 	while(ac!=NULL) {
   3286 		if (mod_counter_found==0) {
   3287 			mod_counter = ac->clabel->mod_counter;
   3288 			mod_counter_found = 1;
   3289 		} else {
   3290 			if (ac->clabel->mod_counter > mod_counter) {
   3291 				mod_counter = ac->clabel->mod_counter;
   3292 			}
   3293 		}
   3294 		ac = ac->next;
   3295 	}
   3296 
   3297 	num_missing = 0;
   3298 	auto_config = cset->ac;
   3299 
   3300 	even_pair_failed = 0;
   3301 	for(c=0; c<num_cols; c++) {
   3302 		ac = auto_config;
   3303 		while(ac!=NULL) {
   3304 			if ((ac->clabel->column == c) &&
   3305 			    (ac->clabel->mod_counter == mod_counter)) {
   3306 				/* it's this one... */
   3307 #ifdef DEBUG
   3308 				printf("Found: %s at %d\n",
   3309 				       ac->devname,c);
   3310 #endif
   3311 				break;
   3312 			}
   3313 			ac=ac->next;
   3314 		}
   3315 		if (ac==NULL) {
   3316 				/* Didn't find one here! */
   3317 				/* special case for RAID 1, especially
   3318 				   where there are more than 2
   3319 				   components (where RAIDframe treats
   3320 				   things a little differently :( ) */
   3321 			if (parity_type == '1') {
   3322 				if (c%2 == 0) { /* even component */
   3323 					even_pair_failed = 1;
   3324 				} else { /* odd component.  If
   3325 					    we're failed, and
   3326 					    so is the even
   3327 					    component, it's
   3328 					    "Good Night, Charlie" */
   3329 					if (even_pair_failed == 1) {
   3330 						return(0);
   3331 					}
   3332 				}
   3333 			} else {
   3334 				/* normal accounting */
   3335 				num_missing++;
   3336 			}
   3337 		}
   3338 		if ((parity_type == '1') && (c%2 == 1)) {
    3339 				/* Just finished the odd component of a pair
    3340 				   without bailing... reset the even_pair_failed
    3341 				   flag and go on to the next component. */
   3342 			even_pair_failed = 0;
   3343 		}
   3344 	}
   3345 
   3346 	clabel = cset->ac->clabel;
   3347 
   3348 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3349 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3350 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3351 		/* XXX this needs to be made *much* more general */
   3352 		/* Too many failures */
   3353 		return(0);
   3354 	}
   3355 	/* otherwise, all is well, and we've got enough to take a kick
   3356 	   at autoconfiguring this set */
   3357 	return(1);
   3358 }
   3359 
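/*
 * rf_create_configuration() -- build an RF_Config_t for an
 * auto-configured set, using the component label of the first member
 * for the common parameters and the per-component autoconfig records
 * for the device names.
 */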
   3360 void
   3361 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
   3362 			RF_Raid_t *raidPtr)
   3363 {
   3364 	RF_ComponentLabel_t *clabel;
   3365 	int i;
   3366 
   3367 	clabel = ac->clabel;
   3368 
   3369 	/* 1. Fill in the common stuff */
   3370 	config->numRow = clabel->num_rows = 1;
   3371 	config->numCol = clabel->num_columns;
   3372 	config->numSpare = 0; /* XXX should this be set here? */
   3373 	config->sectPerSU = clabel->sectPerSU;
   3374 	config->SUsPerPU = clabel->SUsPerPU;
   3375 	config->SUsPerRU = clabel->SUsPerRU;
   3376 	config->parityConfig = clabel->parityConfig;
   3377 	/* XXX... */
   3378 	strcpy(config->diskQueueType,"fifo");
   3379 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3380 	config->layoutSpecificSize = 0; /* XXX ?? */
   3381 
   3382 	while(ac!=NULL) {
   3383 		/* row/col values will be in range due to the checks
   3384 		   in reasonable_label() */
   3385 		strcpy(config->devnames[0][ac->clabel->column],
   3386 		       ac->devname);
   3387 		ac = ac->next;
   3388 	}
   3389 
   3390 	for(i=0;i<RF_MAXDBGV;i++) {
   3391 		config->debugVars[i][0] = 0;
   3392 	}
   3393 }
   3394 
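/*
 * rf_set_autoconfig() -- set the autoconfigure flag for the RAID set
 * and push the new value into the component label of every optimal
 * component and every used spare.  Returns the new value.
 */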
   3395 int
   3396 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3397 {
   3398 	RF_ComponentLabel_t *clabel;
   3399 	int column;
   3400 	int sparecol;
   3401 
   3402 	raidPtr->autoconfigure = new_value;
   3403 
   3404 	for(column=0; column<raidPtr->numCol; column++) {
   3405 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3406 			clabel = raidget_component_label(raidPtr, column);
   3407 			clabel->autoconfigure = new_value;
   3408 			raidflush_component_label(raidPtr, column);
   3409 		}
   3410 	}
   3411 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3412 		sparecol = raidPtr->numCol + column;
   3413 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3414 			clabel = raidget_component_label(raidPtr, sparecol);
   3415 			clabel->autoconfigure = new_value;
   3416 			raidflush_component_label(raidPtr, sparecol);
   3417 		}
   3418 	}
   3419 	return(new_value);
   3420 }
   3421 
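/*
 * rf_set_rootpartition() -- as above, but for the root_partition flag
 * in the component labels.  Returns the new value.
 */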
   3422 int
   3423 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3424 {
   3425 	RF_ComponentLabel_t *clabel;
   3426 	int column;
   3427 	int sparecol;
   3428 
   3429 	raidPtr->root_partition = new_value;
   3430 	for(column=0; column<raidPtr->numCol; column++) {
   3431 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3432 			clabel = raidget_component_label(raidPtr, column);
   3433 			clabel->root_partition = new_value;
   3434 			raidflush_component_label(raidPtr, column);
   3435 		}
   3436 	}
   3437 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3438 		sparecol = raidPtr->numCol + column;
   3439 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3440 			clabel = raidget_component_label(raidPtr, sparecol);
   3441 			clabel->root_partition = new_value;
   3442 			raidflush_component_label(raidPtr, sparecol);
   3443 		}
   3444 	}
   3445 	return(new_value);
   3446 }
   3447 
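/*
 * rf_release_all_vps() -- close and release the vnodes of all the
 * components in a config set that still have one open.
 */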
   3448 void
   3449 rf_release_all_vps(RF_ConfigSet_t *cset)
   3450 {
   3451 	RF_AutoConfig_t *ac;
   3452 
   3453 	ac = cset->ac;
   3454 	while(ac!=NULL) {
   3455 		/* Close the vp, and give it back */
   3456 		if (ac->vp) {
   3457 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3458 			VOP_CLOSE(ac->vp, FREAD, NOCRED);
   3459 			vput(ac->vp);
   3460 			ac->vp = NULL;
   3461 		}
   3462 		ac = ac->next;
   3463 	}
   3464 }
   3465 
   3466 
   3467 void
   3468 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3469 {
   3470 	RF_AutoConfig_t *ac;
   3471 	RF_AutoConfig_t *next_ac;
   3472 
   3473 	ac = cset->ac;
   3474 	while(ac!=NULL) {
   3475 		next_ac = ac->next;
   3476 		/* nuke the label */
   3477 		free(ac->clabel, M_RAIDFRAME);
   3478 		/* cleanup the config structure */
   3479 		free(ac, M_RAIDFRAME);
   3480 		/* "next.." */
   3481 		ac = next_ac;
   3482 	}
   3483 	/* and, finally, nuke the config set */
   3484 	free(cset, M_RAIDFRAME);
   3485 }
   3486 
   3487 
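/*
 * raid_init_component_label() -- fill in the fields of a component
 * label that are common to every component of the set, taken from the
 * current state of raidPtr.  Per-component fields (such as the column
 * number) are expected to be filled in by the caller.
 */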
   3488 void
   3489 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
   3490 {
   3491 	/* current version number */
   3492 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3493 	clabel->serial_number = raidPtr->serial_number;
   3494 	clabel->mod_counter = raidPtr->mod_counter;
   3495 
   3496 	clabel->num_rows = 1;
   3497 	clabel->num_columns = raidPtr->numCol;
   3498 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3499 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3500 
   3501 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3502 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3503 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3504 
   3505 	clabel->blockSize = raidPtr->bytesPerSector;
   3506 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3507 
   3508 	/* XXX not portable */
   3509 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3510 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3511 	clabel->autoconfigure = raidPtr->autoconfigure;
   3512 	clabel->root_partition = raidPtr->root_partition;
   3513 	clabel->last_unit = raidPtr->raidid;
   3514 	clabel->config_order = raidPtr->config_order;
   3515 
   3516 #ifndef RF_NO_PARITY_MAP
   3517 	rf_paritymap_init_label(raidPtr->parity_map, clabel);
   3518 #endif
   3519 }
   3520 
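/*
 * rf_auto_config_set() -- configure one auto-detected set: pick a free
 * raid unit (preferring the unit recorded in the component labels),
 * build an RF_Config_t from the labels, and run the normal
 * configuration path.  The chosen unit is returned via *unit; the
 * return value is 0 on success or non-zero on failure.
 */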
   3521 int
   3522 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
   3523 {
   3524 	RF_Raid_t *raidPtr;
   3525 	RF_Config_t *config;
   3526 	int raidID;
   3527 	int retcode;
   3528 
   3529 #ifdef DEBUG
   3530 	printf("RAID autoconfigure\n");
   3531 #endif
   3532 
   3533 	retcode = 0;
   3534 	*unit = -1;
   3535 
   3536 	/* 1. Create a config structure */
   3537 
   3538 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3539 				       M_RAIDFRAME,
   3540 				       M_NOWAIT);
   3541 	if (config==NULL) {
   3542 		printf("Out of mem!?!?\n");
   3543 				/* XXX do something more intelligent here. */
   3544 		return(1);
   3545 	}
   3546 
   3547 	memset(config, 0, sizeof(RF_Config_t));
   3548 
   3549 	/*
    3550 	   2. Figure out what RAID ID this one is supposed to live at.
    3551 	   See if we can get the same RAID device that it was
    3552 	   configured on last time.
   3553 	*/
   3554 
   3555 	raidID = cset->ac->clabel->last_unit;
   3556 	if ((raidID < 0) || (raidID >= numraid)) {
   3557 		/* let's not wander off into lala land. */
   3558 		raidID = numraid - 1;
   3559 	}
   3560 	if (raidPtrs[raidID]->valid != 0) {
   3561 
   3562 		/*
   3563 		   Nope... Go looking for an alternative...
    3564 		   Start high so we don't immediately grab raid0 just
    3565 		   because it happens to be free.
   3566 		*/
   3567 
   3568 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3569 			if (raidPtrs[raidID]->valid == 0) {
   3570 				/* can use this one! */
   3571 				break;
   3572 			}
   3573 		}
   3574 	}
   3575 
   3576 	if (raidID < 0) {
   3577 		/* punt... */
   3578 		printf("Unable to auto configure this set!\n");
   3579 		printf("(Out of RAID devs!)\n");
   3580 		free(config, M_RAIDFRAME);
   3581 		return(1);
   3582 	}
   3583 
   3584 #ifdef DEBUG
   3585 	printf("Configuring raid%d:\n",raidID);
   3586 #endif
   3587 
   3588 	raidPtr = raidPtrs[raidID];
   3589 
   3590 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3591 	raidPtr->raidid = raidID;
   3592 	raidPtr->openings = RAIDOUTSTANDING;
   3593 
   3594 	/* 3. Build the configuration structure */
   3595 	rf_create_configuration(cset->ac, config, raidPtr);
   3596 
   3597 	/* 4. Do the configuration */
   3598 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3599 
   3600 	if (retcode == 0) {
   3601 
   3602 		raidinit(raidPtrs[raidID]);
   3603 
   3604 		rf_markalldirty(raidPtrs[raidID]);
   3605 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3606 		if (cset->ac->clabel->root_partition==1) {
   3607 			/* everything configured just fine.  Make a note
   3608 			   that this set is eligible to be root. */
   3609 			cset->rootable = 1;
   3610 			/* XXX do this here? */
   3611 			raidPtrs[raidID]->root_partition = 1;
   3612 		}
   3613 	}
   3614 
   3615 	/* 5. Cleanup */
   3616 	free(config, M_RAIDFRAME);
   3617 
   3618 	*unit = raidID;
   3619 	return(retcode);
   3620 }
   3621 
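/*
 * rf_disk_unbusy() -- credit the I/O described by desc to the disk
 * statistics of its raid unit once the access has completed.
 */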
   3622 void
   3623 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
   3624 {
   3625 	struct buf *bp;
   3626 
   3627 	bp = (struct buf *)desc->bp;
   3628 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3629 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
   3630 }
   3631 
   3632 void
   3633 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
   3634 	     size_t xmin, size_t xmax)
   3635 {
   3636 	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
   3637 	pool_sethiwat(p, xmax);
   3638 	pool_prime(p, xmin);
   3639 	pool_setlowat(p, xmin);
   3640 }
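
/*
 * Illustrative use of rf_pool_init() (the pool name and sizes below are
 * hypothetical, shown only to make the watermark behaviour concrete):
 * the pool is created at IPL_BIO, primed with xmin preallocated items,
 * and capped at xmax idle items on the free list:
 *
 *	static struct pool example_pool;
 *
 *	rf_pool_init(&example_pool, sizeof(RF_DagHeader_t),
 *	    "rf_examplepl", 16, 128);
 */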
   3641 
   3642 /*
   3643  * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
   3644  * if there is IO pending and if that IO could possibly be done for a
   3645  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
   3646  * otherwise.
   3647  *
   3648  */
   3649 
   3650 int
   3651 rf_buf_queue_check(int raidid)
   3652 {
   3653 	if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) &&
   3654 	    raidPtrs[raidid]->openings > 0) {
   3655 		/* there is work to do */
   3656 		return 0;
   3657 	}
   3658 	/* default is nothing to do */
   3659 	return 1;
   3660 }
   3661 
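/*
 * rf_getdisksize() -- determine the sector size and usable size of the
 * component behind vp, first via DIOCGPART (disklabel partitions) and,
 * failing that, via DIOCGWEDGEINFO (wedges, where a sector size of 512
 * is assumed).  rf_protectedSectors are subtracted from the usable
 * size in either case.
 */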
   3662 int
   3663 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
   3664 {
   3665 	struct partinfo dpart;
   3666 	struct dkwedge_info dkw;
   3667 	int error;
   3668 
   3669 	error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred);
   3670 	if (error == 0) {
   3671 		diskPtr->blockSize = dpart.disklab->d_secsize;
   3672 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
   3673 		diskPtr->partitionSize = dpart.part->p_size;
   3674 		return 0;
   3675 	}
   3676 
   3677 	error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred);
   3678 	if (error == 0) {
   3679 		diskPtr->blockSize = 512;	/* XXX */
   3680 		diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
   3681 		diskPtr->partitionSize = dkw.dkw_size;
   3682 		return 0;
   3683 	}
   3684 	return error;
   3685 }
   3686 
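/*
 * Autoconfiguration glue: raid_match() always succeeds and
 * raid_attach() does nothing, since the real setup happens when a set
 * is configured; raid_detach() only refuses to detach a unit that is
 * still initialized.
 */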
   3687 static int
   3688 raid_match(struct device *self, struct cfdata *cfdata,
   3689     void *aux)
   3690 {
   3691 	return 1;
   3692 }
   3693 
   3694 static void
   3695 raid_attach(struct device *parent, struct device *self,
   3696     void *aux)
   3697 {
   3698 
   3699 }
   3700 
   3701 
   3702 static int
   3703 raid_detach(struct device *self, int flags)
   3704 {
   3705 	struct raid_softc *rs = (struct raid_softc *)self;
   3706 
   3707 	if (rs->sc_flags & RAIDF_INITED)
   3708 		return EBUSY;
   3709 
   3710 	return 0;
   3711 }
   3712 
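/*
 * rf_set_properties() -- publish a synthetic geometry for the RAID
 * device in its "disk-info" property dictionary.  A RAID set has no
 * physical geometry, so the tracks-per-cylinder and cylinders-per-unit
 * values are fabricated from the stripe and column counts.
 */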
   3713 static void
   3714 rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
   3715 {
   3716 	prop_dictionary_t disk_info, odisk_info, geom;
   3717 	disk_info = prop_dictionary_create();
   3718 	geom = prop_dictionary_create();
   3719 	prop_dictionary_set_uint64(geom, "sectors-per-unit",
   3720 				   raidPtr->totalSectors);
   3721 	prop_dictionary_set_uint32(geom, "sector-size",
   3722 				   raidPtr->bytesPerSector);
   3723 
   3724 	prop_dictionary_set_uint16(geom, "sectors-per-track",
   3725 				   raidPtr->Layout.dataSectorsPerStripe);
   3726 	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
   3727 				   4 * raidPtr->numCol);
   3728 
   3729 	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
   3730 	   raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
   3731 	   (4 * raidPtr->numCol)));
   3732 
   3733 	prop_dictionary_set(disk_info, "geometry", geom);
   3734 	prop_object_release(geom);
   3735 	prop_dictionary_set(device_properties(rs->sc_dev),
   3736 			    "disk-info", disk_info);
   3737 	odisk_info = rs->sc_dkdev.dk_info;
   3738 	rs->sc_dkdev.dk_info = disk_info;
   3739 	if (odisk_info)
   3740 		prop_object_release(odisk_info);
   3741 }
   3742 
   3743 /*
   3744  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
   3745  * We end up returning whatever error was returned by the first cache flush
   3746  * that fails.
   3747  */
   3748 
   3749 int
   3750 rf_sync_component_caches(RF_Raid_t *raidPtr)
   3751 {
   3752 	int c, sparecol;
   3753 	int e,error;
   3754 	int force = 1;
   3755 
   3756 	error = 0;
   3757 	for (c = 0; c < raidPtr->numCol; c++) {
   3758 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   3759 			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
   3760 					  &force, FWRITE, NOCRED);
   3761 			if (e) {
   3762 				if (e != ENODEV)
   3763 					printf("raid%d: cache flush to component %s failed.\n",
   3764 					       raidPtr->raidid, raidPtr->Disks[c].devname);
   3765 				if (error == 0) {
   3766 					error = e;
   3767 				}
   3768 			}
   3769 		}
   3770 	}
   3771 
   3772 	for( c = 0; c < raidPtr->numSpare ; c++) {
   3773 		sparecol = raidPtr->numCol + c;
   3774 		/* Need to ensure that the reconstruct actually completed! */
   3775 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3776 			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
   3777 					  DIOCCACHESYNC, &force, FWRITE, NOCRED);
   3778 			if (e) {
   3779 				if (e != ENODEV)
   3780 					printf("raid%d: cache flush to component %s failed.\n",
   3781 					       raidPtr->raidid, raidPtr->Disks[sparecol].devname);
   3782 				if (error == 0) {
   3783 					error = e;
   3784 				}
   3785 			}
   3786 		}
   3787 	}
   3788 	return error;
   3789 }
   3790