      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.267 2009/10/13 22:46:28 pooka Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * Copyright (c) 1990, 1993
     33  *      The Regents of the University of California.  All rights reserved.
     34  *
     35  * This code is derived from software contributed to Berkeley by
     36  * the Systems Programming Group of the University of Utah Computer
     37  * Science Department.
     38  *
     39  * Redistribution and use in source and binary forms, with or without
     40  * modification, are permitted provided that the following conditions
     41  * are met:
     42  * 1. Redistributions of source code must retain the above copyright
     43  *    notice, this list of conditions and the following disclaimer.
     44  * 2. Redistributions in binary form must reproduce the above copyright
     45  *    notice, this list of conditions and the following disclaimer in the
     46  *    documentation and/or other materials provided with the distribution.
     47  * 3. Neither the name of the University nor the names of its contributors
     48  *    may be used to endorse or promote products derived from this software
     49  *    without specific prior written permission.
     50  *
     51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     61  * SUCH DAMAGE.
     62  *
     63  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     64  *
     65  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     66  */
     67 
     68 /*
     69  * Copyright (c) 1988 University of Utah.
     70  *
     71  * This code is derived from software contributed to Berkeley by
     72  * the Systems Programming Group of the University of Utah Computer
     73  * Science Department.
     74  *
     75  * Redistribution and use in source and binary forms, with or without
     76  * modification, are permitted provided that the following conditions
     77  * are met:
     78  * 1. Redistributions of source code must retain the above copyright
     79  *    notice, this list of conditions and the following disclaimer.
     80  * 2. Redistributions in binary form must reproduce the above copyright
     81  *    notice, this list of conditions and the following disclaimer in the
     82  *    documentation and/or other materials provided with the distribution.
     83  * 3. All advertising materials mentioning features or use of this software
     84  *    must display the following acknowledgement:
     85  *      This product includes software developed by the University of
     86  *      California, Berkeley and its contributors.
     87  * 4. Neither the name of the University nor the names of its contributors
     88  *    may be used to endorse or promote products derived from this software
     89  *    without specific prior written permission.
     90  *
     91  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     92  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     93  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     94  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     95  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     96  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     97  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     98  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     99  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    100  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    101  * SUCH DAMAGE.
    102  *
    103  * from: Utah $Hdr: cd.c 1.6 90/11/28$
    104  *
    105  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
    106  */
    107 
    108 /*
    109  * Copyright (c) 1995 Carnegie-Mellon University.
    110  * All rights reserved.
    111  *
    112  * Authors: Mark Holland, Jim Zelenka
    113  *
    114  * Permission to use, copy, modify and distribute this software and
    115  * its documentation is hereby granted, provided that both the copyright
    116  * notice and this permission notice appear in all copies of the
    117  * software, derivative works or modified versions, and any portions
    118  * thereof, and that both notices appear in supporting documentation.
    119  *
    120  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
    121  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
    122  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
    123  *
    124  * Carnegie Mellon requests users of this software to return to
    125  *
    126  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
    127  *  School of Computer Science
    128  *  Carnegie Mellon University
    129  *  Pittsburgh PA 15213-3890
    130  *
    131  * any improvements or extensions that they make and grant Carnegie the
    132  * rights to redistribute these changes.
    133  */
    134 
    135 /***********************************************************
    136  *
    137  * rf_kintf.c -- the kernel interface routines for RAIDframe
    138  *
    139  ***********************************************************/
    140 
    141 #include <sys/cdefs.h>
    142 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.267 2009/10/13 22:46:28 pooka Exp $");
    143 
    144 #ifdef _KERNEL_OPT
    145 #include "opt_compat_netbsd.h"
    146 #include "opt_raid_autoconfig.h"
    147 #include "raid.h"
    148 #endif
    149 
    150 #include <sys/param.h>
    151 #include <sys/errno.h>
    152 #include <sys/pool.h>
    153 #include <sys/proc.h>
    154 #include <sys/queue.h>
    155 #include <sys/disk.h>
    156 #include <sys/device.h>
    157 #include <sys/stat.h>
    158 #include <sys/ioctl.h>
    159 #include <sys/fcntl.h>
    160 #include <sys/systm.h>
    161 #include <sys/vnode.h>
    162 #include <sys/disklabel.h>
    163 #include <sys/conf.h>
    164 #include <sys/buf.h>
    165 #include <sys/bufq.h>
    166 #include <sys/reboot.h>
    167 #include <sys/kauth.h>
    168 
    169 #include <prop/proplib.h>
    170 
    171 #include <dev/raidframe/raidframevar.h>
    172 #include <dev/raidframe/raidframeio.h>
    173 
    174 #include "rf_raid.h"
    175 #include "rf_copyback.h"
    176 #include "rf_dag.h"
    177 #include "rf_dagflags.h"
    178 #include "rf_desc.h"
    179 #include "rf_diskqueue.h"
    180 #include "rf_etimer.h"
    181 #include "rf_general.h"
    182 #include "rf_kintf.h"
    183 #include "rf_options.h"
    184 #include "rf_driver.h"
    185 #include "rf_parityscan.h"
    186 #include "rf_threadstuff.h"
    187 
    188 #ifdef COMPAT_50
    189 #include "rf_compat50.h"
    190 #endif
    191 
    192 #ifdef DEBUG
    193 int     rf_kdebug_level = 0;
    194 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    195 #else				/* DEBUG */
    196 #define db1_printf(a) { }
    197 #endif				/* DEBUG */
    198 
    199 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    200 
    201 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
    202 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    203 
    204 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    205 						 * spare table */
    206 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    207 						 * installation process */
    208 #endif
    209 
    210 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    211 
    212 /* prototypes */
    213 static void KernelWakeupFunc(struct buf *);
    214 static void InitBP(struct buf *, struct vnode *, unsigned,
    215     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    216     void *, int, struct proc *);
    217 static void raidinit(RF_Raid_t *);
    218 
    219 void raidattach(int);
    220 static int raid_match(device_t, cfdata_t, void *);
    221 static void raid_attach(device_t, device_t, void *);
    222 static int raid_detach(device_t, int);
    223 
    224 dev_type_open(raidopen);
    225 dev_type_close(raidclose);
    226 dev_type_read(raidread);
    227 dev_type_write(raidwrite);
    228 dev_type_ioctl(raidioctl);
    229 dev_type_strategy(raidstrategy);
    230 dev_type_dump(raiddump);
    231 dev_type_size(raidsize);
    232 
    233 const struct bdevsw raid_bdevsw = {
    234 	raidopen, raidclose, raidstrategy, raidioctl,
    235 	raiddump, raidsize, D_DISK
    236 };
    237 
    238 const struct cdevsw raid_cdevsw = {
    239 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    240 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
    241 };
    242 
    243 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
    244 
    245 /* XXX Not sure if the following should be replacing the raidPtrs above,
    246    or if it should be used in conjunction with that...
    247 */
    248 
    249 struct raid_softc {
    250 	device_t sc_dev;
    251 	int     sc_flags;	/* flags */
    252 	int     sc_cflags;	/* configuration flags */
    253 	uint64_t sc_size;	/* size of the raid device */
    254 	char    sc_xname[20];	/* XXX external name */
    255 	struct disk sc_dkdev;	/* generic disk device info */
    256 	struct bufq_state *buf_queue;	/* used for the device queue */
    257 };
    258 /* sc_flags */
    259 #define RAIDF_INITED	0x01	/* unit has been initialized */
    260 #define RAIDF_WLABEL	0x02	/* label area is writable */
    261 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
    262 #define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
    263 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
    264 #define RAIDF_LOCKED	0x80	/* unit is locked */
    265 
    266 #define	raidunit(x)	DISKUNIT(x)
    267 int numraid = 0;
    268 
    269 extern struct cfdriver raid_cd;
    270 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    271     raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    272     DVF_DETACH_SHUTDOWN);
    273 
    274 /*
    275  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    276  * Be aware that large numbers can allow the driver to consume a lot of
    277  * kernel memory, especially on writes, and in degraded mode reads.
    278  *
    279  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    280  * a single 64K write will typically require 64K for the old data,
    281  * 64K for the old parity, and 64K for the new parity, for a total
    282  * of 192K (if the parity buffer is not re-used immediately).
    283  * Even if it is used immediately, that's still 128K, which when multiplied
    284  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    285  *
    286  * Now in degraded mode, for example, a 64K read on the above setup may
    287  * require data reconstruction, which will require *all* of the 4 remaining
    288  * disks to participate -- 4 * 32K/disk == 128K again.
    289  */
    290 
    291 #ifndef RAIDOUTSTANDING
    292 #define RAIDOUTSTANDING   6
    293 #endif
    294 
    295 #define RAIDLABELDEV(dev)	\
    296 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    297 
    298 /* declared here, and made public, for the benefit of KVM stuff.. */
    299 struct raid_softc *raid_softc;
    300 
    301 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    302 				     struct disklabel *);
    303 static void raidgetdisklabel(dev_t);
    304 static void raidmakedisklabel(struct raid_softc *);
    305 
    306 static int raidlock(struct raid_softc *);
    307 static void raidunlock(struct raid_softc *);
    308 
    309 static int raid_detach_unlocked(struct raid_softc *);
    310 
    311 static void rf_markalldirty(RF_Raid_t *);
    312 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
    313 
    314 void rf_ReconThread(struct rf_recon_req *);
    315 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    316 void rf_CopybackThread(RF_Raid_t *raidPtr);
    317 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    318 int rf_autoconfig(device_t);
    319 void rf_buildroothack(RF_ConfigSet_t *);
    320 
    321 RF_AutoConfig_t *rf_find_raid_components(void);
    322 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    323 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    324 static int rf_reasonable_label(RF_ComponentLabel_t *);
    325 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    326 int rf_set_autoconfig(RF_Raid_t *, int);
    327 int rf_set_rootpartition(RF_Raid_t *, int);
    328 void rf_release_all_vps(RF_ConfigSet_t *);
    329 void rf_cleanup_config_set(RF_ConfigSet_t *);
    330 int rf_have_enough_components(RF_ConfigSet_t *);
    331 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    332 static int rf_sync_component_caches(RF_Raid_t *raidPtr);
    333 
    334 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    335 				  allow autoconfig to take place.
    336 				  Note that this is overridden by having
    337 				  RAID_AUTOCONFIG as an option in the
    338 				  kernel config file.  */
    339 
    340 struct RF_Pools_s rf_pools;
    341 
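        /*
         * raidattach: pseudo-device attach routine.  Allocate the global
         * raidPtrs[] and raid_softc[] arrays for `num' units, boot the
         * RAIDframe core, hook up the autoconf attachment, and register a
         * finalizer so that auto-configurable sets are assembled once all
         * real hardware has been found.
         */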
    342 void
    343 raidattach(int num)
    344 {
    345 	int raidID;
    346 	int i, rc;
    347 
    348 	aprint_debug("raidattach: Asked for %d units\n", num);
    349 
    350 	if (num <= 0) {
    351 #ifdef DIAGNOSTIC
    352 		panic("raidattach: count <= 0");
    353 #endif
    354 		return;
    355 	}
    356 	/* This is where all the initialization stuff gets done. */
    357 
    358 	numraid = num;
    359 
    360 	/* Make some space for requested number of units... */
    361 
    362 	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
    363 	if (raidPtrs == NULL) {
    364 		panic("raidPtrs is NULL!!");
    365 	}
    366 
    367 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
    368 	rf_mutex_init(&rf_sparet_wait_mutex);
    369 
    370 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    371 #endif
    372 
    373 	for (i = 0; i < num; i++)
    374 		raidPtrs[i] = NULL;
    375 	rc = rf_BootRaidframe();
    376 	if (rc == 0)
    377 		aprint_normal("Kernelized RAIDframe activated\n");
    378 	else
    379 		panic("Serious error booting RAID!!");
    380 
    381 	/* put together some data structures like the CCD device does.  This
    382 	 * lets us lock the device and what-not when it gets opened. */
    383 
    384 	raid_softc = (struct raid_softc *)
    385 		malloc(num * sizeof(struct raid_softc),
    386 		       M_RAIDFRAME, M_NOWAIT);
    387 	if (raid_softc == NULL) {
    388 		aprint_error("WARNING: no memory for RAIDframe driver\n");
    389 		return;
    390 	}
    391 
    392 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    393 
    394 	for (raidID = 0; raidID < num; raidID++) {
    395 		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);
    396 
    397 		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
    398 			  (RF_Raid_t *));
    399 		if (raidPtrs[raidID] == NULL) {
    400 			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
    401 			numraid = raidID;
    402 			return;
    403 		}
    404 	}
    405 
    406 	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
    407 		aprint_error("raidattach: config_cfattach_attach failed?\n");
    408 	}
    409 
    410 #ifdef RAID_AUTOCONFIG
    411 	raidautoconfig = 1;
    412 #endif
    413 
    414 	/*
    415 	 * Register a finalizer which will be used to auto-config RAID
    416 	 * sets once all real hardware devices have been found.
    417 	 */
    418 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
    419 		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
    420 }
    421 
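        /*
         * rf_autoconfig: config_finalize() callback.  Runs at most once:
         * locate RAID components, sort them into sets, and hand the sets
         * to rf_buildroothack() for configuration.
         */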
    422 int
    423 rf_autoconfig(device_t self)
    424 {
    425 	RF_AutoConfig_t *ac_list;
    426 	RF_ConfigSet_t *config_sets;
    427 
    428 	if (raidautoconfig == 0)
    429 		return (0);
    430 
    431 	/* XXX This code can only be run once. */
    432 	raidautoconfig = 0;
    433 
    434 	/* 1. locate all RAID components on the system */
    435 	aprint_debug("Searching for RAID components...\n");
    436 	ac_list = rf_find_raid_components();
    437 
    438 	/* 2. Sort them into their respective sets. */
    439 	config_sets = rf_create_auto_sets(ac_list);
    440 
    441 	/*
    442 	 * 3. Evaluate each set and configure the valid ones.
    443 	 * This gets done in rf_buildroothack().
    444 	 */
    445 	rf_buildroothack(config_sets);
    446 
    447 	return 1;
    448 }
    449 
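        /*
         * rf_buildroothack: configure each complete, autoconfigure-enabled
         * set.  Then, unless a root device was explicitly specified, try
         * to point booted_device at the RAID set marked as root; if the
         * choice is ambiguous, fall back to RB_ASKNAME.
         */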
    450 void
    451 rf_buildroothack(RF_ConfigSet_t *config_sets)
    452 {
    453 	RF_ConfigSet_t *cset;
    454 	RF_ConfigSet_t *next_cset;
    455 	int retcode;
    456 	int raidID;
    457 	int rootID;
    458 	int col;
    459 	int num_root;
    460 	char *devname;
    461 
    462 	rootID = 0;
    463 	num_root = 0;
    464 	cset = config_sets;
    465 	while(cset != NULL ) {
    466 		next_cset = cset->next;
    467 		if (rf_have_enough_components(cset) &&
    468 		    cset->ac->clabel->autoconfigure==1) {
    469 			retcode = rf_auto_config_set(cset,&raidID);
    470 			if (!retcode) {
    471 				aprint_debug("raid%d: configured ok\n", raidID);
    472 				if (cset->rootable) {
    473 					rootID = raidID;
    474 					num_root++;
    475 				}
    476 			} else {
    477 				/* The autoconfig didn't work :( */
    478 				aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    479 				rf_release_all_vps(cset);
    480 			}
    481 		} else {
    482 			/* we're not autoconfiguring this set...
    483 			   release the associated resources */
    484 			rf_release_all_vps(cset);
    485 		}
    486 		/* cleanup */
    487 		rf_cleanup_config_set(cset);
    488 		cset = next_cset;
    489 	}
    490 
    491 	/* if the user has specified what the root device should be
    492 	   then we don't touch booted_device or boothowto... */
    493 
    494 	if (rootspec != NULL)
    495 		return;
    496 
    497 	/* we found something bootable... */
    498 
    499 	if (num_root == 1) {
    500 		booted_device = raid_softc[rootID].sc_dev;
    501 	} else if (num_root > 1) {
    502 
    503 		/*
    504 		 * Maybe the MD code can help. If it cannot, then
    505 		 * setroot() will discover that we have no
    506 		 * booted_device and will ask the user if nothing was
    507 		 * hardwired in the kernel config file
    508 		 */
    509 
    510 		if (booted_device == NULL)
    511 			cpu_rootconf();
    512 		if (booted_device == NULL)
    513 			return;
    514 
    515 		num_root = 0;
    516 		for (raidID = 0; raidID < numraid; raidID++) {
    517 			if (raidPtrs[raidID]->valid == 0)
    518 				continue;
    519 
    520 			if (raidPtrs[raidID]->root_partition == 0)
    521 				continue;
    522 
    523 			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
    524 				devname = raidPtrs[raidID]->Disks[col].devname;
    525 				devname += sizeof("/dev/") - 1;
    526 				if (strncmp(devname, device_xname(booted_device),
    527 					    strlen(device_xname(booted_device))) != 0)
    528 					continue;
    529 				aprint_debug("raid%d includes boot device %s\n",
    530 				       raidID, devname);
    531 				num_root++;
    532 				rootID = raidID;
    533 			}
    534 		}
    535 
    536 		if (num_root == 1) {
    537 			booted_device = raid_softc[rootID].sc_dev;
    538 		} else {
    539 			/* we can't guess.. require the user to answer... */
    540 			boothowto |= RB_ASKNAME;
    541 		}
    542 	}
    543 }
    544 
    545 
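        /*
         * raidsize: return the size, in DEV_BSIZE units, of the given
         * partition if the unit is configured and the partition is of
         * type FS_SWAP; otherwise return -1.
         */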
    546 int
    547 raidsize(dev_t dev)
    548 {
    549 	struct raid_softc *rs;
    550 	struct disklabel *lp;
    551 	int     part, unit, omask, size;
    552 
    553 	unit = raidunit(dev);
    554 	if (unit >= numraid)
    555 		return (-1);
    556 	rs = &raid_softc[unit];
    557 
    558 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    559 		return (-1);
    560 
    561 	part = DISKPART(dev);
    562 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    563 	lp = rs->sc_dkdev.dk_label;
    564 
    565 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
    566 		return (-1);
    567 
    568 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    569 		size = -1;
    570 	else
    571 		size = lp->d_partitions[part].p_size *
    572 		    (lp->d_secsize / DEV_BSIZE);
    573 
    574 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
    575 		return (-1);
    576 
    577 	return (size);
    578 
    579 }
    580 
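        /*
         * raiddump: crash-dump entry point.  Only RAID 1 sets are
         * supported; pick a live component (or used spare) to dump to and
         * pass the request through to that component's dump routine.
         */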
    581 int
    582 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
    583 {
    584 	int     unit = raidunit(dev);
    585 	struct raid_softc *rs;
    586 	const struct bdevsw *bdev;
    587 	struct disklabel *lp;
    588 	RF_Raid_t *raidPtr;
    589 	daddr_t offset;
    590 	int     part, c, sparecol, j, scol, dumpto;
    591 	int     error = 0;
    592 
    593 	if (unit >= numraid)
    594 		return (ENXIO);
    595 
    596 	rs = &raid_softc[unit];
    597 	raidPtr = raidPtrs[unit];
    598 
    599 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    600 		return ENXIO;
    601 
    602 	/* we only support dumping to RAID 1 sets */
    603 	if (raidPtr->Layout.numDataCol != 1 ||
    604 	    raidPtr->Layout.numParityCol != 1)
    605 		return EINVAL;
    606 
    607 
    608 	if ((error = raidlock(rs)) != 0)
    609 		return error;
    610 
    611 	if (size % DEV_BSIZE != 0) {
    612 		error = EINVAL;
    613 		goto out;
    614 	}
    615 
    616 	if (blkno + size / DEV_BSIZE > rs->sc_size) {
    617 		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
    618 		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
    619 		    size / DEV_BSIZE, rs->sc_size);
    620 		error = EINVAL;
    621 		goto out;
    622 	}
    623 
    624 	part = DISKPART(dev);
    625 	lp = rs->sc_dkdev.dk_label;
    626 	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
    627 
    628 	/* figure out what device is alive.. */
    629 
    630 	/*
    631 	   Look for a component to dump to.  The preference for the
    632 	   component to dump to is as follows:
    633 	   1) the master
    634 	   2) a used_spare of the master
    635 	   3) the slave
    636 	   4) a used_spare of the slave
    637 	*/
    638 
    639 	dumpto = -1;
    640 	for (c = 0; c < raidPtr->numCol; c++) {
    641 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
    642 			/* this might be the one */
    643 			dumpto = c;
    644 			break;
    645 		}
    646 	}
    647 
    648 	/*
    649 	   At this point we have possibly selected a live master or a
    650 	   live slave.  We now check to see if there is a spared
    651 	   master (or a spared slave), if we didn't find a live master
    652 	   or a live slave.
    653 	*/
    654 
    655 	for (c = 0; c < raidPtr->numSpare; c++) {
    656 		sparecol = raidPtr->numCol + c;
    657 		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
    658 			/* How about this one? */
    659 			scol = -1;
    660 			for(j=0;j<raidPtr->numCol;j++) {
    661 				if (raidPtr->Disks[j].spareCol == sparecol) {
    662 					scol = j;
    663 					break;
    664 				}
    665 			}
    666 			if (scol == 0) {
    667 				/*
    668 				   We must have found a spared master!
    669 				   We'll take that over anything else
    670 				   found so far.  (We couldn't have
    671 				   found a real master before, since
    672 				   this is a used spare, and it's
    673 				   saying that it's replacing the
    674 				   master.)  On reboot (with
    675 				   autoconfiguration turned on)
    676 				   sparecol will become the 1st
    677 				   component (component0) of this set.
    678 				*/
    679 				dumpto = sparecol;
    680 				break;
    681 			} else if (scol != -1) {
    682 				/*
    683 				   Must be a spared slave.  We'll dump
    684 				   to that if we haven't found anything
    685 				   else so far.
    686 				*/
    687 				if (dumpto == -1)
    688 					dumpto = sparecol;
    689 			}
    690 		}
    691 	}
    692 
    693 	if (dumpto == -1) {
    694 		/* we couldn't find any live components to dump to!?!?
    695 		 */
    696 		error = EINVAL;
    697 		goto out;
    698 	}
    699 
    700 	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
    701 
    702 	/*
    703 	   Note that blkno is relative to this particular partition.
    704 	   By adding the offset of this partition in the RAID
    705 	   set, and also adding RF_PROTECTED_SECTORS, we get a
    706 	   value that is relative to the partition used for the
    707 	   underlying component.
    708 	*/
    709 
    710 	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
    711 				blkno + offset, va, size);
    712 
    713 out:
    714 	raidunlock(rs);
    715 
    716 	return error;
    717 }
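        /*
         * raidopen: open a partition of the RAID device.  Reads the
         * disklabel on the first open of a configured unit and marks all
         * components dirty so that stale parity can be detected after a
         * crash.
         */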
    718 /* ARGSUSED */
    719 int
    720 raidopen(dev_t dev, int flags, int fmt,
    721     struct lwp *l)
    722 {
    723 	int     unit = raidunit(dev);
    724 	struct raid_softc *rs;
    725 	struct disklabel *lp;
    726 	int     part, pmask;
    727 	int     error = 0;
    728 
    729 	if (unit >= numraid)
    730 		return (ENXIO);
    731 	rs = &raid_softc[unit];
    732 
    733 	if ((error = raidlock(rs)) != 0)
    734 		return (error);
    735 
    736 	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
    737 		error = EBUSY;
    738 		goto bad;
    739 	}
    740 
    741 	lp = rs->sc_dkdev.dk_label;
    742 
    743 	part = DISKPART(dev);
    744 
    745 	/*
    746 	 * If there are wedges, and this is not RAW_PART, then we
    747 	 * need to fail.
    748 	 */
    749 	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
    750 		error = EBUSY;
    751 		goto bad;
    752 	}
    753 	pmask = (1 << part);
    754 
    755 	if ((rs->sc_flags & RAIDF_INITED) &&
    756 	    (rs->sc_dkdev.dk_openmask == 0))
    757 		raidgetdisklabel(dev);
    758 
    759 	/* make sure that this partition exists */
    760 
    761 	if (part != RAW_PART) {
    762 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    763 		    ((part >= lp->d_npartitions) ||
    764 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    765 			error = ENXIO;
    766 			goto bad;
    767 		}
    768 	}
    769 	/* Prevent this unit from being unconfigured while open. */
    770 	switch (fmt) {
    771 	case S_IFCHR:
    772 		rs->sc_dkdev.dk_copenmask |= pmask;
    773 		break;
    774 
    775 	case S_IFBLK:
    776 		rs->sc_dkdev.dk_bopenmask |= pmask;
    777 		break;
    778 	}
    779 
    780 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    781 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    782 		/* First one... mark things as dirty... Note that we *MUST*
    783 		 have done a configure before this.  I DO NOT WANT TO BE
    784 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    785 		 THAT THEY BELONG TOGETHER!!!!! */
    786 		/* XXX should check to see if we're only open for reading
    787 		   here... If so, we needn't do this, but then need some
    788 		   other way of keeping track of what's happened.. */
    789 
    790 		rf_markalldirty( raidPtrs[unit] );
    791 	}
    792 
    793 
    794 	rs->sc_dkdev.dk_openmask =
    795 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    796 
    797 bad:
    798 	raidunlock(rs);
    799 
    800 	return (error);
    801 
    802 
    803 }
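        /*
         * raidclose: close a partition.  On the last close of a
         * configured unit, write out final (clean) component labels.
         */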
    804 /* ARGSUSED */
    805 int
    806 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
    807 {
    808 	int     unit = raidunit(dev);
    809 	struct raid_softc *rs;
    810 	int     error = 0;
    811 	int     part;
    812 
    813 	if (unit >= numraid)
    814 		return (ENXIO);
    815 	rs = &raid_softc[unit];
    816 
    817 	if ((error = raidlock(rs)) != 0)
    818 		return (error);
    819 
    820 	part = DISKPART(dev);
    821 
    822 	/* ...that much closer to allowing unconfiguration... */
    823 	switch (fmt) {
    824 	case S_IFCHR:
    825 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    826 		break;
    827 
    828 	case S_IFBLK:
    829 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    830 		break;
    831 	}
    832 	rs->sc_dkdev.dk_openmask =
    833 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    834 
    835 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    836 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    837 		/* Last close... the device is not unconfigured yet,
    838 		   so mark things as clean here.  (If RAIDF_INITED is
    839 		   not set, device shutdown has already taken care of
    840 		   setting the clean bits.) */
    841 
    842 		rf_update_component_labels(raidPtrs[unit],
    843 						 RF_FINAL_COMPONENT_UPDATE);
    844 
    845 		/* If the kernel is shutting down, it will detach
    846 		 * this RAID set soon enough.
    847 		 */
    848 	}
    849 
    850 	raidunlock(rs);
    851 	return (0);
    852 
    853 }
    854 
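        /*
         * raidstrategy: queue a buffer for I/O.  After bounds-checking the
         * request against the disklabel (or the total size, for the raw
         * partition), put the buf on the per-unit queue and wake the
         * RAIDframe I/O thread to service it.
         */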
    855 void
    856 raidstrategy(struct buf *bp)
    857 {
    858 	int s;
    859 
    860 	unsigned int raidID = raidunit(bp->b_dev);
    861 	RF_Raid_t *raidPtr;
    862 	struct raid_softc *rs = &raid_softc[raidID];
    863 	int     wlabel;
    864 
    865 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    866 		bp->b_error = ENXIO;
    867 		goto done;
    868 	}
    869 	if (raidID >= numraid || !raidPtrs[raidID]) {
    870 		bp->b_error = ENODEV;
    871 		goto done;
    872 	}
    873 	raidPtr = raidPtrs[raidID];
    874 	if (!raidPtr->valid) {
    875 		bp->b_error = ENODEV;
    876 		goto done;
    877 	}
    878 	if (bp->b_bcount == 0) {
    879 		db1_printf(("b_bcount is zero..\n"));
    880 		goto done;
    881 	}
    882 
    883 	/*
    884 	 * Do bounds checking and adjust transfer.  If there's an
    885 	 * error, the bounds check will flag that for us.
    886 	 */
    887 
    888 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    889 	if (DISKPART(bp->b_dev) == RAW_PART) {
    890 		uint64_t size; /* device size in DEV_BSIZE unit */
    891 
    892 		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
    893 			size = raidPtr->totalSectors <<
    894 			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
    895 		} else {
    896 			size = raidPtr->totalSectors >>
    897 			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
    898 		}
    899 		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
    900 			goto done;
    901 		}
    902 	} else {
    903 		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
    904 			db1_printf(("Bounds check failed!!:%d %d\n",
    905 				(int) bp->b_blkno, (int) wlabel));
    906 			goto done;
    907 		}
    908 	}
    909 	s = splbio();
    910 
    911 	bp->b_resid = 0;
    912 
    913 	/* stuff it onto our queue */
    914 	bufq_put(rs->buf_queue, bp);
    915 
    916 	/* schedule the I/O to happen at the next convenient time */
    917 	wakeup(&(raidPtrs[raidID]->iodone));
    918 
    919 	splx(s);
    920 	return;
    921 
    922 done:
    923 	bp->b_resid = bp->b_bcount;
    924 	biodone(bp);
    925 }
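        /*
         * raidread/raidwrite: character-device entry points; thin
         * physio(9) wrappers around raidstrategy().
         */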
    926 /* ARGSUSED */
    927 int
    928 raidread(dev_t dev, struct uio *uio, int flags)
    929 {
    930 	int     unit = raidunit(dev);
    931 	struct raid_softc *rs;
    932 
    933 	if (unit >= numraid)
    934 		return (ENXIO);
    935 	rs = &raid_softc[unit];
    936 
    937 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    938 		return (ENXIO);
    939 
    940 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    941 
    942 }
    943 /* ARGSUSED */
    944 int
    945 raidwrite(dev_t dev, struct uio *uio, int flags)
    946 {
    947 	int     unit = raidunit(dev);
    948 	struct raid_softc *rs;
    949 
    950 	if (unit >= numraid)
    951 		return (ENXIO);
    952 	rs = &raid_softc[unit];
    953 
    954 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    955 		return (ENXIO);
    956 
    957 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    958 
    959 }
    960 
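        /*
         * raid_detach_unlocked: tear down a unit.  Refuse if any partition
         * is still open; otherwise shut down the RAIDframe engine (if the
         * unit was initialized) and detach/destroy the disk(9) structures.
         */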
    961 static int
    962 raid_detach_unlocked(struct raid_softc *rs)
    963 {
    964 	int error;
    965 	RF_Raid_t *raidPtr;
    966 
    967 	raidPtr = raidPtrs[device_unit(rs->sc_dev)];
    968 
    969 	/*
    970 	 * If somebody has a partition mounted, we shouldn't
    971 	 * shutdown.
    972 	 */
    973 	if (rs->sc_dkdev.dk_openmask != 0)
    974 		return EBUSY;
    975 
    976 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    977 		;	/* not initialized: nothing to do */
    978 	else if ((error = rf_Shutdown(raidPtr)) != 0)
    979 		return error;
    980 	else
    981 		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);
    982 
    983 	/* Detach the disk. */
    984 	disk_detach(&rs->sc_dkdev);
    985 	disk_destroy(&rs->sc_dkdev);
    986 
    987 	return 0;
    988 }
    989 
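        /*
         * raidioctl: the control interface used by raidctl(8).
         *
         * As an illustrative sketch only (not taken from the raidctl
         * sources), and assuming RAIDFRAME_CONFIGURE is passed a pointer
         * to an RF_Config_t pointer as the handler below expects,
         * configuring a set from userland looks roughly like:
         *
         *	RF_Config_t cfg;
         *	void *cfg_ptr = &cfg;
         *	int fd;
         *
         *	memset(&cfg, 0, sizeof(cfg));
         *	// fill in the column count, component names, layout, ...
         *	fd = open("/dev/rraid0d", O_RDWR);  // raw partition; the
         *	                                    // letter varies by port
         *	if (fd == -1 ||
         *	    ioctl(fd, RAIDFRAME_CONFIGURE, &cfg_ptr) == -1)
         *		err(1, "RAIDFRAME_CONFIGURE");
         *
         * The handler copies in the RF_Config_t and any layout-specific
         * data, then calls rf_Configure() to bring the set up.
         */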
    990 int
    991 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
    992 {
    993 	int     unit = raidunit(dev);
    994 	int     error = 0;
    995 	int     part, pmask;
    996 	cfdata_t cf;
    997 	struct raid_softc *rs;
    998 	RF_Config_t *k_cfg, *u_cfg;
    999 	RF_Raid_t *raidPtr;
   1000 	RF_RaidDisk_t *diskPtr;
   1001 	RF_AccTotals_t *totals;
   1002 	RF_DeviceConfig_t *d_cfg, **ucfgp;
   1003 	u_char *specific_buf;
   1004 	int retcode = 0;
   1005 	int column;
   1006 	int raidid;
   1007 	struct rf_recon_req *rrcopy, *rr;
   1008 	RF_ComponentLabel_t *clabel;
   1009 	RF_ComponentLabel_t *ci_label;
   1010 	RF_ComponentLabel_t **clabel_ptr;
   1011 	RF_SingleComponent_t *sparePtr,*componentPtr;
   1012 	RF_SingleComponent_t component;
   1013 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
   1014 	int i, j, d;
   1015 #ifdef __HAVE_OLD_DISKLABEL
   1016 	struct disklabel newlabel;
   1017 #endif
   1018 	struct dkwedge_info *dkw;
   1019 
   1020 	if (unit >= numraid)
   1021 		return (ENXIO);
   1022 	rs = &raid_softc[unit];
   1023 	raidPtr = raidPtrs[unit];
   1024 
   1025 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
   1026 		(int) DISKPART(dev), (int) unit, (int) cmd));
   1027 
   1028 	/* Must be open for writes for these commands... */
   1029 	switch (cmd) {
   1030 #ifdef DIOCGSECTORSIZE
   1031 	case DIOCGSECTORSIZE:
   1032 		*(u_int *)data = raidPtr->bytesPerSector;
   1033 		return 0;
   1034 	case DIOCGMEDIASIZE:
   1035 		*(off_t *)data =
   1036 		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
   1037 		return 0;
   1038 #endif
   1039 	case DIOCSDINFO:
   1040 	case DIOCWDINFO:
   1041 #ifdef __HAVE_OLD_DISKLABEL
   1042 	case ODIOCWDINFO:
   1043 	case ODIOCSDINFO:
   1044 #endif
   1045 	case DIOCWLABEL:
   1046 	case DIOCAWEDGE:
   1047 	case DIOCDWEDGE:
   1048 		if ((flag & FWRITE) == 0)
   1049 			return (EBADF);
   1050 	}
   1051 
   1052 	/* Must be initialized for these... */
   1053 	switch (cmd) {
   1054 	case DIOCGDINFO:
   1055 	case DIOCSDINFO:
   1056 	case DIOCWDINFO:
   1057 #ifdef __HAVE_OLD_DISKLABEL
   1058 	case ODIOCGDINFO:
   1059 	case ODIOCWDINFO:
   1060 	case ODIOCSDINFO:
   1061 	case ODIOCGDEFLABEL:
   1062 #endif
   1063 	case DIOCGPART:
   1064 	case DIOCWLABEL:
   1065 	case DIOCGDEFLABEL:
   1066 	case DIOCAWEDGE:
   1067 	case DIOCDWEDGE:
   1068 	case DIOCLWEDGES:
   1069 	case DIOCCACHESYNC:
   1070 	case RAIDFRAME_SHUTDOWN:
   1071 	case RAIDFRAME_REWRITEPARITY:
   1072 	case RAIDFRAME_GET_INFO:
   1073 	case RAIDFRAME_RESET_ACCTOTALS:
   1074 	case RAIDFRAME_GET_ACCTOTALS:
   1075 	case RAIDFRAME_KEEP_ACCTOTALS:
   1076 	case RAIDFRAME_GET_SIZE:
   1077 	case RAIDFRAME_FAIL_DISK:
   1078 	case RAIDFRAME_COPYBACK:
   1079 	case RAIDFRAME_CHECK_RECON_STATUS:
   1080 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1081 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1082 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1083 	case RAIDFRAME_ADD_HOT_SPARE:
   1084 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1085 	case RAIDFRAME_INIT_LABELS:
   1086 	case RAIDFRAME_REBUILD_IN_PLACE:
   1087 	case RAIDFRAME_CHECK_PARITY:
   1088 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1089 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1090 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1091 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1092 	case RAIDFRAME_SET_AUTOCONFIG:
   1093 	case RAIDFRAME_SET_ROOT:
   1094 	case RAIDFRAME_DELETE_COMPONENT:
   1095 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1096 		if ((rs->sc_flags & RAIDF_INITED) == 0)
   1097 			return (ENXIO);
   1098 	}
   1099 
   1100 	switch (cmd) {
   1101 #ifdef COMPAT_50
   1102 	case RAIDFRAME_GET_INFO50:
   1103 		return rf_get_info50(raidPtr, data);
   1104 
   1105 	case RAIDFRAME_CONFIGURE50:
   1106 		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
   1107 			return retcode;
   1108 		goto config;
   1109 #endif
   1110 		/* configure the system */
   1111 	case RAIDFRAME_CONFIGURE:
   1112 
   1113 		if (raidPtr->valid) {
   1114 			/* There is a valid RAID set running on this unit! */
   1115 			printf("raid%d: Device already configured!\n",unit);
   1116 			return(EINVAL);
   1117 		}
   1118 
   1119 		/* copy-in the configuration information */
   1120 		/* data points to a pointer to the configuration structure */
   1121 
   1122 		u_cfg = *((RF_Config_t **) data);
   1123 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
   1124 		if (k_cfg == NULL) {
   1125 			return (ENOMEM);
   1126 		}
   1127 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
   1128 		if (retcode) {
   1129 			RF_Free(k_cfg, sizeof(RF_Config_t));
   1130 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
   1131 				retcode));
   1132 			return (retcode);
   1133 		}
   1134 		goto config;
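        		/*
        		 * Common configuration path, reached from
        		 * RAIDFRAME_CONFIGURE above and, when COMPAT_50 is
        		 * defined, from RAIDFRAME_CONFIGURE50.
        		 */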
   1135 	config:
   1136 		/* allocate a buffer for the layout-specific data, and copy it
   1137 		 * in */
   1138 		if (k_cfg->layoutSpecificSize) {
   1139 			if (k_cfg->layoutSpecificSize > 10000) {
   1140 				/* sanity check */
   1141 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1142 				return (EINVAL);
   1143 			}
   1144 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
   1145 			    (u_char *));
   1146 			if (specific_buf == NULL) {
   1147 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1148 				return (ENOMEM);
   1149 			}
   1150 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
   1151 			    k_cfg->layoutSpecificSize);
   1152 			if (retcode) {
   1153 				RF_Free(k_cfg, sizeof(RF_Config_t));
   1154 				RF_Free(specific_buf,
   1155 					k_cfg->layoutSpecificSize);
   1156 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
   1157 					retcode));
   1158 				return (retcode);
   1159 			}
   1160 		} else
   1161 			specific_buf = NULL;
   1162 		k_cfg->layoutSpecific = specific_buf;
   1163 
   1164 		/* should do some kind of sanity check on the configuration.
   1165 		 * Store the sum of all the bytes in the last byte? */
   1166 
   1167 		/* configure the system */
   1168 
   1169 		/*
   1170 		 * Clear the entire RAID descriptor, just to make sure
   1171 		 *  there is no stale data left in the case of a
   1172 		 *  reconfiguration
   1173 		 */
   1174 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
   1175 		raidPtr->raidid = unit;
   1176 
   1177 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
   1178 
   1179 		if (retcode == 0) {
   1180 
   1181 			/* allow this many simultaneous IO's to
   1182 			   this RAID device */
   1183 			raidPtr->openings = RAIDOUTSTANDING;
   1184 
   1185 			raidinit(raidPtr);
   1186 			rf_markalldirty(raidPtr);
   1187 		}
   1188 		/* free the buffers.  No return code here. */
   1189 		if (k_cfg->layoutSpecificSize) {
   1190 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
   1191 		}
   1192 		RF_Free(k_cfg, sizeof(RF_Config_t));
   1193 
   1194 		return (retcode);
   1195 
   1196 		/* shutdown the system */
   1197 	case RAIDFRAME_SHUTDOWN:
   1198 
   1199 		part = DISKPART(dev);
   1200 		pmask = (1 << part);
   1201 
   1202 		if ((error = raidlock(rs)) != 0)
   1203 			return (error);
   1204 
   1205 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
   1206 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
   1207 			(rs->sc_dkdev.dk_copenmask & pmask)))
   1208 			retcode = EBUSY;
   1209 		else {
   1210 			rs->sc_flags |= RAIDF_SHUTDOWN;
   1211 			rs->sc_dkdev.dk_copenmask &= ~pmask;
   1212 			rs->sc_dkdev.dk_bopenmask &= ~pmask;
   1213 			rs->sc_dkdev.dk_openmask &= ~pmask;
   1214 			retcode = 0;
   1215 		}
   1216 
   1217 		raidunlock(rs);
   1218 
   1219 		if (retcode != 0)
   1220 			return retcode;
   1221 
   1222 		/* free the pseudo device attach bits */
   1223 
   1224 		cf = device_cfdata(rs->sc_dev);
   1225 		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
   1226 			free(cf, M_RAIDFRAME);
   1227 
   1228 		return (retcode);
   1229 	case RAIDFRAME_GET_COMPONENT_LABEL:
   1230 		clabel_ptr = (RF_ComponentLabel_t **) data;
   1231 		/* need to read the component label for the disk indicated
   1232 		   by row,column in clabel */
   1233 
   1234 		/* For practice, let's get it directly from disk, rather
   1235 		   than from the in-core copy */
   1236 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
   1237 			   (RF_ComponentLabel_t *));
   1238 		if (clabel == NULL)
   1239 			return (ENOMEM);
   1240 
   1241 		retcode = copyin( *clabel_ptr, clabel,
   1242 				  sizeof(RF_ComponentLabel_t));
   1243 
   1244 		if (retcode) {
   1245 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1246 			return(retcode);
   1247 		}
   1248 
   1249 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1250 
   1251 		column = clabel->column;
   1252 
   1253 		if ((column < 0) || (column >= raidPtr->numCol +
   1254 				     raidPtr->numSpare)) {
   1255 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1256 			return(EINVAL);
   1257 		}
   1258 
   1259 		retcode = raidread_component_label(raidPtr->Disks[column].dev,
   1260 				raidPtr->raid_cinfo[column].ci_vp,
   1261 				clabel );
   1262 
   1263 		if (retcode == 0) {
   1264 			retcode = copyout(clabel, *clabel_ptr,
   1265 					  sizeof(RF_ComponentLabel_t));
   1266 		}
   1267 		RF_Free(clabel, sizeof(RF_ComponentLabel_t));
   1268 		return (retcode);
   1269 
   1270 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1271 		clabel = (RF_ComponentLabel_t *) data;
   1272 
   1273 		/* XXX check the label for valid stuff... */
   1274 		/* Note that some things *should not* get modified --
   1275 		   the user should be re-initing the labels instead of
   1276 		   trying to patch things.
   1277 		   */
   1278 
   1279 		raidid = raidPtr->raidid;
   1280 #ifdef DEBUG
   1281 		printf("raid%d: Got component label:\n", raidid);
   1282 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1283 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1284 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1285 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1286 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1287 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1288 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1289 #endif
   1290 		clabel->row = 0;
   1291 		column = clabel->column;
   1292 
   1293 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1294 			return(EINVAL);
   1295 		}
   1296 
   1297 		/* XXX this isn't allowed to do anything for now :-) */
   1298 
   1299 		/* XXX and before it is, we need to fill in the rest
   1300 		   of the fields!?!?!?! */
   1301 #if 0
   1302 		raidwrite_component_label(
   1303 		     raidPtr->Disks[column].dev,
   1304 			    raidPtr->raid_cinfo[column].ci_vp,
   1305 			    clabel );
   1306 #endif
   1307 		return (0);
   1308 
   1309 	case RAIDFRAME_INIT_LABELS:
   1310 		clabel = (RF_ComponentLabel_t *) data;
   1311 		/*
   1312 		   we only want the serial number from
   1313 		   the above.  We get all the rest of the information
   1314 		   from the config that was used to create this RAID
   1315 		   set.
   1316 		   */
   1317 
   1318 		raidPtr->serial_number = clabel->serial_number;
   1319 
   1320 		RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
   1321 			  (RF_ComponentLabel_t *));
   1322 		if (ci_label == NULL)
   1323 			return (ENOMEM);
   1324 
   1325 		raid_init_component_label(raidPtr, ci_label);
   1326 		ci_label->serial_number = clabel->serial_number;
   1327 		ci_label->row = 0; /* we don't pretend to support more */
   1328 
   1329 		for(column=0;column<raidPtr->numCol;column++) {
   1330 			diskPtr = &raidPtr->Disks[column];
   1331 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1332 				ci_label->partitionSize = diskPtr->partitionSize;
   1333 				ci_label->column = column;
   1334 				raidwrite_component_label(
   1335 							  raidPtr->Disks[column].dev,
   1336 							  raidPtr->raid_cinfo[column].ci_vp,
   1337 							  ci_label );
   1338 			}
   1339 		}
   1340 		RF_Free(ci_label, sizeof(RF_ComponentLabel_t));
   1341 
   1342 		return (retcode);
   1343 	case RAIDFRAME_SET_AUTOCONFIG:
   1344 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1345 		printf("raid%d: New autoconfig value is: %d\n",
   1346 		       raidPtr->raidid, d);
   1347 		*(int *) data = d;
   1348 		return (retcode);
   1349 
   1350 	case RAIDFRAME_SET_ROOT:
   1351 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1352 		printf("raid%d: New rootpartition value is: %d\n",
   1353 		       raidPtr->raidid, d);
   1354 		*(int *) data = d;
   1355 		return (retcode);
   1356 
   1357 		/* initialize all parity */
   1358 	case RAIDFRAME_REWRITEPARITY:
   1359 
   1360 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1361 			/* Parity for RAID 0 is trivially correct */
   1362 			raidPtr->parity_good = RF_RAID_CLEAN;
   1363 			return(0);
   1364 		}
   1365 
   1366 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1367 			/* Re-write is already in progress! */
   1368 			return(EINVAL);
   1369 		}
   1370 
   1371 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1372 					   rf_RewriteParityThread,
   1373 					   raidPtr,"raid_parity");
   1374 		return (retcode);
   1375 
   1376 
   1377 	case RAIDFRAME_ADD_HOT_SPARE:
   1378 		sparePtr = (RF_SingleComponent_t *) data;
   1379 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
   1380 		retcode = rf_add_hot_spare(raidPtr, &component);
   1381 		return(retcode);
   1382 
   1383 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1384 		return(retcode);
   1385 
   1386 	case RAIDFRAME_DELETE_COMPONENT:
   1387 		componentPtr = (RF_SingleComponent_t *)data;
   1388 		memcpy( &component, componentPtr,
   1389 			sizeof(RF_SingleComponent_t));
   1390 		retcode = rf_delete_component(raidPtr, &component);
   1391 		return(retcode);
   1392 
   1393 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1394 		componentPtr = (RF_SingleComponent_t *)data;
   1395 		memcpy( &component, componentPtr,
   1396 			sizeof(RF_SingleComponent_t));
   1397 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1398 		return(retcode);
   1399 
   1400 	case RAIDFRAME_REBUILD_IN_PLACE:
   1401 
   1402 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1403 			/* Can't do this on a RAID 0!! */
   1404 			return(EINVAL);
   1405 		}
   1406 
   1407 		if (raidPtr->recon_in_progress == 1) {
   1408 			/* a reconstruct is already in progress! */
   1409 			return(EINVAL);
   1410 		}
   1411 
   1412 		componentPtr = (RF_SingleComponent_t *) data;
   1413 		memcpy( &component, componentPtr,
   1414 			sizeof(RF_SingleComponent_t));
   1415 		component.row = 0; /* we don't support any more */
   1416 		column = component.column;
   1417 
   1418 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1419 			return(EINVAL);
   1420 		}
   1421 
   1422 		RF_LOCK_MUTEX(raidPtr->mutex);
   1423 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1424 		    (raidPtr->numFailures > 0)) {
   1425 			/* XXX 0 above shouldn't be constant!!! */
   1426 			/* some component other than this has failed.
   1427 			   Let's not make things worse than they already
   1428 			   are... */
   1429 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1430 			       raidPtr->raidid);
   1431 			printf("raid%d:     Col: %d   Too many failures.\n",
   1432 			       raidPtr->raidid, column);
   1433 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1434 			return (EINVAL);
   1435 		}
   1436 		if (raidPtr->Disks[column].status ==
   1437 		    rf_ds_reconstructing) {
   1438 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1439 			       raidPtr->raidid);
   1440 			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);
   1441 
   1442 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1443 			return (EINVAL);
   1444 		}
   1445 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1446 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1447 			return (EINVAL);
   1448 		}
   1449 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1450 
   1451 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1452 		if (rrcopy == NULL)
   1453 			return(ENOMEM);
   1454 
   1455 		rrcopy->raidPtr = (void *) raidPtr;
   1456 		rrcopy->col = column;
   1457 
   1458 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1459 					   rf_ReconstructInPlaceThread,
   1460 					   rrcopy,"raid_reconip");
   1461 		return(retcode);
   1462 
   1463 	case RAIDFRAME_GET_INFO:
   1464 		if (!raidPtr->valid)
   1465 			return (ENODEV);
   1466 		ucfgp = (RF_DeviceConfig_t **) data;
   1467 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1468 			  (RF_DeviceConfig_t *));
   1469 		if (d_cfg == NULL)
   1470 			return (ENOMEM);
   1471 		d_cfg->rows = 1; /* there is only 1 row now */
   1472 		d_cfg->cols = raidPtr->numCol;
   1473 		d_cfg->ndevs = raidPtr->numCol;
   1474 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1475 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1476 			return (ENOMEM);
   1477 		}
   1478 		d_cfg->nspares = raidPtr->numSpare;
   1479 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1480 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1481 			return (ENOMEM);
   1482 		}
   1483 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1484 		d = 0;
   1485 		for (j = 0; j < d_cfg->cols; j++) {
   1486 			d_cfg->devs[d] = raidPtr->Disks[j];
   1487 			d++;
   1488 		}
   1489 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1490 			d_cfg->spares[i] = raidPtr->Disks[j];
   1491 		}
   1492 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1493 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1494 
   1495 		return (retcode);
   1496 
   1497 	case RAIDFRAME_CHECK_PARITY:
   1498 		*(int *) data = raidPtr->parity_good;
   1499 		return (0);
   1500 
   1501 	case RAIDFRAME_RESET_ACCTOTALS:
   1502 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1503 		return (0);
   1504 
   1505 	case RAIDFRAME_GET_ACCTOTALS:
   1506 		totals = (RF_AccTotals_t *) data;
   1507 		*totals = raidPtr->acc_totals;
   1508 		return (0);
   1509 
   1510 	case RAIDFRAME_KEEP_ACCTOTALS:
   1511 		raidPtr->keep_acc_totals = *(int *)data;
   1512 		return (0);
   1513 
   1514 	case RAIDFRAME_GET_SIZE:
   1515 		*(int *) data = raidPtr->totalSectors;
   1516 		return (0);
   1517 
   1518 		/* fail a disk & optionally start reconstruction */
   1519 	case RAIDFRAME_FAIL_DISK:
   1520 
   1521 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1522 			/* Can't do this on a RAID 0!! */
   1523 			return(EINVAL);
   1524 		}
   1525 
   1526 		rr = (struct rf_recon_req *) data;
   1527 		rr->row = 0;
   1528 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1529 			return (EINVAL);
   1530 
   1531 
   1532 		RF_LOCK_MUTEX(raidPtr->mutex);
   1533 		if (raidPtr->status == rf_rs_reconstructing) {
   1534 			/* you can't fail a disk while we're reconstructing! */
   1535 			/* XXX wrong for RAID6 */
   1536 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1537 			return (EINVAL);
   1538 		}
   1539 		if ((raidPtr->Disks[rr->col].status ==
   1540 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1541 			/* some other component has failed.  Let's not make
   1542 			   things worse. XXX wrong for RAID6 */
   1543 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1544 			return (EINVAL);
   1545 		}
   1546 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1547 			/* Can't fail a spared disk! */
   1548 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1549 			return (EINVAL);
   1550 		}
   1551 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1552 
   1553 		/* make a copy of the recon request so that we don't rely on
   1554 		 * the user's buffer */
   1555 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1556 		if (rrcopy == NULL)
   1557 			return(ENOMEM);
   1558 		memcpy(rrcopy, rr, sizeof(*rr));
   1559 		rrcopy->raidPtr = (void *) raidPtr;
   1560 
   1561 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1562 					   rf_ReconThread,
   1563 					   rrcopy,"raid_recon");
   1564 		return (0);
   1565 
   1566 		/* invoke a copyback operation after recon on whatever disk
   1567 		 * needs it, if any */
   1568 	case RAIDFRAME_COPYBACK:
   1569 
   1570 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1571 			/* This makes no sense on a RAID 0!! */
   1572 			return(EINVAL);
   1573 		}
   1574 
   1575 		if (raidPtr->copyback_in_progress == 1) {
   1576 			/* Copyback is already in progress! */
   1577 			return(EINVAL);
   1578 		}
   1579 
   1580 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1581 					   rf_CopybackThread,
   1582 					   raidPtr,"raid_copyback");
   1583 		return (retcode);
   1584 
   1585 		/* return the percentage completion of reconstruction */
   1586 	case RAIDFRAME_CHECK_RECON_STATUS:
   1587 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1588 			/* This makes no sense on a RAID 0, so tell the
   1589 			   user it's done. */
   1590 			*(int *) data = 100;
   1591 			return(0);
   1592 		}
   1593 		if (raidPtr->status != rf_rs_reconstructing)
   1594 			*(int *) data = 100;
   1595 		else {
   1596 			if (raidPtr->reconControl->numRUsTotal > 0) {
   1597 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
   1598 			} else {
   1599 				*(int *) data = 0;
   1600 			}
   1601 		}
   1602 		return (0);
   1603 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1604 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1605 		if (raidPtr->status != rf_rs_reconstructing) {
   1606 			progressInfo.remaining = 0;
   1607 			progressInfo.completed = 100;
   1608 			progressInfo.total = 100;
   1609 		} else {
   1610 			progressInfo.total =
   1611 				raidPtr->reconControl->numRUsTotal;
   1612 			progressInfo.completed =
   1613 				raidPtr->reconControl->numRUsComplete;
   1614 			progressInfo.remaining = progressInfo.total -
   1615 				progressInfo.completed;
   1616 		}
   1617 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1618 				  sizeof(RF_ProgressInfo_t));
   1619 		return (retcode);
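         	/*
         	 * Illustrative userland sketch (hypothetical, not part of this
         	 * driver): the _EXT status ioctls here and below take the
         	 * address of a pointer to an RF_ProgressInfo_t and fill in the
         	 * structure via copyout():
         	 *
         	 *	RF_ProgressInfo_t info, *infop = &info;
         	 *	if (ioctl(fd, RAIDFRAME_CHECK_RECON_STATUS_EXT, &infop) == 0)
         	 *		printf("%ju of %ju RUs done\n",
         	 *		    (uintmax_t)info.completed, (uintmax_t)info.total);
         	 */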
   1620 
   1621 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1622 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1623 			/* This makes no sense on a RAID 0, so tell the
   1624 			   user it's done. */
   1625 			*(int *) data = 100;
   1626 			return(0);
   1627 		}
   1628 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1629 			*(int *) data = 100 *
   1630 				raidPtr->parity_rewrite_stripes_done /
   1631 				raidPtr->Layout.numStripe;
   1632 		} else {
   1633 			*(int *) data = 100;
   1634 		}
   1635 		return (0);
   1636 
   1637 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1638 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1639 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1640 			progressInfo.total = raidPtr->Layout.numStripe;
   1641 			progressInfo.completed =
   1642 				raidPtr->parity_rewrite_stripes_done;
   1643 			progressInfo.remaining = progressInfo.total -
   1644 				progressInfo.completed;
   1645 		} else {
   1646 			progressInfo.remaining = 0;
   1647 			progressInfo.completed = 100;
   1648 			progressInfo.total = 100;
   1649 		}
   1650 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1651 				  sizeof(RF_ProgressInfo_t));
   1652 		return (retcode);
   1653 
   1654 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1655 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1656 			/* This makes no sense on a RAID 0 */
   1657 			*(int *) data = 100;
   1658 			return(0);
   1659 		}
   1660 		if (raidPtr->copyback_in_progress == 1) {
   1661 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1662 				raidPtr->Layout.numStripe;
   1663 		} else {
   1664 			*(int *) data = 100;
   1665 		}
   1666 		return (0);
   1667 
   1668 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1669 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1670 		if (raidPtr->copyback_in_progress == 1) {
   1671 			progressInfo.total = raidPtr->Layout.numStripe;
   1672 			progressInfo.completed =
   1673 				raidPtr->copyback_stripes_done;
   1674 			progressInfo.remaining = progressInfo.total -
   1675 				progressInfo.completed;
   1676 		} else {
   1677 			progressInfo.remaining = 0;
   1678 			progressInfo.completed = 100;
   1679 			progressInfo.total = 100;
   1680 		}
   1681 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1682 				  sizeof(RF_ProgressInfo_t));
   1683 		return (retcode);
   1684 
   1685 		/* the sparetable daemon calls this to wait for the kernel to
   1686 		 * need a spare table. this ioctl does not return until a
   1687 		 * spare table is needed. XXX -- calling mpsleep here in the
   1688 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1689 		 * -- I should either compute the spare table in the kernel,
   1690 		 * or have a different -- XXX XXX -- interface (a different
   1691 		 * character device) for delivering the table     -- XXX */
   1692 #if 0
   1693 	case RAIDFRAME_SPARET_WAIT:
   1694 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1695 		while (!rf_sparet_wait_queue)
   1696 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1697 		waitreq = rf_sparet_wait_queue;
   1698 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1699 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1700 
   1701 		/* structure assignment */
   1702 		*((RF_SparetWait_t *) data) = *waitreq;
   1703 
   1704 		RF_Free(waitreq, sizeof(*waitreq));
   1705 		return (0);
   1706 
   1707 		/* wakes up a process waiting on SPARET_WAIT and puts an error
    1708 		 * code in it that will cause the daemon to exit */
   1709 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1710 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1711 		waitreq->fcol = -1;
   1712 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1713 		waitreq->next = rf_sparet_wait_queue;
   1714 		rf_sparet_wait_queue = waitreq;
   1715 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1716 		wakeup(&rf_sparet_wait_queue);
   1717 		return (0);
   1718 
   1719 		/* used by the spare table daemon to deliver a spare table
   1720 		 * into the kernel */
   1721 	case RAIDFRAME_SEND_SPARET:
   1722 
   1723 		/* install the spare table */
   1724 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1725 
   1726 		/* respond to the requestor.  the return status of the spare
   1727 		 * table installation is passed in the "fcol" field */
   1728 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1729 		waitreq->fcol = retcode;
   1730 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1731 		waitreq->next = rf_sparet_resp_queue;
   1732 		rf_sparet_resp_queue = waitreq;
   1733 		wakeup(&rf_sparet_resp_queue);
   1734 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1735 
   1736 		return (retcode);
   1737 #endif
   1738 
   1739 	default:
    1740 		break; /* fall out of the switch to the os-specific code below */
   1741 
   1742 	}
   1743 
   1744 	if (!raidPtr->valid)
   1745 		return (EINVAL);
   1746 
   1747 	/*
   1748 	 * Add support for "regular" device ioctls here.
   1749 	 */
   1750 
   1751 	error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
   1752 	if (error != EPASSTHROUGH)
   1753 		return (error);
   1754 
   1755 	switch (cmd) {
   1756 	case DIOCGDINFO:
   1757 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1758 		break;
   1759 #ifdef __HAVE_OLD_DISKLABEL
   1760 	case ODIOCGDINFO:
   1761 		newlabel = *(rs->sc_dkdev.dk_label);
   1762 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1763 			return ENOTTY;
   1764 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1765 		break;
   1766 #endif
   1767 
   1768 	case DIOCGPART:
   1769 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1770 		((struct partinfo *) data)->part =
   1771 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1772 		break;
   1773 
   1774 	case DIOCWDINFO:
   1775 	case DIOCSDINFO:
   1776 #ifdef __HAVE_OLD_DISKLABEL
   1777 	case ODIOCWDINFO:
   1778 	case ODIOCSDINFO:
   1779 #endif
   1780 	{
   1781 		struct disklabel *lp;
   1782 #ifdef __HAVE_OLD_DISKLABEL
   1783 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1784 			memset(&newlabel, 0, sizeof newlabel);
   1785 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1786 			lp = &newlabel;
   1787 		} else
   1788 #endif
   1789 		lp = (struct disklabel *)data;
   1790 
   1791 		if ((error = raidlock(rs)) != 0)
   1792 			return (error);
   1793 
   1794 		rs->sc_flags |= RAIDF_LABELLING;
   1795 
   1796 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1797 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1798 		if (error == 0) {
   1799 			if (cmd == DIOCWDINFO
   1800 #ifdef __HAVE_OLD_DISKLABEL
   1801 			    || cmd == ODIOCWDINFO
   1802 #endif
   1803 			   )
   1804 				error = writedisklabel(RAIDLABELDEV(dev),
   1805 				    raidstrategy, rs->sc_dkdev.dk_label,
   1806 				    rs->sc_dkdev.dk_cpulabel);
   1807 		}
   1808 		rs->sc_flags &= ~RAIDF_LABELLING;
   1809 
   1810 		raidunlock(rs);
   1811 
   1812 		if (error)
   1813 			return (error);
   1814 		break;
   1815 	}
   1816 
   1817 	case DIOCWLABEL:
   1818 		if (*(int *) data != 0)
   1819 			rs->sc_flags |= RAIDF_WLABEL;
   1820 		else
   1821 			rs->sc_flags &= ~RAIDF_WLABEL;
   1822 		break;
   1823 
   1824 	case DIOCGDEFLABEL:
   1825 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1826 		break;
   1827 
   1828 #ifdef __HAVE_OLD_DISKLABEL
   1829 	case ODIOCGDEFLABEL:
   1830 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1831 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1832 			return ENOTTY;
   1833 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1834 		break;
   1835 #endif
   1836 
   1837 	case DIOCAWEDGE:
   1838 	case DIOCDWEDGE:
   1839 	    	dkw = (void *)data;
   1840 
   1841 		/* If the ioctl happens here, the parent is us. */
   1842 		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
   1843 		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
   1844 
   1845 	case DIOCLWEDGES:
   1846 		return dkwedge_list(&rs->sc_dkdev,
   1847 		    (struct dkwedge_list *)data, l);
   1848 	case DIOCCACHESYNC:
   1849 		return rf_sync_component_caches(raidPtr);
   1850 	default:
   1851 		retcode = ENOTTY;
   1852 	}
   1853 	return (retcode);
   1854 
   1855 }
   1856 
   1857 
   1858 /* raidinit -- complete the rest of the initialization for the
   1859    RAIDframe device.  */
   1860 
   1861 
   1862 static void
   1863 raidinit(RF_Raid_t *raidPtr)
   1864 {
   1865 	cfdata_t cf;
   1866 	struct raid_softc *rs;
   1867 	int     unit;
   1868 
   1869 	unit = raidPtr->raidid;
   1870 
   1871 	rs = &raid_softc[unit];
   1872 
   1873 	/* XXX should check return code first... */
   1874 	rs->sc_flags |= RAIDF_INITED;
   1875 
   1876 	/* XXX doesn't check bounds. */
   1877 	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
   1878 
   1879 	/* attach the pseudo device */
   1880 	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
   1881 	cf->cf_name = raid_cd.cd_name;
   1882 	cf->cf_atname = raid_cd.cd_name;
   1883 	cf->cf_unit = unit;
   1884 	cf->cf_fstate = FSTATE_STAR;
   1885 
   1886 	rs->sc_dev = config_attach_pseudo(cf);
   1887 
   1888 	if (rs->sc_dev==NULL) {
   1889 		printf("raid%d: config_attach_pseudo failed\n",
   1890 		       raidPtr->raidid);
   1891 		rs->sc_flags &= ~RAIDF_INITED;
   1892 		free(cf, M_RAIDFRAME);
   1893 		return;
   1894 	}
   1895 
   1896 	/* disk_attach actually creates space for the CPU disklabel, among
   1897 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1898 	 * with disklabels. */
   1899 
   1900 	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
   1901 	disk_attach(&rs->sc_dkdev);
   1902 
   1903 	/* XXX There may be a weird interaction here between this, and
   1904 	 * protectedSectors, as used in RAIDframe.  */
   1905 
   1906 	rs->sc_size = raidPtr->totalSectors;
   1907 
   1908 	dkwedge_discover(&rs->sc_dkdev);
   1909 
   1910 	rf_set_properties(rs, raidPtr);
   1911 
   1912 }
   1913 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   1914 /* wake up the daemon & tell it to get us a spare table
   1915  * XXX
   1916  * the entries in the queues should be tagged with the raidPtr
   1917  * so that in the extremely rare case that two recons happen at once,
    1918  * we know for which device we're requesting a spare table
   1919  * XXX
   1920  *
   1921  * XXX This code is not currently used. GO
   1922  */
   1923 int
   1924 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
   1925 {
   1926 	int     retcode;
   1927 
   1928 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1929 	req->next = rf_sparet_wait_queue;
   1930 	rf_sparet_wait_queue = req;
   1931 	wakeup(&rf_sparet_wait_queue);
   1932 
    1933 	/* XXX stale comment: the old mpsleep() dropped the mutex; tsleep() does not */
   1934 	while (!rf_sparet_resp_queue) {
   1935 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1936 		    "raidframe getsparetable", 0);
   1937 	}
   1938 	req = rf_sparet_resp_queue;
   1939 	rf_sparet_resp_queue = req->next;
   1940 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1941 
   1942 	retcode = req->fcol;
   1943 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1944 					 * alloc'd */
   1945 	return (retcode);
   1946 }
   1947 #endif
   1948 
   1949 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1950  * bp & passes it down.
    1951  * any calls originating in the kernel must use non-blocking I/O;
    1952  * we do some extra sanity checking to return "appropriate" error values for
   1953  * certain conditions (to make some standard utilities work)
   1954  *
   1955  * Formerly known as: rf_DoAccessKernel
   1956  */
   1957 void
   1958 raidstart(RF_Raid_t *raidPtr)
   1959 {
   1960 	RF_SectorCount_t num_blocks, pb, sum;
   1961 	RF_RaidAddr_t raid_addr;
   1962 	struct partition *pp;
   1963 	daddr_t blocknum;
   1964 	int     unit;
   1965 	struct raid_softc *rs;
   1966 	int     do_async;
   1967 	struct buf *bp;
   1968 	int rc;
   1969 
   1970 	unit = raidPtr->raidid;
   1971 	rs = &raid_softc[unit];
   1972 
   1973 	/* quick check to see if anything has died recently */
   1974 	RF_LOCK_MUTEX(raidPtr->mutex);
   1975 	if (raidPtr->numNewFailures > 0) {
   1976 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1977 		rf_update_component_labels(raidPtr,
   1978 					   RF_NORMAL_COMPONENT_UPDATE);
   1979 		RF_LOCK_MUTEX(raidPtr->mutex);
   1980 		raidPtr->numNewFailures--;
   1981 	}
   1982 
   1983 	/* Check to see if we're at the limit... */
   1984 	while (raidPtr->openings > 0) {
   1985 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1986 
   1987 		/* get the next item, if any, from the queue */
   1988 		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
   1989 			/* nothing more to do */
   1990 			return;
   1991 		}
   1992 
   1993 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1994 		 * partition.. Need to make it absolute to the underlying
   1995 		 * device.. */
   1996 
   1997 		blocknum = bp->b_blkno;
   1998 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1999 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   2000 			blocknum += pp->p_offset;
   2001 		}
   2002 
   2003 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   2004 			    (int) blocknum));
   2005 
   2006 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   2007 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   2008 
   2009 		/* *THIS* is where we adjust what block we're going to...
   2010 		 * but DO NOT TOUCH bp->b_blkno!!! */
   2011 		raid_addr = blocknum;
   2012 
   2013 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   2014 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   2015 		sum = raid_addr + num_blocks + pb;
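         		/* e.g. with 512-byte sectors (logBytesPerSector == 9), a
         		 * 64KB request at raid_addr 128 gives num_blocks == 128,
         		 * pb == 0 and sum == 256; a request whose byte count is
         		 * not a multiple of the sector size would set pb here and
         		 * is rejected with EINVAL further below. */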
   2016 		if (1 || rf_debugKernelAccess) {
   2017 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   2018 				    (int) raid_addr, (int) sum, (int) num_blocks,
   2019 				    (int) pb, (int) bp->b_resid));
   2020 		}
   2021 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   2022 		    || (sum < num_blocks) || (sum < pb)) {
   2023 			bp->b_error = ENOSPC;
   2024 			bp->b_resid = bp->b_bcount;
   2025 			biodone(bp);
   2026 			RF_LOCK_MUTEX(raidPtr->mutex);
   2027 			continue;
   2028 		}
   2029 		/*
   2030 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   2031 		 */
   2032 
   2033 		if (bp->b_bcount & raidPtr->sectorMask) {
   2034 			bp->b_error = EINVAL;
   2035 			bp->b_resid = bp->b_bcount;
   2036 			biodone(bp);
   2037 			RF_LOCK_MUTEX(raidPtr->mutex);
   2038 			continue;
   2039 
   2040 		}
   2041 		db1_printf(("Calling DoAccess..\n"));
   2042 
   2043 
   2044 		RF_LOCK_MUTEX(raidPtr->mutex);
   2045 		raidPtr->openings--;
   2046 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   2047 
   2048 		/*
   2049 		 * Everything is async.
   2050 		 */
   2051 		do_async = 1;
   2052 
   2053 		disk_busy(&rs->sc_dkdev);
   2054 
   2055 		/* XXX we're still at splbio() here... do we *really*
   2056 		   need to be? */
   2057 
   2058 		/* don't ever condition on bp->b_flags & B_WRITE.
   2059 		 * always condition on B_READ instead */
   2060 
   2061 		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   2062 				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   2063 				 do_async, raid_addr, num_blocks,
   2064 				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   2065 
   2066 		if (rc) {
   2067 			bp->b_error = rc;
   2068 			bp->b_resid = bp->b_bcount;
   2069 			biodone(bp);
   2070 			/* continue loop */
   2071 		}
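         		/* If rf_DoAccess() accepted the request, completion is
         		 * asynchronous: component I/Os come back through
         		 * KernelWakeupFunc() below, which queues them on
         		 * raidPtr->iodone and wakes the raidio thread. */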
   2072 
   2073 		RF_LOCK_MUTEX(raidPtr->mutex);
   2074 	}
   2075 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   2076 }
   2077 
   2078 
   2079 
   2080 
   2081 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   2082 
   2083 int
   2084 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
   2085 {
   2086 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   2087 	struct buf *bp;
   2088 
   2089 	req->queue = queue;
   2090 	bp = req->bp;
   2091 
   2092 	switch (req->type) {
   2093 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   2094 		/* XXX need to do something extra here.. */
   2095 		/* I'm leaving this in, as I've never actually seen it used,
   2096 		 * and I'd like folks to report it... GO */
    2097 		printf("WAKEUP CALLED\n");
   2098 		queue->numOutstanding++;
   2099 
   2100 		bp->b_flags = 0;
   2101 		bp->b_private = req;
   2102 
   2103 		KernelWakeupFunc(bp);
   2104 		break;
   2105 
   2106 	case RF_IO_TYPE_READ:
   2107 	case RF_IO_TYPE_WRITE:
   2108 #if RF_ACC_TRACE > 0
   2109 		if (req->tracerec) {
   2110 			RF_ETIMER_START(req->tracerec->timer);
   2111 		}
   2112 #endif
   2113 		InitBP(bp, queue->rf_cinfo->ci_vp,
   2114 		    op, queue->rf_cinfo->ci_dev,
   2115 		    req->sectorOffset, req->numSector,
   2116 		    req->buf, KernelWakeupFunc, (void *) req,
   2117 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   2118 
   2119 		if (rf_debugKernelAccess) {
   2120 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   2121 				(long) bp->b_blkno));
   2122 		}
   2123 		queue->numOutstanding++;
   2124 		queue->last_deq_sector = req->sectorOffset;
   2125 		/* acc wouldn't have been let in if there were any pending
   2126 		 * reqs at any other priority */
   2127 		queue->curPriority = req->priority;
   2128 
   2129 		db1_printf(("Going for %c to unit %d col %d\n",
   2130 			    req->type, queue->raidPtr->raidid,
   2131 			    queue->col));
   2132 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   2133 			(int) req->sectorOffset, (int) req->numSector,
   2134 			(int) (req->numSector <<
   2135 			    queue->raidPtr->logBytesPerSector),
   2136 			(int) queue->raidPtr->logBytesPerSector));
   2137 
   2138 		/*
   2139 		 * XXX: drop lock here since this can block at
   2140 		 * least with backing SCSI devices.  Retake it
   2141 		 * to minimize fuss with calling interfaces.
   2142 		 */
   2143 
   2144 		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
   2145 		bdev_strategy(bp);
   2146 		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
   2147 		break;
   2148 
   2149 	default:
   2150 		panic("bad req->type in rf_DispatchKernelIO");
   2151 	}
   2152 	db1_printf(("Exiting from DispatchKernelIO\n"));
   2153 
   2154 	return (0);
   2155 }
    2156 /* this is the callback function associated with an I/O invoked from
   2157    kernel code.
   2158  */
   2159 static void
   2160 KernelWakeupFunc(struct buf *bp)
   2161 {
   2162 	RF_DiskQueueData_t *req = NULL;
   2163 	RF_DiskQueue_t *queue;
   2164 	int s;
   2165 
   2166 	s = splbio();
   2167 	db1_printf(("recovering the request queue:\n"));
   2168 	req = bp->b_private;
   2169 
   2170 	queue = (RF_DiskQueue_t *) req->queue;
   2171 
   2172 #if RF_ACC_TRACE > 0
   2173 	if (req->tracerec) {
   2174 		RF_ETIMER_STOP(req->tracerec->timer);
   2175 		RF_ETIMER_EVAL(req->tracerec->timer);
   2176 		RF_LOCK_MUTEX(rf_tracing_mutex);
   2177 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2178 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   2179 		req->tracerec->num_phys_ios++;
   2180 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   2181 	}
   2182 #endif
   2183 
   2184 	/* XXX Ok, let's get aggressive... If b_error is set, let's go
   2185 	 * ballistic, and mark the component as hosed... */
   2186 
   2187 	if (bp->b_error != 0) {
   2188 		/* Mark the disk as dead */
   2189 		/* but only mark it once... */
   2190 		/* and only if it wouldn't leave this RAID set
   2191 		   completely broken */
   2192 		if (((queue->raidPtr->Disks[queue->col].status ==
   2193 		      rf_ds_optimal) ||
   2194 		     (queue->raidPtr->Disks[queue->col].status ==
   2195 		      rf_ds_used_spare)) &&
   2196 		     (queue->raidPtr->numFailures <
   2197 		      queue->raidPtr->Layout.map->faultsTolerated)) {
   2198 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   2199 			       queue->raidPtr->raidid,
   2200 			       queue->raidPtr->Disks[queue->col].devname);
   2201 			queue->raidPtr->Disks[queue->col].status =
   2202 			    rf_ds_failed;
   2203 			queue->raidPtr->status = rf_rs_degraded;
   2204 			queue->raidPtr->numFailures++;
   2205 			queue->raidPtr->numNewFailures++;
   2206 		} else {	/* Disk is already dead... */
   2207 			/* printf("Disk already marked as dead!\n"); */
   2208 		}
   2209 
   2210 	}
   2211 
   2212 	/* Fill in the error value */
   2213 
   2214 	req->error = bp->b_error;
   2215 
   2216 	simple_lock(&queue->raidPtr->iodone_lock);
   2217 
   2218 	/* Drop this one on the "finished" queue... */
   2219 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   2220 
   2221 	/* Let the raidio thread know there is work to be done. */
   2222 	wakeup(&(queue->raidPtr->iodone));
   2223 
   2224 	simple_unlock(&queue->raidPtr->iodone_lock);
   2225 
   2226 	splx(s);
   2227 }
   2228 
   2229 
   2230 
   2231 /*
   2232  * initialize a buf structure for doing an I/O in the kernel.
   2233  */
   2234 static void
   2235 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   2236        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
   2237        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
   2238        struct proc *b_proc)
   2239 {
   2240 	/* bp->b_flags       = B_PHYS | rw_flag; */
   2241 	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
   2242 	bp->b_oflags = 0;
   2243 	bp->b_cflags = 0;
   2244 	bp->b_bcount = numSect << logBytesPerSector;
   2245 	bp->b_bufsize = bp->b_bcount;
   2246 	bp->b_error = 0;
   2247 	bp->b_dev = dev;
   2248 	bp->b_data = bf;
   2249 	bp->b_blkno = startSect;
   2250 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   2251 	if (bp->b_bcount == 0) {
   2252 		panic("bp->b_bcount is zero in InitBP!!");
   2253 	}
   2254 	bp->b_proc = b_proc;
   2255 	bp->b_iodone = cbFunc;
   2256 	bp->b_private = cbArg;
   2257 }
   2258 
   2259 static void
   2260 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
   2261 		    struct disklabel *lp)
   2262 {
   2263 	memset(lp, 0, sizeof(*lp));
   2264 
   2265 	/* fabricate a label... */
   2266 	lp->d_secperunit = raidPtr->totalSectors;
   2267 	lp->d_secsize = raidPtr->bytesPerSector;
   2268 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2269 	lp->d_ntracks = 4 * raidPtr->numCol;
   2270 	lp->d_ncylinders = raidPtr->totalSectors /
   2271 		(lp->d_nsectors * lp->d_ntracks);
   2272 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
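         	/* The geometry fabricated above is purely fictitious; e.g. a
         	 * 5-column set with 32 data sectors per stripe would get
         	 * d_nsectors == 32, d_ntracks == 20 and d_secpercyl == 640.
         	 * It only exists to keep disklabel-based tools happy. */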
   2273 
   2274 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2275 	lp->d_type = DTYPE_RAID;
   2276 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2277 	lp->d_rpm = 3600;
   2278 	lp->d_interleave = 1;
   2279 	lp->d_flags = 0;
   2280 
   2281 	lp->d_partitions[RAW_PART].p_offset = 0;
   2282 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2283 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2284 	lp->d_npartitions = RAW_PART + 1;
   2285 
   2286 	lp->d_magic = DISKMAGIC;
   2287 	lp->d_magic2 = DISKMAGIC;
   2288 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2289 
   2290 }
   2291 /*
   2292  * Read the disklabel from the raid device.  If one is not present, fake one
   2293  * up.
   2294  */
   2295 static void
   2296 raidgetdisklabel(dev_t dev)
   2297 {
   2298 	int     unit = raidunit(dev);
   2299 	struct raid_softc *rs = &raid_softc[unit];
   2300 	const char   *errstring;
   2301 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2302 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2303 	RF_Raid_t *raidPtr;
   2304 
   2305 	db1_printf(("Getting the disklabel...\n"));
   2306 
   2307 	memset(clp, 0, sizeof(*clp));
   2308 
   2309 	raidPtr = raidPtrs[unit];
   2310 
   2311 	raidgetdefaultlabel(raidPtr, rs, lp);
   2312 
   2313 	/*
   2314 	 * Call the generic disklabel extraction routine.
   2315 	 */
   2316 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2317 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2318 	if (errstring)
   2319 		raidmakedisklabel(rs);
   2320 	else {
   2321 		int     i;
   2322 		struct partition *pp;
   2323 
   2324 		/*
   2325 		 * Sanity check whether the found disklabel is valid.
   2326 		 *
    2327 		 * This is necessary since the total size of the raid device
    2328 		 * may vary when the interleave is changed even though exactly
    2329 		 * the same components are used, and an old disklabel may be
    2330 		 * used if one is found.
   2331 		 */
   2332 		if (lp->d_secperunit != rs->sc_size)
   2333 			printf("raid%d: WARNING: %s: "
   2334 			    "total sector size in disklabel (%" PRIu32 ") != "
   2335 			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
   2336 			    lp->d_secperunit, rs->sc_size);
   2337 		for (i = 0; i < lp->d_npartitions; i++) {
   2338 			pp = &lp->d_partitions[i];
   2339 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2340 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2341 				       "exceeds the size of raid (%" PRIu64 ")\n",
   2342 				       unit, rs->sc_xname, 'a' + i, rs->sc_size);
   2343 		}
   2344 	}
   2345 
   2346 }
   2347 /*
   2348  * Take care of things one might want to take care of in the event
   2349  * that a disklabel isn't present.
   2350  */
   2351 static void
   2352 raidmakedisklabel(struct raid_softc *rs)
   2353 {
   2354 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2355 	db1_printf(("Making a label..\n"));
   2356 
   2357 	/*
   2358 	 * For historical reasons, if there's no disklabel present
   2359 	 * the raw partition must be marked FS_BSDFFS.
   2360 	 */
   2361 
   2362 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2363 
   2364 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2365 
   2366 	lp->d_checksum = dkcksum(lp);
   2367 }
   2368 /*
   2369  * Wait interruptibly for an exclusive lock.
   2370  *
   2371  * XXX
   2372  * Several drivers do this; it should be abstracted and made MP-safe.
   2373  * (Hmm... where have we seen this warning before :->  GO )
   2374  */
   2375 static int
   2376 raidlock(struct raid_softc *rs)
   2377 {
   2378 	int     error;
   2379 
   2380 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2381 		rs->sc_flags |= RAIDF_WANTED;
   2382 		if ((error =
   2383 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2384 			return (error);
   2385 	}
   2386 	rs->sc_flags |= RAIDF_LOCKED;
   2387 	return (0);
   2388 }
   2389 /*
   2390  * Unlock and wake up any waiters.
   2391  */
   2392 static void
   2393 raidunlock(struct raid_softc *rs)
   2394 {
   2395 
   2396 	rs->sc_flags &= ~RAIDF_LOCKED;
   2397 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2398 		rs->sc_flags &= ~RAIDF_WANTED;
   2399 		wakeup(rs);
   2400 	}
   2401 }
   2402 
   2403 
   2404 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2405 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
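         /* With the traditional 512-byte DEV_BSIZE this places the component
          * label 32 blocks (16384 / 512) into the reserved region at the start
          * of each component; each label transfer below moves 1024 bytes. */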
   2406 
   2407 int
   2408 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2409 {
   2410 	RF_ComponentLabel_t clabel;
   2411 	raidread_component_label(dev, b_vp, &clabel);
   2412 	clabel.mod_counter = mod_counter;
   2413 	clabel.clean = RF_RAID_CLEAN;
   2414 	raidwrite_component_label(dev, b_vp, &clabel);
   2415 	return(0);
   2416 }
   2417 
   2418 
   2419 int
   2420 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2421 {
   2422 	RF_ComponentLabel_t clabel;
   2423 	raidread_component_label(dev, b_vp, &clabel);
   2424 	clabel.mod_counter = mod_counter;
   2425 	clabel.clean = RF_RAID_DIRTY;
   2426 	raidwrite_component_label(dev, b_vp, &clabel);
   2427 	return(0);
   2428 }
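
         /* raidmarkclean()/raidmarkdirty() implement the clean-bit protocol:
          * rf_markalldirty() below marks every working component dirty, and
          * rf_update_component_labels() marks components clean again on a
          * final update only when parity is known to be good. */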
   2429 
   2430 /* ARGSUSED */
   2431 int
   2432 raidread_component_label(dev_t dev, struct vnode *b_vp,
   2433 			 RF_ComponentLabel_t *clabel)
   2434 {
   2435 	struct buf *bp;
   2436 	const struct bdevsw *bdev;
   2437 	int error;
   2438 
   2439 	/* XXX should probably ensure that we don't try to do this if
   2440 	   someone has changed rf_protected_sectors. */
   2441 
   2442 	if (b_vp == NULL) {
   2443 		/* For whatever reason, this component is not valid.
   2444 		   Don't try to read a component label from it. */
   2445 		return(EINVAL);
   2446 	}
   2447 
   2448 	/* get a block of the appropriate size... */
   2449 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2450 	bp->b_dev = dev;
   2451 
   2452 	/* get our ducks in a row for the read */
   2453 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2454 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2455 	bp->b_flags |= B_READ;
   2456  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2457 
   2458 	bdev = bdevsw_lookup(bp->b_dev);
    2459 	if (bdev == NULL) {
         		/* release the buffer we geteblk()'d above */
         		brelse(bp, 0);
    2460 		return (ENXIO);
         	}
   2461 	(*bdev->d_strategy)(bp);
   2462 
   2463 	error = biowait(bp);
   2464 
   2465 	if (!error) {
   2466 		memcpy(clabel, bp->b_data,
   2467 		       sizeof(RF_ComponentLabel_t));
   2468 	}
   2469 
   2470 	brelse(bp, 0);
   2471 	return(error);
   2472 }
   2473 /* ARGSUSED */
   2474 int
   2475 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
   2476 			  RF_ComponentLabel_t *clabel)
   2477 {
   2478 	struct buf *bp;
   2479 	const struct bdevsw *bdev;
   2480 	int error;
   2481 
   2482 	/* get a block of the appropriate size... */
   2483 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2484 	bp->b_dev = dev;
   2485 
   2486 	/* get our ducks in a row for the write */
   2487 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2488 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2489 	bp->b_flags |= B_WRITE;
   2490  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2491 
   2492 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2493 
   2494 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2495 
   2496 	bdev = bdevsw_lookup(bp->b_dev);
    2497 	if (bdev == NULL) {
         		/* release the buffer we geteblk()'d above */
         		brelse(bp, 0);
    2498 		return (ENXIO);
         	}
   2499 	(*bdev->d_strategy)(bp);
   2500 	error = biowait(bp);
   2501 	brelse(bp, 0);
   2502 	if (error) {
   2503 #if 1
   2504 		printf("Failed to write RAID component info!\n");
   2505 #endif
   2506 	}
   2507 
   2508 	return(error);
   2509 }
   2510 
   2511 void
   2512 rf_markalldirty(RF_Raid_t *raidPtr)
   2513 {
   2514 	RF_ComponentLabel_t clabel;
   2515 	int sparecol;
   2516 	int c;
   2517 	int j;
   2518 	int scol = -1;
   2519 
   2520 	raidPtr->mod_counter++;
   2521 	for (c = 0; c < raidPtr->numCol; c++) {
   2522 		/* we don't want to touch (at all) a disk that has
   2523 		   failed */
   2524 		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
   2525 			raidread_component_label(
   2526 						 raidPtr->Disks[c].dev,
   2527 						 raidPtr->raid_cinfo[c].ci_vp,
   2528 						 &clabel);
   2529 			if (clabel.status == rf_ds_spared) {
   2530 				/* XXX do something special...
   2531 				   but whatever you do, don't
   2532 				   try to access it!! */
   2533 			} else {
   2534 				raidmarkdirty(
   2535 					      raidPtr->Disks[c].dev,
   2536 					      raidPtr->raid_cinfo[c].ci_vp,
   2537 					      raidPtr->mod_counter);
   2538 			}
   2539 		}
   2540 	}
   2541 
   2542 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2543 		sparecol = raidPtr->numCol + c;
   2544 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
    2545 			/*
    2546 
    2547 			   We claim this disk is "optimal" if it's
    2548 			   rf_ds_used_spare, as that means it should be
    2549 			   directly substitutable for the disk it replaced.
    2550 			   We also note which column it stands in for.
    2551 
    2552 			 */
   2553 
   2554 			for(j=0;j<raidPtr->numCol;j++) {
   2555 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2556 					scol = j;
   2557 					break;
   2558 				}
   2559 			}
   2560 
   2561 			raidread_component_label(
   2562 				 raidPtr->Disks[sparecol].dev,
   2563 				 raidPtr->raid_cinfo[sparecol].ci_vp,
   2564 				 &clabel);
   2565 			/* make sure status is noted */
   2566 
   2567 			raid_init_component_label(raidPtr, &clabel);
   2568 
   2569 			clabel.row = 0;
   2570 			clabel.column = scol;
   2571 			/* Note: we *don't* change status from rf_ds_used_spare
   2572 			   to rf_ds_optimal */
   2573 			/* clabel.status = rf_ds_optimal; */
   2574 
   2575 			raidmarkdirty(raidPtr->Disks[sparecol].dev,
   2576 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2577 				      raidPtr->mod_counter);
   2578 		}
   2579 	}
   2580 }
   2581 
   2582 
   2583 void
   2584 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
   2585 {
   2586 	RF_ComponentLabel_t clabel;
   2587 	int sparecol;
   2588 	int c;
   2589 	int j;
   2590 	int scol;
   2591 
   2592 	scol = -1;
   2593 
   2594 	/* XXX should do extra checks to make sure things really are clean,
   2595 	   rather than blindly setting the clean bit... */
   2596 
   2597 	raidPtr->mod_counter++;
   2598 
   2599 	for (c = 0; c < raidPtr->numCol; c++) {
   2600 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   2601 			raidread_component_label(
   2602 						 raidPtr->Disks[c].dev,
   2603 						 raidPtr->raid_cinfo[c].ci_vp,
   2604 						 &clabel);
   2605 			/* make sure status is noted */
   2606 			clabel.status = rf_ds_optimal;
   2607 
   2608 			/* bump the counter */
   2609 			clabel.mod_counter = raidPtr->mod_counter;
   2610 
   2611 			/* note what unit we are configured as */
   2612 			clabel.last_unit = raidPtr->raidid;
   2613 
   2614 			raidwrite_component_label(
   2615 						  raidPtr->Disks[c].dev,
   2616 						  raidPtr->raid_cinfo[c].ci_vp,
   2617 						  &clabel);
   2618 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2619 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2620 					raidmarkclean(
   2621 						      raidPtr->Disks[c].dev,
   2622 						      raidPtr->raid_cinfo[c].ci_vp,
   2623 						      raidPtr->mod_counter);
   2624 				}
   2625 			}
   2626 		}
   2627 		/* else we don't touch it.. */
   2628 	}
   2629 
   2630 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2631 		sparecol = raidPtr->numCol + c;
   2632 		/* Need to ensure that the reconstruct actually completed! */
   2633 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
    2634 			/*
    2635 
    2636 			   We claim this disk is "optimal" if it's
    2637 			   rf_ds_used_spare, as that means it should be
    2638 			   directly substitutable for the disk it replaced.
    2639 			   We also note which column it stands in for.
    2640 
    2641 			 */
   2642 
   2643 			for(j=0;j<raidPtr->numCol;j++) {
   2644 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2645 					scol = j;
   2646 					break;
   2647 				}
   2648 			}
   2649 
   2650 			/* XXX shouldn't *really* need this... */
   2651 			raidread_component_label(
   2652 				      raidPtr->Disks[sparecol].dev,
   2653 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2654 				      &clabel);
   2655 			/* make sure status is noted */
   2656 
   2657 			raid_init_component_label(raidPtr, &clabel);
   2658 
   2659 			clabel.mod_counter = raidPtr->mod_counter;
   2660 			clabel.column = scol;
   2661 			clabel.status = rf_ds_optimal;
   2662 			clabel.last_unit = raidPtr->raidid;
   2663 
   2664 			raidwrite_component_label(
   2665 				      raidPtr->Disks[sparecol].dev,
   2666 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2667 				      &clabel);
   2668 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2669 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2670 					raidmarkclean( raidPtr->Disks[sparecol].dev,
   2671 						       raidPtr->raid_cinfo[sparecol].ci_vp,
   2672 						       raidPtr->mod_counter);
   2673 				}
   2674 			}
   2675 		}
   2676 	}
   2677 }
   2678 
   2679 void
   2680 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2681 {
   2682 
   2683 	if (vp != NULL) {
   2684 		if (auto_configured == 1) {
   2685 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2686 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2687 			vput(vp);
   2688 
   2689 		} else {
   2690 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
   2691 		}
   2692 	}
   2693 }
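
         /* The auto_configured case above matches the bdevvp()/VOP_OPEN() opens
          * done in rf_find_raid_components() below; those vnodes are released
          * with VOP_CLOSE() and vput() rather than vn_close(). */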
   2694 
   2695 
   2696 void
   2697 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2698 {
   2699 	int r,c;
   2700 	struct vnode *vp;
   2701 	int acd;
   2702 
   2703 
   2704 	/* We take this opportunity to close the vnodes like we should.. */
   2705 
   2706 	for (c = 0; c < raidPtr->numCol; c++) {
   2707 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2708 		acd = raidPtr->Disks[c].auto_configured;
   2709 		rf_close_component(raidPtr, vp, acd);
   2710 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2711 		raidPtr->Disks[c].auto_configured = 0;
   2712 	}
   2713 
   2714 	for (r = 0; r < raidPtr->numSpare; r++) {
   2715 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2716 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2717 		rf_close_component(raidPtr, vp, acd);
   2718 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2719 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2720 	}
   2721 }
   2722 
   2723 
   2724 void
   2725 rf_ReconThread(struct rf_recon_req *req)
   2726 {
   2727 	int     s;
   2728 	RF_Raid_t *raidPtr;
   2729 
   2730 	s = splbio();
   2731 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2732 	raidPtr->recon_in_progress = 1;
   2733 
   2734 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2735 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2736 
   2737 	RF_Free(req, sizeof(*req));
   2738 
   2739 	raidPtr->recon_in_progress = 0;
   2740 	splx(s);
   2741 
   2742 	/* That's all... */
   2743 	kthread_exit(0);	/* does not return */
   2744 }
   2745 
   2746 void
   2747 rf_RewriteParityThread(RF_Raid_t *raidPtr)
   2748 {
   2749 	int retcode;
   2750 	int s;
   2751 
   2752 	raidPtr->parity_rewrite_stripes_done = 0;
   2753 	raidPtr->parity_rewrite_in_progress = 1;
   2754 	s = splbio();
   2755 	retcode = rf_RewriteParity(raidPtr);
   2756 	splx(s);
   2757 	if (retcode) {
   2758 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2759 	} else {
   2760 		/* set the clean bit!  If we shutdown correctly,
   2761 		   the clean bit on each component label will get
   2762 		   set */
   2763 		raidPtr->parity_good = RF_RAID_CLEAN;
   2764 	}
   2765 	raidPtr->parity_rewrite_in_progress = 0;
   2766 
   2767 	/* Anyone waiting for us to stop?  If so, inform them... */
   2768 	if (raidPtr->waitShutdown) {
   2769 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2770 	}
   2771 
   2772 	/* That's all... */
   2773 	kthread_exit(0);	/* does not return */
   2774 }
   2775 
   2776 
   2777 void
   2778 rf_CopybackThread(RF_Raid_t *raidPtr)
   2779 {
   2780 	int s;
   2781 
   2782 	raidPtr->copyback_in_progress = 1;
   2783 	s = splbio();
   2784 	rf_CopybackReconstructedData(raidPtr);
   2785 	splx(s);
   2786 	raidPtr->copyback_in_progress = 0;
   2787 
   2788 	/* That's all... */
   2789 	kthread_exit(0);	/* does not return */
   2790 }
   2791 
   2792 
   2793 void
   2794 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
   2795 {
   2796 	int s;
   2797 	RF_Raid_t *raidPtr;
   2798 
   2799 	s = splbio();
   2800 	raidPtr = req->raidPtr;
   2801 	raidPtr->recon_in_progress = 1;
   2802 	rf_ReconstructInPlace(raidPtr, req->col);
   2803 	RF_Free(req, sizeof(*req));
   2804 	raidPtr->recon_in_progress = 0;
   2805 	splx(s);
   2806 
   2807 	/* That's all... */
   2808 	kthread_exit(0);	/* does not return */
   2809 }
   2810 
   2811 static RF_AutoConfig_t *
   2812 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
   2813     const char *cname, RF_SectorCount_t size)
   2814 {
   2815 	int good_one = 0;
   2816 	RF_ComponentLabel_t *clabel;
   2817 	RF_AutoConfig_t *ac;
   2818 
   2819 	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
   2820 	if (clabel == NULL) {
   2821 oomem:
   2822 		    while(ac_list) {
   2823 			    ac = ac_list;
   2824 			    if (ac->clabel)
   2825 				    free(ac->clabel, M_RAIDFRAME);
   2826 			    ac_list = ac_list->next;
   2827 			    free(ac, M_RAIDFRAME);
   2828 		    }
   2829 		    printf("RAID auto config: out of memory!\n");
   2830 		    return NULL; /* XXX probably should panic? */
   2831 	}
   2832 
   2833 	if (!raidread_component_label(dev, vp, clabel)) {
   2834 		    /* Got the label.  Does it look reasonable? */
   2835 		    if (rf_reasonable_label(clabel) &&
   2836 			(clabel->partitionSize <= size)) {
   2837 #ifdef DEBUG
   2838 			    printf("Component on: %s: %llu\n",
   2839 				cname, (unsigned long long)size);
   2840 			    rf_print_component_label(clabel);
   2841 #endif
   2842 			    /* if it's reasonable, add it, else ignore it. */
   2843 			    ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
   2844 				M_NOWAIT);
   2845 			    if (ac == NULL) {
   2846 				    free(clabel, M_RAIDFRAME);
   2847 				    goto oomem;
   2848 			    }
   2849 			    strlcpy(ac->devname, cname, sizeof(ac->devname));
   2850 			    ac->dev = dev;
   2851 			    ac->vp = vp;
   2852 			    ac->clabel = clabel;
   2853 			    ac->next = ac_list;
   2854 			    ac_list = ac;
   2855 			    good_one = 1;
   2856 		    }
   2857 	}
   2858 	if (!good_one) {
   2859 		/* cleanup */
   2860 		free(clabel, M_RAIDFRAME);
   2861 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2862 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2863 		vput(vp);
   2864 	}
   2865 	return ac_list;
   2866 }
   2867 
   2868 RF_AutoConfig_t *
   2869 rf_find_raid_components(void)
   2870 {
   2871 	struct vnode *vp;
   2872 	struct disklabel label;
   2873 	device_t dv;
   2874 	dev_t dev;
   2875 	int bmajor, bminor, wedge;
   2876 	int error;
   2877 	int i;
   2878 	RF_AutoConfig_t *ac_list;
   2879 
   2880 
   2881 	/* initialize the AutoConfig list */
   2882 	ac_list = NULL;
   2883 
   2884 	/* we begin by trolling through *all* the devices on the system */
   2885 
   2886 	for (dv = alldevs.tqh_first; dv != NULL;
   2887 	     dv = dv->dv_list.tqe_next) {
   2888 
   2889 		/* we are only interested in disks... */
   2890 		if (device_class(dv) != DV_DISK)
   2891 			continue;
   2892 
   2893 		/* we don't care about floppies... */
   2894 		if (device_is_a(dv, "fd")) {
   2895 			continue;
   2896 		}
   2897 
   2898 		/* we don't care about CD's... */
   2899 		if (device_is_a(dv, "cd")) {
   2900 			continue;
   2901 		}
   2902 
   2903 		/* we don't care about md's... */
   2904 		if (device_is_a(dv, "md")) {
   2905 			continue;
   2906 		}
   2907 
   2908 		/* hdfd is the Atari/Hades floppy driver */
   2909 		if (device_is_a(dv, "hdfd")) {
   2910 			continue;
   2911 		}
   2912 
   2913 		/* fdisa is the Atari/Milan floppy driver */
   2914 		if (device_is_a(dv, "fdisa")) {
   2915 			continue;
   2916 		}
   2917 
   2918 		/* need to find the device_name_to_block_device_major stuff */
   2919 		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
   2920 
   2921 		/* get a vnode for the raw partition of this disk */
   2922 
   2923 		wedge = device_is_a(dv, "dk");
   2924 		bminor = minor(device_unit(dv));
   2925 		dev = wedge ? makedev(bmajor, bminor) :
   2926 		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
   2927 		if (bdevvp(dev, &vp))
   2928 			panic("RAID can't alloc vnode");
   2929 
   2930 		error = VOP_OPEN(vp, FREAD, NOCRED);
   2931 
   2932 		if (error) {
   2933 			/* "Who cares."  Continue looking
    2934 			   for something that exists */
   2935 			vput(vp);
   2936 			continue;
   2937 		}
   2938 
   2939 		if (wedge) {
   2940 			struct dkwedge_info dkw;
   2941 			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
   2942 			    NOCRED);
   2943 			if (error) {
   2944 				printf("RAIDframe: can't get wedge info for "
   2945 				    "dev %s (%d)\n", device_xname(dv), error);
   2946 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2947 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2948 				vput(vp);
   2949 				continue;
   2950 			}
   2951 
   2952 			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
   2953 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2954 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2955 				vput(vp);
   2956 				continue;
   2957 			}
   2958 
   2959 			ac_list = rf_get_component(ac_list, dev, vp,
   2960 			    device_xname(dv), dkw.dkw_size);
   2961 			continue;
   2962 		}
   2963 
   2964 		/* Ok, the disk exists.  Go get the disklabel. */
   2965 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
   2966 		if (error) {
   2967 			/*
   2968 			 * XXX can't happen - open() would
   2969 			 * have errored out (or faked up one)
   2970 			 */
   2971 			if (error != ENOTTY)
   2972 				printf("RAIDframe: can't get label for dev "
   2973 				    "%s (%d)\n", device_xname(dv), error);
   2974 		}
   2975 
   2976 		/* don't need this any more.  We'll allocate it again
   2977 		   a little later if we really do... */
   2978 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2979 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
   2980 		vput(vp);
   2981 
   2982 		if (error)
   2983 			continue;
   2984 
   2985 		for (i = 0; i < label.d_npartitions; i++) {
   2986 			char cname[sizeof(ac_list->devname)];
   2987 
   2988 			/* We only support partitions marked as RAID */
   2989 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2990 				continue;
   2991 
   2992 			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
   2993 			if (bdevvp(dev, &vp))
   2994 				panic("RAID can't alloc vnode");
   2995 
   2996 			error = VOP_OPEN(vp, FREAD, NOCRED);
   2997 			if (error) {
   2998 				/* Whatever... */
   2999 				vput(vp);
   3000 				continue;
   3001 			}
   3002 			snprintf(cname, sizeof(cname), "%s%c",
   3003 			    device_xname(dv), 'a' + i);
   3004 			ac_list = rf_get_component(ac_list, dev, vp, cname,
   3005 				label.d_partitions[i].p_size);
   3006 		}
   3007 	}
   3008 	return ac_list;
   3009 }
   3010 
   3011 
   3012 static int
   3013 rf_reasonable_label(RF_ComponentLabel_t *clabel)
   3014 {
   3015 
   3016 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   3017 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   3018 	    ((clabel->clean == RF_RAID_CLEAN) ||
   3019 	     (clabel->clean == RF_RAID_DIRTY)) &&
   3020 	    clabel->row >=0 &&
   3021 	    clabel->column >= 0 &&
   3022 	    clabel->num_rows > 0 &&
   3023 	    clabel->num_columns > 0 &&
   3024 	    clabel->row < clabel->num_rows &&
   3025 	    clabel->column < clabel->num_columns &&
   3026 	    clabel->blockSize > 0 &&
   3027 	    clabel->numBlocks > 0) {
   3028 		/* label looks reasonable enough... */
   3029 		return(1);
   3030 	}
   3031 	return(0);
   3032 }
   3033 
   3034 
   3035 #ifdef DEBUG
   3036 void
   3037 rf_print_component_label(RF_ComponentLabel_t *clabel)
   3038 {
   3039 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   3040 	       clabel->row, clabel->column,
   3041 	       clabel->num_rows, clabel->num_columns);
   3042 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   3043 	       clabel->version, clabel->serial_number,
   3044 	       clabel->mod_counter);
   3045 	printf("   Clean: %s Status: %d\n",
   3046 	       clabel->clean ? "Yes" : "No", clabel->status );
   3047 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   3048 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   3049 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   3050 	       (char) clabel->parityConfig, clabel->blockSize,
   3051 	       clabel->numBlocks);
   3052 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   3053 	printf("   Contains root partition: %s\n",
   3054 	       clabel->root_partition ? "Yes" : "No" );
   3055 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   3056 #if 0
   3057 	   printf("   Config order: %d\n", clabel->config_order);
   3058 #endif
   3059 
   3060 }
   3061 #endif
   3062 
   3063 RF_ConfigSet_t *
   3064 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   3065 {
   3066 	RF_AutoConfig_t *ac;
   3067 	RF_ConfigSet_t *config_sets;
   3068 	RF_ConfigSet_t *cset;
   3069 	RF_AutoConfig_t *ac_next;
   3070 
   3071 
   3072 	config_sets = NULL;
   3073 
   3074 	/* Go through the AutoConfig list, and figure out which components
   3075 	   belong to what sets.  */
   3076 	ac = ac_list;
   3077 	while(ac!=NULL) {
   3078 		/* we're going to putz with ac->next, so save it here
   3079 		   for use at the end of the loop */
   3080 		ac_next = ac->next;
   3081 
   3082 		if (config_sets == NULL) {
   3083 			/* will need at least this one... */
   3084 			config_sets = (RF_ConfigSet_t *)
   3085 				malloc(sizeof(RF_ConfigSet_t),
   3086 				       M_RAIDFRAME, M_NOWAIT);
   3087 			if (config_sets == NULL) {
   3088 				panic("rf_create_auto_sets: No memory!");
   3089 			}
   3090 			/* this one is easy :) */
   3091 			config_sets->ac = ac;
   3092 			config_sets->next = NULL;
   3093 			config_sets->rootable = 0;
   3094 			ac->next = NULL;
   3095 		} else {
   3096 			/* which set does this component fit into? */
   3097 			cset = config_sets;
   3098 			while(cset!=NULL) {
   3099 				if (rf_does_it_fit(cset, ac)) {
   3100 					/* looks like it matches... */
   3101 					ac->next = cset->ac;
   3102 					cset->ac = ac;
   3103 					break;
   3104 				}
   3105 				cset = cset->next;
   3106 			}
   3107 			if (cset==NULL) {
   3108 				/* didn't find a match above... new set..*/
   3109 				cset = (RF_ConfigSet_t *)
   3110 					malloc(sizeof(RF_ConfigSet_t),
   3111 					       M_RAIDFRAME, M_NOWAIT);
   3112 				if (cset == NULL) {
   3113 					panic("rf_create_auto_sets: No memory!");
   3114 				}
   3115 				cset->ac = ac;
   3116 				ac->next = NULL;
   3117 				cset->next = config_sets;
   3118 				cset->rootable = 0;
   3119 				config_sets = cset;
   3120 			}
   3121 		}
   3122 		ac = ac_next;
   3123 	}
   3124 
   3125 
   3126 	return(config_sets);
   3127 }
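
         /* The result is a singly linked list of RF_ConfigSet_t, each carrying
          * the RF_AutoConfig_t entries (chained via ac->next) that matched one
          * another according to rf_does_it_fit(). */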
   3128 
   3129 static int
   3130 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   3131 {
   3132 	RF_ComponentLabel_t *clabel1, *clabel2;
   3133 
   3134 	/* If this one matches the *first* one in the set, that's good
   3135 	   enough, since the other members of the set would have been
   3136 	   through here too... */
   3137 	/* note that we are not checking partitionSize here..
   3138 
   3139 	   Note that we are also not checking the mod_counters here.
    3140 	   If everything else matches except the mod_counter, that's
   3141 	   good enough for this test.  We will deal with the mod_counters
   3142 	   a little later in the autoconfiguration process.
   3143 
   3144 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   3145 
   3146 	   The reason we don't check for this is that failed disks
   3147 	   will have lower modification counts.  If those disks are
   3148 	   not added to the set they used to belong to, then they will
   3149 	   form their own set, which may result in 2 different sets,
   3150 	   for example, competing to be configured at raid0, and
   3151 	   perhaps competing to be the root filesystem set.  If the
   3152 	   wrong ones get configured, or both attempt to become /,
    3153 	   weird behaviour and/or serious lossage will occur.  Thus we
   3154 	   need to bring them into the fold here, and kick them out at
   3155 	   a later point.
   3156 
   3157 	*/
   3158 
   3159 	clabel1 = cset->ac->clabel;
   3160 	clabel2 = ac->clabel;
   3161 	if ((clabel1->version == clabel2->version) &&
   3162 	    (clabel1->serial_number == clabel2->serial_number) &&
   3163 	    (clabel1->num_rows == clabel2->num_rows) &&
   3164 	    (clabel1->num_columns == clabel2->num_columns) &&
   3165 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   3166 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   3167 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   3168 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   3169 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   3170 	    (clabel1->blockSize == clabel2->blockSize) &&
   3171 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   3172 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   3173 	    (clabel1->root_partition == clabel2->root_partition) &&
   3174 	    (clabel1->last_unit == clabel2->last_unit) &&
   3175 	    (clabel1->config_order == clabel2->config_order)) {
    3176 		/* if it gets here, it almost *has* to be a match */
   3177 	} else {
   3178 		/* it's not consistent with somebody in the set..
   3179 		   punt */
   3180 		return(0);
   3181 	}
   3182 	/* all was fine.. it must fit... */
   3183 	return(1);
   3184 }
   3185 
   3186 int
   3187 rf_have_enough_components(RF_ConfigSet_t *cset)
   3188 {
   3189 	RF_AutoConfig_t *ac;
   3190 	RF_AutoConfig_t *auto_config;
   3191 	RF_ComponentLabel_t *clabel;
   3192 	int c;
   3193 	int num_cols;
   3194 	int num_missing;
   3195 	int mod_counter;
   3196 	int mod_counter_found;
   3197 	int even_pair_failed;
   3198 	char parity_type;
   3199 
   3200 
   3201 	/* check to see that we have enough 'live' components
   3202 	   of this set.  If so, we can configure it if necessary */
   3203 
   3204 	num_cols = cset->ac->clabel->num_columns;
   3205 	parity_type = cset->ac->clabel->parityConfig;
   3206 
   3207 	/* XXX Check for duplicate components!?!?!? */
   3208 
   3209 	/* Determine what the mod_counter is supposed to be for this set. */
   3210 
   3211 	mod_counter_found = 0;
   3212 	mod_counter = 0;
   3213 	ac = cset->ac;
   3214 	while(ac!=NULL) {
   3215 		if (mod_counter_found==0) {
   3216 			mod_counter = ac->clabel->mod_counter;
   3217 			mod_counter_found = 1;
   3218 		} else {
   3219 			if (ac->clabel->mod_counter > mod_counter) {
   3220 				mod_counter = ac->clabel->mod_counter;
   3221 			}
   3222 		}
   3223 		ac = ac->next;
   3224 	}
   3225 
   3226 	num_missing = 0;
   3227 	auto_config = cset->ac;
   3228 
   3229 	even_pair_failed = 0;
   3230 	for(c=0; c<num_cols; c++) {
   3231 		ac = auto_config;
   3232 		while(ac!=NULL) {
   3233 			if ((ac->clabel->column == c) &&
   3234 			    (ac->clabel->mod_counter == mod_counter)) {
   3235 				/* it's this one... */
   3236 #ifdef DEBUG
   3237 				printf("Found: %s at %d\n",
   3238 				       ac->devname,c);
   3239 #endif
   3240 				break;
   3241 			}
   3242 			ac=ac->next;
   3243 		}
   3244 		if (ac==NULL) {
   3245 				/* Didn't find one here! */
   3246 				/* special case for RAID 1, especially
   3247 				   where there are more than 2
   3248 				   components (where RAIDframe treats
   3249 				   things a little differently :( ) */
   3250 			if (parity_type == '1') {
   3251 				if (c%2 == 0) { /* even component */
   3252 					even_pair_failed = 1;
   3253 				} else { /* odd component.  If
   3254 					    we're failed, and
   3255 					    so is the even
   3256 					    component, it's
   3257 					    "Good Night, Charlie" */
   3258 					if (even_pair_failed == 1) {
   3259 						return(0);
   3260 					}
   3261 				}
   3262 			} else {
   3263 				/* normal accounting */
   3264 				num_missing++;
   3265 			}
   3266 		}
   3267 		if ((parity_type == '1') && (c%2 == 1)) {
   3268 				/* Just did an even component, and we didn't
   3269 				   bail.. reset the even_pair_failed flag,
   3270 				   and go on to the next component.... */
   3271 			even_pair_failed = 0;
   3272 		}
   3273 	}
   3274 
   3275 	clabel = cset->ac->clabel;
   3276 
   3277 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3278 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3279 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3280 		/* XXX this needs to be made *much* more general */
   3281 		/* Too many failures */
   3282 		return(0);
   3283 	}
   3284 	/* otherwise, all is well, and we've got enough to take a kick
   3285 	   at autoconfiguring this set */
   3286 	return(1);
   3287 }
   3288 
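/*
 * rf_create_configuration(ac, config, raidPtr) -- build an RF_Config_t
 * from the component labels of an autoconfig chain.  The stripe
 * geometry, parity configuration and maxOutstanding come from the
 * first label, the disk queue type is hardwired to "fifo" (see the
 * XXX below), and the device name of each component is recorded under
 * its column number.
 */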
   3289 void
   3290 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
   3291 			RF_Raid_t *raidPtr)
   3292 {
   3293 	RF_ComponentLabel_t *clabel;
   3294 	int i;
   3295 
   3296 	clabel = ac->clabel;
   3297 
   3298 	/* 1. Fill in the common stuff */
   3299 	config->numRow = clabel->num_rows = 1;
   3300 	config->numCol = clabel->num_columns;
   3301 	config->numSpare = 0; /* XXX should this be set here? */
   3302 	config->sectPerSU = clabel->sectPerSU;
   3303 	config->SUsPerPU = clabel->SUsPerPU;
   3304 	config->SUsPerRU = clabel->SUsPerRU;
   3305 	config->parityConfig = clabel->parityConfig;
   3306 	/* XXX... */
   3307 	strcpy(config->diskQueueType,"fifo");
   3308 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3309 	config->layoutSpecificSize = 0; /* XXX ?? */
   3310 
   3311 	while(ac!=NULL) {
   3312 		/* row/col values will be in range due to the checks
   3313 		   in reasonable_label() */
   3314 		strcpy(config->devnames[0][ac->clabel->column],
   3315 		       ac->devname);
   3316 		ac = ac->next;
   3317 	}
   3318 
   3319 	for(i=0;i<RF_MAXDBGV;i++) {
   3320 		config->debugVars[i][0] = 0;
   3321 	}
   3322 }
   3323 
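/*
 * rf_set_autoconfig(raidPtr, new_value) -- set the autoconfigure flag
 * for the RAID set and push the new value into the component label of
 * every optimal component and every in-use spare.  Returns new_value.
 */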
   3324 int
   3325 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3326 {
   3327 	RF_ComponentLabel_t clabel;
   3328 	struct vnode *vp;
   3329 	dev_t dev;
   3330 	int column;
   3331 	int sparecol;
   3332 
   3333 	raidPtr->autoconfigure = new_value;
   3334 
   3335 	for(column=0; column<raidPtr->numCol; column++) {
   3336 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3337 			dev = raidPtr->Disks[column].dev;
   3338 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3339 			raidread_component_label(dev, vp, &clabel);
   3340 			clabel.autoconfigure = new_value;
   3341 			raidwrite_component_label(dev, vp, &clabel);
   3342 		}
   3343 	}
   3344 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3345 		sparecol = raidPtr->numCol + column;
   3346 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3347 			dev = raidPtr->Disks[sparecol].dev;
   3348 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3349 			raidread_component_label(dev, vp, &clabel);
   3350 			clabel.autoconfigure = new_value;
   3351 			raidwrite_component_label(dev, vp, &clabel);
   3352 		}
   3353 	}
   3354 	return(new_value);
   3355 }
   3356 
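/*
 * rf_set_rootpartition(raidPtr, new_value) -- as rf_set_autoconfig(),
 * but for the root_partition flag.  Returns new_value.
 */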
   3357 int
   3358 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3359 {
   3360 	RF_ComponentLabel_t clabel;
   3361 	struct vnode *vp;
   3362 	dev_t dev;
   3363 	int column;
   3364 	int sparecol;
   3365 
   3366 	raidPtr->root_partition = new_value;
   3367 	for(column=0; column<raidPtr->numCol; column++) {
   3368 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3369 			dev = raidPtr->Disks[column].dev;
   3370 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3371 			raidread_component_label(dev, vp, &clabel);
   3372 			clabel.root_partition = new_value;
   3373 			raidwrite_component_label(dev, vp, &clabel);
   3374 		}
   3375 	}
   3376 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3377 		sparecol = raidPtr->numCol + column;
   3378 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3379 			dev = raidPtr->Disks[sparecol].dev;
   3380 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3381 			raidread_component_label(dev, vp, &clabel);
   3382 			clabel.root_partition = new_value;
   3383 			raidwrite_component_label(dev, vp, &clabel);
   3384 		}
   3385 	}
   3386 	return(new_value);
   3387 }
   3388 
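/*
 * rf_release_all_vps(cset) -- close and release the vnode of every
 * component in the config set.
 */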
   3389 void
   3390 rf_release_all_vps(RF_ConfigSet_t *cset)
   3391 {
   3392 	RF_AutoConfig_t *ac;
   3393 
   3394 	ac = cset->ac;
   3395 	while(ac!=NULL) {
   3396 		/* Close the vp, and give it back */
   3397 		if (ac->vp) {
   3398 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3399 			VOP_CLOSE(ac->vp, FREAD, NOCRED);
   3400 			vput(ac->vp);
   3401 			ac->vp = NULL;
   3402 		}
   3403 		ac = ac->next;
   3404 	}
   3405 }
   3406 
   3407 
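/*
 * rf_cleanup_config_set(cset) -- free the component labels and
 * RF_AutoConfig_t structures hanging off the config set, and then the
 * set itself.
 */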
   3408 void
   3409 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3410 {
   3411 	RF_AutoConfig_t *ac;
   3412 	RF_AutoConfig_t *next_ac;
   3413 
   3414 	ac = cset->ac;
   3415 	while(ac!=NULL) {
   3416 		next_ac = ac->next;
   3417 		/* nuke the label */
   3418 		free(ac->clabel, M_RAIDFRAME);
   3419 		/* cleanup the config structure */
   3420 		free(ac, M_RAIDFRAME);
   3421 		/* "next.." */
   3422 		ac = next_ac;
   3423 	}
   3424 	/* and, finally, nuke the config set */
   3425 	free(cset, M_RAIDFRAME);
   3426 }
   3427 
   3428 
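/*
 * raid_init_component_label(raidPtr, clabel) -- fill in a component
 * label from the current state of the RAID set: serial number, mod
 * counter, geometry, parity configuration and the autoconfig/root
 * flags.  The clean flag is initialized to RF_RAID_DIRTY.
 */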
   3429 void
   3430 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
   3431 {
   3432 	/* current version number */
   3433 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3434 	clabel->serial_number = raidPtr->serial_number;
   3435 	clabel->mod_counter = raidPtr->mod_counter;
   3436 	clabel->num_rows = 1;
   3437 	clabel->num_columns = raidPtr->numCol;
   3438 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3439 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3440 
   3441 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3442 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3443 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3444 
   3445 	clabel->blockSize = raidPtr->bytesPerSector;
   3446 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3447 
   3448 	/* XXX not portable */
   3449 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3450 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3451 	clabel->autoconfigure = raidPtr->autoconfigure;
   3452 	clabel->root_partition = raidPtr->root_partition;
   3453 	clabel->last_unit = raidPtr->raidid;
   3454 	clabel->config_order = raidPtr->config_order;
   3455 }
   3456 
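/*
 * rf_auto_config_set(cset, unit) -- autoconfigure a single config set.
 * Builds an RF_Config_t from the component labels, picks a unit number
 * (preferring the last_unit recorded in the label, otherwise the
 * highest free unit), and configures the set via rf_Configure() and
 * raidinit().  On success the components are marked dirty, the set is
 * flagged as rootable if its label asks for that, the unit number is
 * stored in *unit, and 0 is returned.
 */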
   3457 int
   3458 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
   3459 {
   3460 	RF_Raid_t *raidPtr;
   3461 	RF_Config_t *config;
   3462 	int raidID;
   3463 	int retcode;
   3464 
   3465 #ifdef DEBUG
   3466 	printf("RAID autoconfigure\n");
   3467 #endif
   3468 
   3469 	retcode = 0;
   3470 	*unit = -1;
   3471 
   3472 	/* 1. Create a config structure */
   3473 
   3474 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3475 				       M_RAIDFRAME,
   3476 				       M_NOWAIT);
   3477 	if (config==NULL) {
   3478 		printf("Out of mem!?!?\n");
   3479 				/* XXX do something more intelligent here. */
   3480 		return(1);
   3481 	}
   3482 
   3483 	memset(config, 0, sizeof(RF_Config_t));
   3484 
    3485 	/*
    3486 	   2. Figure out which RAID ID this set is supposed to live at.
    3487 	   See if we can get the same RAID device that it was configured
    3488 	   on last time.
    3489 	*/
   3490 
   3491 	raidID = cset->ac->clabel->last_unit;
   3492 	if ((raidID < 0) || (raidID >= numraid)) {
    3493 		/* label's unit is out of range; start the search from the top */
   3494 		raidID = numraid - 1;
   3495 	}
   3496 	if (raidPtrs[raidID]->valid != 0) {
   3497 
    3498 		/*
    3499 		   That unit is already in use; go looking for an alternative.
    3500 		   Search from the top down so we don't immediately grab
    3501 		   raid0 even if it happens to be free.
    3502 		*/
   3503 
   3504 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3505 			if (raidPtrs[raidID]->valid == 0) {
   3506 				/* can use this one! */
   3507 				break;
   3508 			}
   3509 		}
   3510 	}
   3511 
   3512 	if (raidID < 0) {
   3513 		/* punt... */
   3514 		printf("Unable to auto configure this set!\n");
   3515 		printf("(Out of RAID devs!)\n");
   3516 		free(config, M_RAIDFRAME);
   3517 		return(1);
   3518 	}
   3519 
   3520 #ifdef DEBUG
   3521 	printf("Configuring raid%d:\n",raidID);
   3522 #endif
   3523 
   3524 	raidPtr = raidPtrs[raidID];
   3525 
   3526 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3527 	raidPtr->raidid = raidID;
   3528 	raidPtr->openings = RAIDOUTSTANDING;
   3529 
   3530 	/* 3. Build the configuration structure */
   3531 	rf_create_configuration(cset->ac, config, raidPtr);
   3532 
   3533 	/* 4. Do the configuration */
   3534 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3535 
   3536 	if (retcode == 0) {
   3537 
   3538 		raidinit(raidPtrs[raidID]);
   3539 
   3540 		rf_markalldirty(raidPtrs[raidID]);
   3541 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3542 		if (cset->ac->clabel->root_partition==1) {
   3543 			/* everything configured just fine.  Make a note
   3544 			   that this set is eligible to be root. */
   3545 			cset->rootable = 1;
   3546 			/* XXX do this here? */
   3547 			raidPtrs[raidID]->root_partition = 1;
   3548 		}
   3549 	}
   3550 
   3551 	/* 5. Cleanup */
   3552 	free(config, M_RAIDFRAME);
   3553 
   3554 	*unit = raidID;
   3555 	return(retcode);
   3556 }
   3557 
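/*
 * rf_disk_unbusy(desc) -- report completion of the I/O described by
 * the access descriptor to the disk(9) statistics for this unit.
 */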
   3558 void
   3559 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
   3560 {
   3561 	struct buf *bp;
   3562 
   3563 	bp = (struct buf *)desc->bp;
   3564 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3565 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
   3566 }
   3567 
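/*
 * rf_pool_init(p, size, w_chan, xmin, xmax) -- convenience wrapper:
 * initialize a pool at IPL_BIO, prime it with xmin items and set the
 * low/high watermarks to xmin/xmax.
 */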
   3568 void
   3569 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
   3570 	     size_t xmin, size_t xmax)
   3571 {
   3572 	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
   3573 	pool_sethiwat(p, xmax);
   3574 	pool_prime(p, xmin);
   3575 	pool_setlowat(p, xmin);
   3576 }
   3577 
   3578 /*
   3579  * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
   3580  * if there is IO pending and if that IO could possibly be done for a
   3581  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
   3582  * otherwise.
   3583  *
   3584  */
   3585 
   3586 int
   3587 rf_buf_queue_check(int raidid)
   3588 {
   3589 	if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) &&
   3590 	    raidPtrs[raidid]->openings > 0) {
   3591 		/* there is work to do */
   3592 		return 0;
   3593 	}
   3594 	/* default is nothing to do */
   3595 	return 1;
   3596 }
   3597 
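/*
 * rf_getdisksize(vp, l, diskPtr) -- determine the size and sector size
 * of a component, first via DIOCGPART (disklabel partition) and, if
 * that fails, via DIOCGWEDGEINFO (wedge, assuming 512-byte sectors).
 * In both cases rf_protectedSectors are subtracted from the usable
 * size to leave room for the component label.
 */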
   3598 int
   3599 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
   3600 {
   3601 	struct partinfo dpart;
   3602 	struct dkwedge_info dkw;
   3603 	int error;
   3604 
   3605 	error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred);
   3606 	if (error == 0) {
   3607 		diskPtr->blockSize = dpart.disklab->d_secsize;
   3608 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
   3609 		diskPtr->partitionSize = dpart.part->p_size;
   3610 		return 0;
   3611 	}
   3612 
   3613 	error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred);
   3614 	if (error == 0) {
   3615 		diskPtr->blockSize = 512;	/* XXX */
   3616 		diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
   3617 		diskPtr->partitionSize = dkw.dkw_size;
   3618 		return 0;
   3619 	}
   3620 	return error;
   3621 }
   3622 
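/*
 * autoconf(9) glue for the raid pseudo-device: raid_match() always
 * matches and raid_attach() has nothing to do, since the real setup
 * of a RAID set happens when it is configured (see raidinit() and
 * rf_auto_config_set()).
 */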
   3623 static int
   3624 raid_match(device_t self, cfdata_t cfdata, void *aux)
   3625 {
   3626 	return 1;
   3627 }
   3628 
   3629 static void
   3630 raid_attach(device_t parent, device_t self, void *aux)
   3631 {
   3632 
   3633 }
   3634 
   3635 
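/*
 * raid_detach(self, flags) -- detach a raid unit: take the unit lock,
 * do the work in raid_detach_unlocked(), drop the lock and return the
 * result.
 */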
   3636 static int
   3637 raid_detach(device_t self, int flags)
   3638 {
   3639 	int error;
   3640 	struct raid_softc *rs = &raid_softc[device_unit(self)];
   3641 
   3642 	if ((error = raidlock(rs)) != 0)
   3643 		return (error);
   3644 
   3645 	error = raid_detach_unlocked(rs);
   3646 
   3647 	raidunlock(rs);
   3648 
   3649 	return error;
   3650 }
   3651 
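/*
 * rf_set_properties(rs, raidPtr) -- attach a "disk-info" property
 * dictionary describing the RAID set to the device.  The geometry is
 * synthetic (one data stripe per "track", 4 * numCol "tracks" per
 * cylinder) since a RAID set has no physical geometry of its own;
 * any previously attached dictionary is released.
 */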
   3652 static void
   3653 rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
   3654 {
   3655 	prop_dictionary_t disk_info, odisk_info, geom;
   3656 	disk_info = prop_dictionary_create();
   3657 	geom = prop_dictionary_create();
   3658 	prop_dictionary_set_uint64(geom, "sectors-per-unit",
   3659 				   raidPtr->totalSectors);
   3660 	prop_dictionary_set_uint32(geom, "sector-size",
   3661 				   raidPtr->bytesPerSector);
   3662 
   3663 	prop_dictionary_set_uint16(geom, "sectors-per-track",
   3664 				   raidPtr->Layout.dataSectorsPerStripe);
   3665 	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
   3666 				   4 * raidPtr->numCol);
   3667 
   3668 	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
   3669 	   raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
   3670 	   (4 * raidPtr->numCol)));
   3671 
   3672 	prop_dictionary_set(disk_info, "geometry", geom);
   3673 	prop_object_release(geom);
   3674 	prop_dictionary_set(device_properties(rs->sc_dev),
   3675 			    "disk-info", disk_info);
   3676 	odisk_info = rs->sc_dkdev.dk_info;
   3677 	rs->sc_dkdev.dk_info = disk_info;
   3678 	if (odisk_info)
   3679 		prop_object_release(odisk_info);
   3680 }
   3681 
   3682 /*
   3683  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
   3684  * We end up returning whatever error was returned by the first cache flush
   3685  * that fails.
   3686  */
   3687 
   3688 static int
   3689 rf_sync_component_caches(RF_Raid_t *raidPtr)
   3690 {
   3691 	int c, sparecol;
    3692 	int e, error;
   3693 	int force = 1;
   3694 
   3695 	error = 0;
   3696 	for (c = 0; c < raidPtr->numCol; c++) {
   3697 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   3698 			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
   3699 					  &force, FWRITE, NOCRED);
   3700 			if (e) {
   3701 				if (e != ENODEV)
   3702 					printf("raid%d: cache flush to component %s failed.\n",
   3703 					       raidPtr->raidid, raidPtr->Disks[c].devname);
   3704 				if (error == 0) {
   3705 					error = e;
   3706 				}
   3707 			}
   3708 		}
   3709 	}
   3710 
    3711 	for (c = 0; c < raidPtr->numSpare; c++) {
   3712 		sparecol = raidPtr->numCol + c;
   3713 		/* Need to ensure that the reconstruct actually completed! */
   3714 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3715 			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
   3716 					  DIOCCACHESYNC, &force, FWRITE, NOCRED);
   3717 			if (e) {
   3718 				if (e != ENODEV)
   3719 					printf("raid%d: cache flush to component %s failed.\n",
   3720 					       raidPtr->raidid, raidPtr->Disks[sparecol].devname);
   3721 				if (error == 0) {
   3722 					error = e;
   3723 				}
   3724 			}
   3725 		}
   3726 	}
   3727 	return error;
   3728 }
   3729