Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.169
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.169 2003/12/30 21:59:03 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1990, 1993
     40  *      The Regents of the University of California.  All rights reserved.
     41  *
     42  * This code is derived from software contributed to Berkeley by
     43  * the Systems Programming Group of the University of Utah Computer
     44  * Science Department.
     45  *
     46  * Redistribution and use in source and binary forms, with or without
     47  * modification, are permitted provided that the following conditions
     48  * are met:
     49  * 1. Redistributions of source code must retain the above copyright
     50  *    notice, this list of conditions and the following disclaimer.
     51  * 2. Redistributions in binary form must reproduce the above copyright
     52  *    notice, this list of conditions and the following disclaimer in the
     53  *    documentation and/or other materials provided with the distribution.
     54  * 3. Neither the name of the University nor the names of its contributors
     55  *    may be used to endorse or promote products derived from this software
     56  *    without specific prior written permission.
     57  *
     58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     68  * SUCH DAMAGE.
     69  *
     70  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     71  *
     72  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     73  */
     74 
     75 /*
     76  * Copyright (c) 1988 University of Utah.
     77  *
     78  * This code is derived from software contributed to Berkeley by
     79  * the Systems Programming Group of the University of Utah Computer
     80  * Science Department.
     81  *
     82  * Redistribution and use in source and binary forms, with or without
     83  * modification, are permitted provided that the following conditions
     84  * are met:
     85  * 1. Redistributions of source code must retain the above copyright
     86  *    notice, this list of conditions and the following disclaimer.
     87  * 2. Redistributions in binary form must reproduce the above copyright
     88  *    notice, this list of conditions and the following disclaimer in the
     89  *    documentation and/or other materials provided with the distribution.
     90  * 3. All advertising materials mentioning features or use of this software
     91  *    must display the following acknowledgement:
     92  *      This product includes software developed by the University of
     93  *      California, Berkeley and its contributors.
     94  * 4. Neither the name of the University nor the names of its contributors
     95  *    may be used to endorse or promote products derived from this software
     96  *    without specific prior written permission.
     97  *
     98  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     99  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    100  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    101  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    102  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    103  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    104  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    105  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    106  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    107  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    108  * SUCH DAMAGE.
    109  *
    110  * from: Utah $Hdr: cd.c 1.6 90/11/28$
    111  *
    112  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
    113  */
    114 
    115 /*
    116  * Copyright (c) 1995 Carnegie-Mellon University.
    117  * All rights reserved.
    118  *
    119  * Authors: Mark Holland, Jim Zelenka
    120  *
    121  * Permission to use, copy, modify and distribute this software and
    122  * its documentation is hereby granted, provided that both the copyright
    123  * notice and this permission notice appear in all copies of the
    124  * software, derivative works or modified versions, and any portions
    125  * thereof, and that both notices appear in supporting documentation.
    126  *
    127  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
    128  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
    129  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
    130  *
    131  * Carnegie Mellon requests users of this software to return to
    132  *
    133  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    134  *  School of Computer Science
    135  *  Carnegie Mellon University
    136  *  Pittsburgh PA 15213-3890
    137  *
    138  * any improvements or extensions that they make and grant Carnegie the
    139  * rights to redistribute these changes.
    140  */
    141 
    142 /***********************************************************
    143  *
    144  * rf_kintf.c -- the kernel interface routines for RAIDframe
    145  *
    146  ***********************************************************/
    147 
    148 #include <sys/cdefs.h>
    149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.169 2003/12/30 21:59:03 oster Exp $");
    150 
    151 #include <sys/param.h>
    152 #include <sys/errno.h>
    153 #include <sys/pool.h>
    154 #include <sys/proc.h>
    155 #include <sys/queue.h>
    156 #include <sys/disk.h>
    157 #include <sys/device.h>
    158 #include <sys/stat.h>
    159 #include <sys/ioctl.h>
    160 #include <sys/fcntl.h>
    161 #include <sys/systm.h>
    162 #include <sys/namei.h>
    163 #include <sys/vnode.h>
    164 #include <sys/disklabel.h>
    165 #include <sys/conf.h>
    166 #include <sys/lock.h>
    167 #include <sys/buf.h>
    168 #include <sys/user.h>
    169 #include <sys/reboot.h>
    170 
    171 #include <dev/raidframe/raidframevar.h>
    172 #include <dev/raidframe/raidframeio.h>
    173 #include "raid.h"
    174 #include "opt_raid_autoconfig.h"
    175 #include "rf_raid.h"
    176 #include "rf_copyback.h"
    177 #include "rf_dag.h"
    178 #include "rf_dagflags.h"
    179 #include "rf_desc.h"
    180 #include "rf_diskqueue.h"
    181 #include "rf_etimer.h"
    182 #include "rf_general.h"
    183 #include "rf_kintf.h"
    184 #include "rf_options.h"
    185 #include "rf_driver.h"
    186 #include "rf_parityscan.h"
    187 #include "rf_threadstuff.h"
    188 
    189 #ifdef DEBUG
    190 int     rf_kdebug_level = 0;
    191 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
    192 #else				/* DEBUG */
    193 #define db1_printf(a) { }
    194 #endif				/* DEBUG */
    195 
    196 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
    197 
    198 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
    199 
    200 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
    201 						 * spare table */
    202 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
    203 						 * installation process */
    204 
    205 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
    206 
    207 /* prototypes */
    208 static void KernelWakeupFunc(struct buf * bp);
    209 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    210 		   dev_t dev, RF_SectorNum_t startSect,
    211 		   RF_SectorCount_t numSect, caddr_t buf,
    212 		   void (*cbFunc) (struct buf *), void *cbArg,
    213 		   int logBytesPerSector, struct proc * b_proc);
    214 static void raidinit(RF_Raid_t *);
    215 
    216 void raidattach(int);
    217 
    218 dev_type_open(raidopen);
    219 dev_type_close(raidclose);
    220 dev_type_read(raidread);
    221 dev_type_write(raidwrite);
    222 dev_type_ioctl(raidioctl);
    223 dev_type_strategy(raidstrategy);
    224 dev_type_dump(raiddump);
    225 dev_type_size(raidsize);
    226 
    227 const struct bdevsw raid_bdevsw = {
    228 	raidopen, raidclose, raidstrategy, raidioctl,
    229 	raiddump, raidsize, D_DISK
    230 };
    231 
    232 const struct cdevsw raid_cdevsw = {
    233 	raidopen, raidclose, raidread, raidwrite, raidioctl,
    234 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
    235 };
    236 
    237 /*
    238  * Pilfered from ccd.c
    239  */
    240 
     /*
      * Per-component I/O wrapper.  Objects of this type come from
      * raidframe_cbufpool, which raidattach() initialises with
      * sizeof(struct raidbuf) as the item size.
      *
      * NOTE(review): rf_buf presumably has to be the first member so that a
      * struct buf pointer handed back by the lower layers can be converted
      * back to the enclosing raidbuf -- confirm against the users of this
      * structure, which are outside this part of the file.
      */
     241 struct raidbuf {
     242 	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
     243 	struct buf *rf_obp;	/* ptr. to original I/O buf */
     244 	RF_DiskQueueData_t *req;/* the request that this was part of.. */
     245 };
    246 
    247 /* component buffer pool */
    248 struct pool raidframe_cbufpool;
    249 
    250 /* XXX Not sure if the following should be replacing the raidPtrs above,
    251    or if it should be used in conjunction with that...
    252 */
    253 
     /*
      * Per-unit driver state.  raidattach() allocates one zero-filled entry
      * per unit in the raid_softc[] array; the entry points index that array
      * with raidunit(dev).
      */
     254 struct raid_softc {
     255 	int     sc_flags;	/* RAIDF_* state bits, see below */
     256 	int     sc_cflags;	/* configuration flags */
     257 	size_t  sc_size;        /* size of the raid device */
     258 	char    sc_xname[20];	/* XXX external name */
     259 	struct disk sc_dkdev;	/* generic disk device info (label, open masks) */
     260 	struct bufq_state buf_queue;	/* device queue; raidstrategy() puts bufs here */
     261 };
     262 /* sc_flags */
     263 #define RAIDF_INITED	0x01	/* unit has been initialized */
     264 #define RAIDF_WLABEL	0x02	/* label area is writable */
     265 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
     266 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
     267 #define RAIDF_LOCKED	0x80	/* unit is locked */
    268 
    269 #define	raidunit(x)	DISKUNIT(x)
    270 int numraid = 0;
    271 
    272 /*
    273  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    274  * Be aware that large numbers can allow the driver to consume a lot of
    275  * kernel memory, especially on writes, and in degraded mode reads.
    276  *
    277  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    278  * a single 64K write will typically require 64K for the old data,
    279  * 64K for the old parity, and 64K for the new parity, for a total
    280  * of 192K (if the parity buffer is not re-used immediately).
     281  * Even if it is used immediately, that's still 128K, which when multiplied
    282  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    283  *
    284  * Now in degraded mode, for example, a 64K read on the above setup may
    285  * require data reconstruction, which will require *all* of the 4 remaining
    286  * disks to participate -- 4 * 32K/disk == 128K again.
    287  */
    288 
    289 #ifndef RAIDOUTSTANDING
    290 #define RAIDOUTSTANDING   6
    291 #endif
    292 
    293 #define RAIDLABELDEV(dev)	\
    294 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    295 
    296 /* declared here, and made public, for the benefit of KVM stuff.. */
    297 struct raid_softc *raid_softc;
    298 
    299 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    300 				     struct disklabel *);
    301 static void raidgetdisklabel(dev_t);
    302 static void raidmakedisklabel(struct raid_softc *);
    303 
    304 static int raidlock(struct raid_softc *);
    305 static void raidunlock(struct raid_softc *);
    306 
    307 static void rf_markalldirty(RF_Raid_t *);
    308 
    309 struct device *raidrootdev;
    310 
    311 void rf_ReconThread(struct rf_recon_req *);
    312 /* XXX what I want is: */
    313 /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
    314 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
    315 void rf_CopybackThread(RF_Raid_t *raidPtr);
    316 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
    317 int rf_autoconfig(struct device *self);
    318 void rf_buildroothack(RF_ConfigSet_t *);
    319 
    320 RF_AutoConfig_t *rf_find_raid_components(void);
    321 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
    322 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
    323 static int rf_reasonable_label(RF_ComponentLabel_t *);
    324 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
    325 int rf_set_autoconfig(RF_Raid_t *, int);
    326 int rf_set_rootpartition(RF_Raid_t *, int);
    327 void rf_release_all_vps(RF_ConfigSet_t *);
    328 void rf_cleanup_config_set(RF_ConfigSet_t *);
    329 int rf_have_enough_components(RF_ConfigSet_t *);
    330 int rf_auto_config_set(RF_ConfigSet_t *, int *);
    331 
    332 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
    333 				  allow autoconfig to take place.
    334 			          Note that this is overridden by having
    335 			          RAID_AUTOCONFIG as an option in the
    336 			          kernel config file.  */
    337 
    338 void
    339 raidattach(int num)
    340 {
    341 	int raidID;
    342 	int i, rc;
    343 
    344 #ifdef DEBUG
    345 	printf("raidattach: Asked for %d units\n", num);
    346 #endif
    347 
    348 	if (num <= 0) {
    349 #ifdef DIAGNOSTIC
    350 		panic("raidattach: count <= 0");
    351 #endif
    352 		return;
    353 	}
    354 	/* This is where all the initialization stuff gets done. */
    355 
    356 	numraid = num;
    357 
    358 	/* Make some space for requested number of units... */
    359 
    360 	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
    361 	if (raidPtrs == NULL) {
    362 		panic("raidPtrs is NULL!!");
    363 	}
    364 
    365 	/* Initialize the component buffer pool. */
    366 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    367 	    0, 0, "raidpl", NULL);
    368 
    369 	rf_mutex_init(&rf_sparet_wait_mutex);
    370 
    371 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    372 
    373 	for (i = 0; i < num; i++)
    374 		raidPtrs[i] = NULL;
    375 	rc = rf_BootRaidframe();
    376 	if (rc == 0)
    377 		printf("Kernelized RAIDframe activated\n");
    378 	else
    379 		panic("Serious error booting RAID!!");
    380 
    381 	/* put together some datastructures like the CCD device does.. This
    382 	 * lets us lock the device and what-not when it gets opened. */
    383 
    384 	raid_softc = (struct raid_softc *)
    385 		malloc(num * sizeof(struct raid_softc),
    386 		       M_RAIDFRAME, M_NOWAIT);
    387 	if (raid_softc == NULL) {
    388 		printf("WARNING: no memory for RAIDframe driver\n");
    389 		return;
    390 	}
    391 
    392 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    393 
    394 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    395 					      M_RAIDFRAME, M_NOWAIT);
    396 	if (raidrootdev == NULL) {
    397 		panic("No memory for RAIDframe driver!!?!?!");
    398 	}
    399 
    400 	for (raidID = 0; raidID < num; raidID++) {
    401 		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
    402 
    403 		raidrootdev[raidID].dv_class  = DV_DISK;
    404 		raidrootdev[raidID].dv_cfdata = NULL;
    405 		raidrootdev[raidID].dv_unit   = raidID;
    406 		raidrootdev[raidID].dv_parent = NULL;
    407 		raidrootdev[raidID].dv_flags  = 0;
    408 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    409 
    410 		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
    411 			  (RF_Raid_t *));
    412 		if (raidPtrs[raidID] == NULL) {
    413 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    414 			numraid = raidID;
    415 			return;
    416 		}
    417 	}
    418 
    419 #ifdef RAID_AUTOCONFIG
    420 	raidautoconfig = 1;
    421 #endif
    422 
    423 	/*
    424 	 * Register a finalizer which will be used to auto-config RAID
    425 	 * sets once all real hardware devices have been found.
    426 	 */
    427 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
    428 		printf("WARNING: unable to register RAIDframe finalizer\n");
    429 }
    430 
    431 int
    432 rf_autoconfig(struct device *self)
    433 {
    434 	RF_AutoConfig_t *ac_list;
    435 	RF_ConfigSet_t *config_sets;
    436 
    437 	if (raidautoconfig == 0)
    438 		return (0);
    439 
    440 	/* XXX This code can only be run once. */
    441 	raidautoconfig = 0;
    442 
    443 	/* 1. locate all RAID components on the system */
    444 #ifdef DEBUG
    445 	printf("Searching for RAID components...\n");
    446 #endif
    447 	ac_list = rf_find_raid_components();
    448 
    449 	/* 2. Sort them into their respective sets. */
    450 	config_sets = rf_create_auto_sets(ac_list);
    451 
    452 	/*
    453 	 * 3. Evaluate each set andconfigure the valid ones.
    454 	 * This gets done in rf_buildroothack().
    455 	 */
    456 	rf_buildroothack(config_sets);
    457 
    458 	return (1);
    459 }
    460 
    461 void
    462 rf_buildroothack(RF_ConfigSet_t *config_sets)
    463 {
    464 	RF_ConfigSet_t *cset;
    465 	RF_ConfigSet_t *next_cset;
    466 	int retcode;
    467 	int raidID;
    468 	int rootID;
    469 	int num_root;
    470 
    471 	rootID = 0;
    472 	num_root = 0;
    473 	cset = config_sets;
    474 	while(cset != NULL ) {
    475 		next_cset = cset->next;
    476 		if (rf_have_enough_components(cset) &&
    477 		    cset->ac->clabel->autoconfigure==1) {
    478 			retcode = rf_auto_config_set(cset,&raidID);
    479 			if (!retcode) {
    480 				if (cset->rootable) {
    481 					rootID = raidID;
    482 					num_root++;
    483 				}
    484 			} else {
    485 				/* The autoconfig didn't work :( */
    486 #if DEBUG
    487 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    488 #endif
    489 				rf_release_all_vps(cset);
    490 			}
    491 		} else {
    492 			/* we're not autoconfiguring this set...
    493 			   release the associated resources */
    494 			rf_release_all_vps(cset);
    495 		}
    496 		/* cleanup */
    497 		rf_cleanup_config_set(cset);
    498 		cset = next_cset;
    499 	}
    500 
    501 	/* we found something bootable... */
    502 
    503 	if (num_root == 1) {
    504 		booted_device = &raidrootdev[rootID];
    505 	} else if (num_root > 1) {
    506 		/* we can't guess.. require the user to answer... */
    507 		boothowto |= RB_ASKNAME;
    508 	}
    509 }
    510 
    511 
    512 int
    513 raidsize(dev_t dev)
    514 {
    515 	struct raid_softc *rs;
    516 	struct disklabel *lp;
    517 	int     part, unit, omask, size;
    518 
    519 	unit = raidunit(dev);
    520 	if (unit >= numraid)
    521 		return (-1);
    522 	rs = &raid_softc[unit];
    523 
    524 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    525 		return (-1);
    526 
    527 	part = DISKPART(dev);
    528 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    529 	lp = rs->sc_dkdev.dk_label;
    530 
    531 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    532 		return (-1);
    533 
    534 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    535 		size = -1;
    536 	else
    537 		size = lp->d_partitions[part].p_size *
    538 		    (lp->d_secsize / DEV_BSIZE);
    539 
    540 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    541 		return (-1);
    542 
    543 	return (size);
    544 
    545 }
    546 
    547 int
    548 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t  size)
    549 {
    550 	/* Not implemented. */
    551 	return ENXIO;
    552 }
    553 /* ARGSUSED */
    554 int
    555 raidopen(dev_t dev, int flags, int fmt, struct proc *p)
    556 {
    557 	int     unit = raidunit(dev);
    558 	struct raid_softc *rs;
    559 	struct disklabel *lp;
    560 	int     part, pmask;
    561 	int     error = 0;
    562 
    563 	if (unit >= numraid)
    564 		return (ENXIO);
    565 	rs = &raid_softc[unit];
    566 
    567 	if ((error = raidlock(rs)) != 0)
    568 		return (error);
    569 	lp = rs->sc_dkdev.dk_label;
    570 
    571 	part = DISKPART(dev);
    572 	pmask = (1 << part);
    573 
    574 	if ((rs->sc_flags & RAIDF_INITED) &&
    575 	    (rs->sc_dkdev.dk_openmask == 0))
    576 		raidgetdisklabel(dev);
    577 
    578 	/* make sure that this partition exists */
    579 
    580 	if (part != RAW_PART) {
    581 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    582 		    ((part >= lp->d_npartitions) ||
    583 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    584 			error = ENXIO;
    585 			raidunlock(rs);
    586 			return (error);
    587 		}
    588 	}
    589 	/* Prevent this unit from being unconfigured while open. */
    590 	switch (fmt) {
    591 	case S_IFCHR:
    592 		rs->sc_dkdev.dk_copenmask |= pmask;
    593 		break;
    594 
    595 	case S_IFBLK:
    596 		rs->sc_dkdev.dk_bopenmask |= pmask;
    597 		break;
    598 	}
    599 
    600 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    601 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    602 		/* First one... mark things as dirty... Note that we *MUST*
    603 		 have done a configure before this.  I DO NOT WANT TO BE
    604 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    605 		 THAT THEY BELONG TOGETHER!!!!! */
    606 		/* XXX should check to see if we're only open for reading
    607 		   here... If so, we needn't do this, but then need some
    608 		   other way of keeping track of what's happened.. */
    609 
    610 		rf_markalldirty( raidPtrs[unit] );
    611 	}
    612 
    613 
    614 	rs->sc_dkdev.dk_openmask =
    615 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    616 
    617 	raidunlock(rs);
    618 
    619 	return (error);
    620 
    621 
    622 }
    623 /* ARGSUSED */
    624 int
    625 raidclose(dev_t dev, int flags, int fmt, struct proc *p)
    626 {
    627 	int     unit = raidunit(dev);
    628 	struct raid_softc *rs;
    629 	int     error = 0;
    630 	int     part;
    631 
    632 	if (unit >= numraid)
    633 		return (ENXIO);
    634 	rs = &raid_softc[unit];
    635 
    636 	if ((error = raidlock(rs)) != 0)
    637 		return (error);
    638 
    639 	part = DISKPART(dev);
    640 
    641 	/* ...that much closer to allowing unconfiguration... */
    642 	switch (fmt) {
    643 	case S_IFCHR:
    644 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    645 		break;
    646 
    647 	case S_IFBLK:
    648 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    649 		break;
    650 	}
    651 	rs->sc_dkdev.dk_openmask =
    652 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    653 
    654 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    655 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    656 		/* Last one... device is not unconfigured yet.
    657 		   Device shutdown has taken care of setting the
    658 		   clean bits if RAIDF_INITED is not set
    659 		   mark things as clean... */
    660 
    661 		rf_update_component_labels(raidPtrs[unit],
    662 						 RF_FINAL_COMPONENT_UPDATE);
    663 		if (doing_shutdown) {
    664 			/* last one, and we're going down, so
    665 			   lights out for this RAID set too. */
    666 			error = rf_Shutdown(raidPtrs[unit]);
    667 
    668 			/* It's no longer initialized... */
    669 			rs->sc_flags &= ~RAIDF_INITED;
    670 
    671 			/* Detach the disk. */
    672 			disk_detach(&rs->sc_dkdev);
    673 		}
    674 	}
    675 
    676 	raidunlock(rs);
    677 	return (0);
    678 
    679 }
    680 
    681 void
    682 raidstrategy(struct buf *bp)
    683 {
    684 	int s;
    685 
    686 	unsigned int raidID = raidunit(bp->b_dev);
    687 	RF_Raid_t *raidPtr;
    688 	struct raid_softc *rs = &raid_softc[raidID];
    689 	int     wlabel;
    690 
    691 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    692 		bp->b_error = ENXIO;
    693 		bp->b_flags |= B_ERROR;
    694 		bp->b_resid = bp->b_bcount;
    695 		biodone(bp);
    696 		return;
    697 	}
    698 	if (raidID >= numraid || !raidPtrs[raidID]) {
    699 		bp->b_error = ENODEV;
    700 		bp->b_flags |= B_ERROR;
    701 		bp->b_resid = bp->b_bcount;
    702 		biodone(bp);
    703 		return;
    704 	}
    705 	raidPtr = raidPtrs[raidID];
    706 	if (!raidPtr->valid) {
    707 		bp->b_error = ENODEV;
    708 		bp->b_flags |= B_ERROR;
    709 		bp->b_resid = bp->b_bcount;
    710 		biodone(bp);
    711 		return;
    712 	}
    713 	if (bp->b_bcount == 0) {
    714 		db1_printf(("b_bcount is zero..\n"));
    715 		biodone(bp);
    716 		return;
    717 	}
    718 
    719 	/*
    720 	 * Do bounds checking and adjust transfer.  If there's an
    721 	 * error, the bounds check will flag that for us.
    722 	 */
    723 
    724 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    725 	if (DISKPART(bp->b_dev) != RAW_PART)
    726 		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
    727 			db1_printf(("Bounds check failed!!:%d %d\n",
    728 				(int) bp->b_blkno, (int) wlabel));
    729 			biodone(bp);
    730 			return;
    731 		}
    732 	s = splbio();
    733 
    734 	bp->b_resid = 0;
    735 
    736 	/* stuff it onto our queue */
    737 	BUFQ_PUT(&rs->buf_queue, bp);
    738 
    739 	raidstart(raidPtrs[raidID]);
    740 
    741 	splx(s);
    742 }
    743 /* ARGSUSED */
    744 int
    745 raidread(dev_t dev, struct uio *uio, int flags)
    746 {
    747 	int     unit = raidunit(dev);
    748 	struct raid_softc *rs;
    749 
    750 	if (unit >= numraid)
    751 		return (ENXIO);
    752 	rs = &raid_softc[unit];
    753 
    754 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    755 		return (ENXIO);
    756 
    757 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    758 
    759 }
    760 /* ARGSUSED */
    761 int
    762 raidwrite(dev_t dev, struct uio *uio, int flags)
    763 {
    764 	int     unit = raidunit(dev);
    765 	struct raid_softc *rs;
    766 
    767 	if (unit >= numraid)
    768 		return (ENXIO);
    769 	rs = &raid_softc[unit];
    770 
    771 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    772 		return (ENXIO);
    773 
    774 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    775 
    776 }
    777 
    778 int
    779 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
    780 {
    781 	int     unit = raidunit(dev);
    782 	int     error = 0;
    783 	int     part, pmask;
    784 	struct raid_softc *rs;
    785 	RF_Config_t *k_cfg, *u_cfg;
    786 	RF_Raid_t *raidPtr;
    787 	RF_RaidDisk_t *diskPtr;
    788 	RF_AccTotals_t *totals;
    789 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    790 	u_char *specific_buf;
    791 	int retcode = 0;
    792 	int column;
    793 	int raidid;
    794 	struct rf_recon_req *rrcopy, *rr;
    795 	RF_ComponentLabel_t *clabel;
    796 	RF_ComponentLabel_t ci_label;
    797 	RF_ComponentLabel_t **clabel_ptr;
    798 	RF_SingleComponent_t *sparePtr,*componentPtr;
    799 	RF_SingleComponent_t hot_spare;
    800 	RF_SingleComponent_t component;
    801 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    802 	int i, j, d;
    803 #ifdef __HAVE_OLD_DISKLABEL
    804 	struct disklabel newlabel;
    805 #endif
    806 
    807 	if (unit >= numraid)
    808 		return (ENXIO);
    809 	rs = &raid_softc[unit];
    810 	raidPtr = raidPtrs[unit];
    811 
    812 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    813 		(int) DISKPART(dev), (int) unit, (int) cmd));
    814 
    815 	/* Must be open for writes for these commands... */
    816 	switch (cmd) {
    817 	case DIOCSDINFO:
    818 	case DIOCWDINFO:
    819 #ifdef __HAVE_OLD_DISKLABEL
    820 	case ODIOCWDINFO:
    821 	case ODIOCSDINFO:
    822 #endif
    823 	case DIOCWLABEL:
    824 		if ((flag & FWRITE) == 0)
    825 			return (EBADF);
    826 	}
    827 
    828 	/* Must be initialized for these... */
    829 	switch (cmd) {
    830 	case DIOCGDINFO:
    831 	case DIOCSDINFO:
    832 	case DIOCWDINFO:
    833 #ifdef __HAVE_OLD_DISKLABEL
    834 	case ODIOCGDINFO:
    835 	case ODIOCWDINFO:
    836 	case ODIOCSDINFO:
    837 	case ODIOCGDEFLABEL:
    838 #endif
    839 	case DIOCGPART:
    840 	case DIOCWLABEL:
    841 	case DIOCGDEFLABEL:
    842 	case RAIDFRAME_SHUTDOWN:
    843 	case RAIDFRAME_REWRITEPARITY:
    844 	case RAIDFRAME_GET_INFO:
    845 	case RAIDFRAME_RESET_ACCTOTALS:
    846 	case RAIDFRAME_GET_ACCTOTALS:
    847 	case RAIDFRAME_KEEP_ACCTOTALS:
    848 	case RAIDFRAME_GET_SIZE:
    849 	case RAIDFRAME_FAIL_DISK:
    850 	case RAIDFRAME_COPYBACK:
    851 	case RAIDFRAME_CHECK_RECON_STATUS:
    852 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    853 	case RAIDFRAME_GET_COMPONENT_LABEL:
    854 	case RAIDFRAME_SET_COMPONENT_LABEL:
    855 	case RAIDFRAME_ADD_HOT_SPARE:
    856 	case RAIDFRAME_REMOVE_HOT_SPARE:
    857 	case RAIDFRAME_INIT_LABELS:
    858 	case RAIDFRAME_REBUILD_IN_PLACE:
    859 	case RAIDFRAME_CHECK_PARITY:
    860 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    861 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    862 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    863 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    864 	case RAIDFRAME_SET_AUTOCONFIG:
    865 	case RAIDFRAME_SET_ROOT:
    866 	case RAIDFRAME_DELETE_COMPONENT:
    867 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    868 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    869 			return (ENXIO);
    870 	}
    871 
    872 	switch (cmd) {
    873 
    874 		/* configure the system */
    875 	case RAIDFRAME_CONFIGURE:
    876 
    877 		if (raidPtr->valid) {
    878 			/* There is a valid RAID set running on this unit! */
    879 			printf("raid%d: Device already configured!\n",unit);
    880 			return(EINVAL);
    881 		}
    882 
    883 		/* copy-in the configuration information */
    884 		/* data points to a pointer to the configuration structure */
    885 
    886 		u_cfg = *((RF_Config_t **) data);
    887 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    888 		if (k_cfg == NULL) {
    889 			return (ENOMEM);
    890 		}
    891 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
    892 		if (retcode) {
    893 			RF_Free(k_cfg, sizeof(RF_Config_t));
    894 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    895 				retcode));
    896 			return (retcode);
    897 		}
    898 		/* allocate a buffer for the layout-specific data, and copy it
    899 		 * in */
    900 		if (k_cfg->layoutSpecificSize) {
    901 			if (k_cfg->layoutSpecificSize > 10000) {
    902 				/* sanity check */
    903 				RF_Free(k_cfg, sizeof(RF_Config_t));
    904 				return (EINVAL);
    905 			}
    906 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    907 			    (u_char *));
    908 			if (specific_buf == NULL) {
    909 				RF_Free(k_cfg, sizeof(RF_Config_t));
    910 				return (ENOMEM);
    911 			}
    912 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
    913 			    k_cfg->layoutSpecificSize);
    914 			if (retcode) {
    915 				RF_Free(k_cfg, sizeof(RF_Config_t));
    916 				RF_Free(specific_buf,
    917 					k_cfg->layoutSpecificSize);
    918 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    919 					retcode));
    920 				return (retcode);
    921 			}
    922 		} else
    923 			specific_buf = NULL;
    924 		k_cfg->layoutSpecific = specific_buf;
    925 
    926 		/* should do some kind of sanity check on the configuration.
    927 		 * Store the sum of all the bytes in the last byte? */
    928 
    929 		/* configure the system */
    930 
    931 		/*
    932 		 * Clear the entire RAID descriptor, just to make sure
    933 		 *  there is no stale data left in the case of a
    934 		 *  reconfiguration
    935 		 */
    936 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    937 		raidPtr->raidid = unit;
    938 
    939 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    940 
    941 		if (retcode == 0) {
    942 
    943 			/* allow this many simultaneous IO's to
    944 			   this RAID device */
    945 			raidPtr->openings = RAIDOUTSTANDING;
    946 
    947 			raidinit(raidPtr);
    948 			rf_markalldirty(raidPtr);
    949 		}
    950 		/* free the buffers.  No return code here. */
    951 		if (k_cfg->layoutSpecificSize) {
    952 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    953 		}
    954 		RF_Free(k_cfg, sizeof(RF_Config_t));
    955 
    956 		return (retcode);
    957 
    958 		/* shutdown the system */
    959 	case RAIDFRAME_SHUTDOWN:
    960 
    961 		if ((error = raidlock(rs)) != 0)
    962 			return (error);
    963 
    964 		/*
    965 		 * If somebody has a partition mounted, we shouldn't
    966 		 * shutdown.
    967 		 */
    968 
    969 		part = DISKPART(dev);
    970 		pmask = (1 << part);
    971 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    972 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    973 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    974 			raidunlock(rs);
    975 			return (EBUSY);
    976 		}
    977 
    978 		retcode = rf_Shutdown(raidPtr);
    979 
    980 		/* It's no longer initialized... */
    981 		rs->sc_flags &= ~RAIDF_INITED;
    982 
    983 		/* Detach the disk. */
    984 		disk_detach(&rs->sc_dkdev);
    985 
    986 		raidunlock(rs);
    987 
    988 		return (retcode);
    989 	case RAIDFRAME_GET_COMPONENT_LABEL:
    990 		clabel_ptr = (RF_ComponentLabel_t **) data;
    991 		/* need to read the component label for the disk indicated
    992 		   by row,column in clabel */
    993 
    994 		/* For practice, let's get it directly fromdisk, rather
    995 		   than from the in-core copy */
    996 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    997 			   (RF_ComponentLabel_t *));
    998 		if (clabel == NULL)
    999 			return (ENOMEM);
   1000 
   1001 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
   1002 
   1003 		retcode = copyin( *clabel_ptr, clabel,
   1004 				  sizeof(RF_ComponentLabel_t));
   1005 
   1006 		if (retcode) {
   1007 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1008 			return(retcode);
   1009 		}
   1010 
   1011 		clabel->row = 0; /* Don't allow looking at anything else.*/
   1012 
   1013 		column = clabel->column;
   1014 
   1015 		if ((column < 0) || (column >= raidPtr->numCol +
   1016 				     raidPtr->numSpare)) {
   1017 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1018 			return(EINVAL);
   1019 		}
   1020 
   1021 		raidread_component_label(raidPtr->Disks[column].dev,
   1022 				raidPtr->raid_cinfo[column].ci_vp,
   1023 				clabel );
   1024 
   1025 		retcode = copyout(clabel, *clabel_ptr,
   1026 				  sizeof(RF_ComponentLabel_t));
   1027 		RF_Free(clabel, sizeof(RF_ComponentLabel_t));
   1028 		return (retcode);
   1029 
   1030 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1031 		clabel = (RF_ComponentLabel_t *) data;
   1032 
   1033 		/* XXX check the label for valid stuff... */
   1034 		/* Note that some things *should not* get modified --
   1035 		   the user should be re-initing the labels instead of
   1036 		   trying to patch things.
   1037 		   */
   1038 
   1039 		raidid = raidPtr->raidid;
   1040 		printf("raid%d: Got component label:\n", raidid);
   1041 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1042 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1043 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1044 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1045 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1046 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1047 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1048 
   1049 		clabel->row = 0;
   1050 		column = clabel->column;
   1051 
   1052 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1053 			return(EINVAL);
   1054 		}
   1055 
   1056 		/* XXX this isn't allowed to do anything for now :-) */
   1057 
   1058 		/* XXX and before it is, we need to fill in the rest
   1059 		   of the fields!?!?!?! */
   1060 #if 0
   1061 		raidwrite_component_label(
   1062                             raidPtr->Disks[column].dev,
   1063 			    raidPtr->raid_cinfo[column].ci_vp,
   1064 			    clabel );
   1065 #endif
   1066 		return (0);
   1067 
   1068 	case RAIDFRAME_INIT_LABELS:
   1069 		clabel = (RF_ComponentLabel_t *) data;
   1070 		/*
   1071 		   we only want the serial number from
   1072 		   the above.  We get all the rest of the information
   1073 		   from the config that was used to create this RAID
   1074 		   set.
   1075 		   */
   1076 
   1077 		raidPtr->serial_number = clabel->serial_number;
   1078 
   1079 		raid_init_component_label(raidPtr, &ci_label);
   1080 		ci_label.serial_number = clabel->serial_number;
   1081 		ci_label.row = 0; /* we dont' pretend to support more */
   1082 
   1083 		for(column=0;column<raidPtr->numCol;column++) {
   1084 			diskPtr = &raidPtr->Disks[column];
   1085 			if (!RF_DEAD_DISK(diskPtr->status)) {
   1086 				ci_label.partitionSize = diskPtr->partitionSize;
   1087 				ci_label.column = column;
   1088 				raidwrite_component_label(
   1089 							  raidPtr->Disks[column].dev,
   1090 							  raidPtr->raid_cinfo[column].ci_vp,
   1091 							  &ci_label );
   1092 			}
   1093 		}
   1094 
   1095 		return (retcode);
   1096 	case RAIDFRAME_SET_AUTOCONFIG:
   1097 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1098 		printf("raid%d: New autoconfig value is: %d\n",
   1099 		       raidPtr->raidid, d);
   1100 		*(int *) data = d;
   1101 		return (retcode);
   1102 
   1103 	case RAIDFRAME_SET_ROOT:
   1104 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1105 		printf("raid%d: New rootpartition value is: %d\n",
   1106 		       raidPtr->raidid, d);
   1107 		*(int *) data = d;
   1108 		return (retcode);
   1109 
   1110 		/* initialize all parity */
   1111 	case RAIDFRAME_REWRITEPARITY:
   1112 
   1113 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1114 			/* Parity for RAID 0 is trivially correct */
   1115 			raidPtr->parity_good = RF_RAID_CLEAN;
   1116 			return(0);
   1117 		}
   1118 
   1119 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1120 			/* Re-write is already in progress! */
   1121 			return(EINVAL);
   1122 		}
   1123 
   1124 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1125 					   rf_RewriteParityThread,
   1126 					   raidPtr,"raid_parity");
   1127 		return (retcode);
   1128 
   1129 
   1130 	case RAIDFRAME_ADD_HOT_SPARE:
   1131 		sparePtr = (RF_SingleComponent_t *) data;
   1132 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1133 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1134 		return(retcode);
   1135 
   1136 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1137 		return(retcode);
   1138 
   1139 	case RAIDFRAME_DELETE_COMPONENT:
   1140 		componentPtr = (RF_SingleComponent_t *)data;
   1141 		memcpy( &component, componentPtr,
   1142 			sizeof(RF_SingleComponent_t));
   1143 		retcode = rf_delete_component(raidPtr, &component);
   1144 		return(retcode);
   1145 
   1146 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1147 		componentPtr = (RF_SingleComponent_t *)data;
   1148 		memcpy( &component, componentPtr,
   1149 			sizeof(RF_SingleComponent_t));
   1150 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1151 		return(retcode);
   1152 
   1153 	case RAIDFRAME_REBUILD_IN_PLACE:
   1154 
   1155 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1156 			/* Can't do this on a RAID 0!! */
   1157 			return(EINVAL);
   1158 		}
   1159 
   1160 		if (raidPtr->recon_in_progress == 1) {
   1161 			/* a reconstruct is already in progress! */
   1162 			return(EINVAL);
   1163 		}
   1164 
   1165 		componentPtr = (RF_SingleComponent_t *) data;
   1166 		memcpy( &component, componentPtr,
   1167 			sizeof(RF_SingleComponent_t));
   1168 		component.row = 0; /* we don't support any more */
   1169 		column = component.column;
   1170 
   1171 		if ((column < 0) || (column >= raidPtr->numCol)) {
   1172 			return(EINVAL);
   1173 		}
   1174 
   1175 		RF_LOCK_MUTEX(raidPtr->mutex);
   1176 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
   1177 		    (raidPtr->numFailures > 0)) {
   1178 			/* XXX 0 above shouldn't be constant!!! */
   1179 			/* some component other than this has failed.
   1180 			   Let's not make things worse than they already
   1181 			   are... */
   1182 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1183 			       raidPtr->raidid);
   1184 			printf("raid%d:     Col: %d   Too many failures.\n",
   1185 			       raidPtr->raidid, column);
   1186 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1187 			return (EINVAL);
   1188 		}
   1189 		if (raidPtr->Disks[column].status ==
   1190 		    rf_ds_reconstructing) {
   1191 			printf("raid%d: Unable to reconstruct to disk at:\n",
   1192 			       raidPtr->raidid);
   1193 			printf("raid%d:    Col: %d   Reconstruction already occuring!\n", raidPtr->raidid, column);
   1194 
   1195 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1196 			return (EINVAL);
   1197 		}
   1198 		if (raidPtr->Disks[column].status == rf_ds_spared) {
   1199 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1200 			return (EINVAL);
   1201 		}
   1202 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1203 
   1204 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1205 		if (rrcopy == NULL)
   1206 			return(ENOMEM);
   1207 
   1208 		rrcopy->raidPtr = (void *) raidPtr;
   1209 		rrcopy->col = column;
   1210 
   1211 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1212 					   rf_ReconstructInPlaceThread,
   1213 					   rrcopy,"raid_reconip");
   1214 		return(retcode);
   1215 
   1216 	case RAIDFRAME_GET_INFO:
   1217 		if (!raidPtr->valid)
   1218 			return (ENODEV);
   1219 		ucfgp = (RF_DeviceConfig_t **) data;
   1220 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1221 			  (RF_DeviceConfig_t *));
   1222 		if (d_cfg == NULL)
   1223 			return (ENOMEM);
   1224 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1225 		d_cfg->rows = 1; /* there is only 1 row now */
   1226 		d_cfg->cols = raidPtr->numCol;
   1227 		d_cfg->ndevs = raidPtr->numCol;
   1228 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1229 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1230 			return (ENOMEM);
   1231 		}
   1232 		d_cfg->nspares = raidPtr->numSpare;
   1233 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1234 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1235 			return (ENOMEM);
   1236 		}
   1237 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1238 		d = 0;
   1239 		for (j = 0; j < d_cfg->cols; j++) {
   1240 			d_cfg->devs[d] = raidPtr->Disks[j];
   1241 			d++;
   1242 		}
   1243 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1244 			d_cfg->spares[i] = raidPtr->Disks[j];
   1245 		}
   1246 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
   1247 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1248 
   1249 		return (retcode);
   1250 
   1251 	case RAIDFRAME_CHECK_PARITY:
   1252 		*(int *) data = raidPtr->parity_good;
   1253 		return (0);
   1254 
   1255 	case RAIDFRAME_RESET_ACCTOTALS:
   1256 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1257 		return (0);
   1258 
   1259 	case RAIDFRAME_GET_ACCTOTALS:
   1260 		totals = (RF_AccTotals_t *) data;
   1261 		*totals = raidPtr->acc_totals;
   1262 		return (0);
   1263 
   1264 	case RAIDFRAME_KEEP_ACCTOTALS:
   1265 		raidPtr->keep_acc_totals = *(int *)data;
   1266 		return (0);
   1267 
   1268 	case RAIDFRAME_GET_SIZE:
   1269 		*(int *) data = raidPtr->totalSectors;
   1270 		return (0);
   1271 
   1272 		/* fail a disk & optionally start reconstruction */
   1273 	case RAIDFRAME_FAIL_DISK:
   1274 
   1275 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1276 			/* Can't do this on a RAID 0!! */
   1277 			return(EINVAL);
   1278 		}
   1279 
   1280 		rr = (struct rf_recon_req *) data;
   1281 		rr->row = 0;
   1282 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
   1283 			return (EINVAL);
   1284 
   1285 
   1286 		RF_LOCK_MUTEX(raidPtr->mutex);
   1287 		if ((raidPtr->Disks[rr->col].status ==
   1288 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
   1289 			/* some other component has failed.  Let's not make
   1290 			   things worse. XXX wrong for RAID6 */
   1291 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1292 			return (EINVAL);
   1293 		}
   1294 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
   1295 			/* Can't fail a spared disk! */
   1296 			RF_UNLOCK_MUTEX(raidPtr->mutex);
   1297 			return (EINVAL);
   1298 		}
   1299 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1300 
   1301 		/* make a copy of the recon request so that we don't rely on
   1302 		 * the user's buffer */
   1303 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1304 		if (rrcopy == NULL)
   1305 			return(ENOMEM);
   1306 		memcpy(rrcopy, rr, sizeof(*rr));
   1307 		rrcopy->raidPtr = (void *) raidPtr;
   1308 
   1309 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1310 					   rf_ReconThread,
   1311 					   rrcopy,"raid_recon");
   1312 		return (0);
   1313 
   1314 		/* invoke a copyback operation after recon on whatever disk
   1315 		 * needs it, if any */
   1316 	case RAIDFRAME_COPYBACK:
   1317 
   1318 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1319 			/* This makes no sense on a RAID 0!! */
   1320 			return(EINVAL);
   1321 		}
   1322 
   1323 		if (raidPtr->copyback_in_progress == 1) {
   1324 			/* Copyback is already in progress! */
   1325 			return(EINVAL);
   1326 		}
   1327 
   1328 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1329 					   rf_CopybackThread,
   1330 					   raidPtr,"raid_copyback");
   1331 		return (retcode);
   1332 
   1333 		/* return the percentage completion of reconstruction */
   1334 	case RAIDFRAME_CHECK_RECON_STATUS:
   1335 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1336 			/* This makes no sense on a RAID 0, so tell the
   1337 			   user it's done. */
   1338 			*(int *) data = 100;
   1339 			return(0);
   1340 		}
   1341 		if (raidPtr->status != rf_rs_reconstructing)
   1342 			*(int *) data = 100;
   1343 		else
   1344 			*(int *) data = raidPtr->reconControl->percentComplete;
   1345 		return (0);
   1346 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1347 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1348 		if (raidPtr->status != rf_rs_reconstructing) {
   1349 			progressInfo.remaining = 0;
   1350 			progressInfo.completed = 100;
   1351 			progressInfo.total = 100;
   1352 		} else {
   1353 			progressInfo.total =
   1354 				raidPtr->reconControl->numRUsTotal;
   1355 			progressInfo.completed =
   1356 				raidPtr->reconControl->numRUsComplete;
   1357 			progressInfo.remaining = progressInfo.total -
   1358 				progressInfo.completed;
   1359 		}
   1360 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1361 				  sizeof(RF_ProgressInfo_t));
   1362 		return (retcode);
   1363 
   1364 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1365 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1366 			/* This makes no sense on a RAID 0, so tell the
   1367 			   user it's done. */
   1368 			*(int *) data = 100;
   1369 			return(0);
   1370 		}
   1371 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1372 			*(int *) data = 100 *
   1373 				raidPtr->parity_rewrite_stripes_done /
   1374 				raidPtr->Layout.numStripe;
   1375 		} else {
   1376 			*(int *) data = 100;
   1377 		}
   1378 		return (0);
   1379 
   1380 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1381 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1382 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1383 			progressInfo.total = raidPtr->Layout.numStripe;
   1384 			progressInfo.completed =
   1385 				raidPtr->parity_rewrite_stripes_done;
   1386 			progressInfo.remaining = progressInfo.total -
   1387 				progressInfo.completed;
   1388 		} else {
   1389 			progressInfo.remaining = 0;
   1390 			progressInfo.completed = 100;
   1391 			progressInfo.total = 100;
   1392 		}
   1393 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1394 				  sizeof(RF_ProgressInfo_t));
   1395 		return (retcode);
   1396 
   1397 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1398 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1399 			/* This makes no sense on a RAID 0 */
   1400 			*(int *) data = 100;
   1401 			return(0);
   1402 		}
   1403 		if (raidPtr->copyback_in_progress == 1) {
   1404 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1405 				raidPtr->Layout.numStripe;
   1406 		} else {
   1407 			*(int *) data = 100;
   1408 		}
   1409 		return (0);
   1410 
   1411 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1412 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1413 		if (raidPtr->copyback_in_progress == 1) {
   1414 			progressInfo.total = raidPtr->Layout.numStripe;
   1415 			progressInfo.completed =
   1416 				raidPtr->copyback_stripes_done;
   1417 			progressInfo.remaining = progressInfo.total -
   1418 				progressInfo.completed;
   1419 		} else {
   1420 			progressInfo.remaining = 0;
   1421 			progressInfo.completed = 100;
   1422 			progressInfo.total = 100;
   1423 		}
   1424 		retcode = copyout(&progressInfo, *progressInfoPtr,
   1425 				  sizeof(RF_ProgressInfo_t));
   1426 		return (retcode);
   1427 
   1428 		/* the sparetable daemon calls this to wait for the kernel to
   1429 		 * need a spare table. this ioctl does not return until a
   1430 		 * spare table is needed. XXX -- calling mpsleep here in the
   1431 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1432 		 * -- I should either compute the spare table in the kernel,
   1433 		 * or have a different -- XXX XXX -- interface (a different
   1434 		 * character device) for delivering the table     -- XXX */
   1435 #if 0
   1436 	case RAIDFRAME_SPARET_WAIT:
   1437 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1438 		while (!rf_sparet_wait_queue)
   1439 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1440 		waitreq = rf_sparet_wait_queue;
   1441 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1442 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1443 
   1444 		/* structure assignment */
   1445 		*((RF_SparetWait_t *) data) = *waitreq;
   1446 
   1447 		RF_Free(waitreq, sizeof(*waitreq));
   1448 		return (0);
   1449 
   1450 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1451 		 * code in it that will cause the dameon to exit */
   1452 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1453 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1454 		waitreq->fcol = -1;
   1455 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1456 		waitreq->next = rf_sparet_wait_queue;
   1457 		rf_sparet_wait_queue = waitreq;
   1458 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1459 		wakeup(&rf_sparet_wait_queue);
   1460 		return (0);
   1461 
   1462 		/* used by the spare table daemon to deliver a spare table
   1463 		 * into the kernel */
   1464 	case RAIDFRAME_SEND_SPARET:
   1465 
   1466 		/* install the spare table */
   1467 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1468 
   1469 		/* respond to the requestor.  the return status of the spare
   1470 		 * table installation is passed in the "fcol" field */
   1471 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1472 		waitreq->fcol = retcode;
   1473 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1474 		waitreq->next = rf_sparet_resp_queue;
   1475 		rf_sparet_resp_queue = waitreq;
   1476 		wakeup(&rf_sparet_resp_queue);
   1477 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1478 
   1479 		return (retcode);
   1480 #endif
   1481 
   1482 	default:
   1483 		break; /* fall through to the os-specific code below */
   1484 
   1485 	}
   1486 
   1487 	if (!raidPtr->valid)
   1488 		return (EINVAL);
   1489 
   1490 	/*
   1491 	 * Add support for "regular" device ioctls here.
   1492 	 */
   1493 
   1494 	switch (cmd) {
   1495 	case DIOCGDINFO:
   1496 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1497 		break;
   1498 #ifdef __HAVE_OLD_DISKLABEL
   1499 	case ODIOCGDINFO:
   1500 		newlabel = *(rs->sc_dkdev.dk_label);
   1501 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1502 			return ENOTTY;
   1503 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1504 		break;
   1505 #endif
   1506 
   1507 	case DIOCGPART:
   1508 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1509 		((struct partinfo *) data)->part =
   1510 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1511 		break;
   1512 
   1513 	case DIOCWDINFO:
   1514 	case DIOCSDINFO:
   1515 #ifdef __HAVE_OLD_DISKLABEL
   1516 	case ODIOCWDINFO:
   1517 	case ODIOCSDINFO:
   1518 #endif
   1519 	{
   1520 		struct disklabel *lp;
   1521 #ifdef __HAVE_OLD_DISKLABEL
   1522 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1523 			memset(&newlabel, 0, sizeof newlabel);
   1524 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1525 			lp = &newlabel;
   1526 		} else
   1527 #endif
   1528 		lp = (struct disklabel *)data;
   1529 
   1530 		if ((error = raidlock(rs)) != 0)
   1531 			return (error);
   1532 
   1533 		rs->sc_flags |= RAIDF_LABELLING;
   1534 
   1535 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1536 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1537 		if (error == 0) {
   1538 			if (cmd == DIOCWDINFO
   1539 #ifdef __HAVE_OLD_DISKLABEL
   1540 			    || cmd == ODIOCWDINFO
   1541 #endif
   1542 			   )
   1543 				error = writedisklabel(RAIDLABELDEV(dev),
   1544 				    raidstrategy, rs->sc_dkdev.dk_label,
   1545 				    rs->sc_dkdev.dk_cpulabel);
   1546 		}
   1547 		rs->sc_flags &= ~RAIDF_LABELLING;
   1548 
   1549 		raidunlock(rs);
   1550 
   1551 		if (error)
   1552 			return (error);
   1553 		break;
   1554 	}
   1555 
   1556 	case DIOCWLABEL:
   1557 		if (*(int *) data != 0)
   1558 			rs->sc_flags |= RAIDF_WLABEL;
   1559 		else
   1560 			rs->sc_flags &= ~RAIDF_WLABEL;
   1561 		break;
   1562 
   1563 	case DIOCGDEFLABEL:
   1564 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1565 		break;
   1566 
   1567 #ifdef __HAVE_OLD_DISKLABEL
   1568 	case ODIOCGDEFLABEL:
   1569 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1570 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1571 			return ENOTTY;
   1572 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1573 		break;
   1574 #endif
   1575 
   1576 	default:
   1577 		retcode = ENOTTY;
   1578 	}
   1579 	return (retcode);
   1580 
   1581 }
   1582 
   1583 
   1584 /* raidinit -- complete the rest of the initialization for the
   1585    RAIDframe device.  */
   1586 
   1587 
   1588 static void
   1589 raidinit(RF_Raid_t *raidPtr)
   1590 {
   1591 	struct raid_softc *rs;
   1592 	int     unit;
   1593 
   1594 	unit = raidPtr->raidid;
   1595 
   1596 	rs = &raid_softc[unit];
   1597 
   1598 	/* XXX should check return code first... */
   1599 	rs->sc_flags |= RAIDF_INITED;
   1600 
   1601 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1602 
   1603 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1604 
   1605 	/* disk_attach actually creates space for the CPU disklabel, among
   1606 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1607 	 * with disklabels. */
   1608 
   1609 	disk_attach(&rs->sc_dkdev);
   1610 
   1611 	/* XXX There may be a weird interaction here between this, and
   1612 	 * protectedSectors, as used in RAIDframe.  */
   1613 
   1614 	rs->sc_size = raidPtr->totalSectors;
   1615 
   1616 }
   1617 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
   1618 /* wake up the daemon & tell it to get us a spare table
   1619  * XXX
   1620  * the entries in the queues should be tagged with the raidPtr
   1621  * so that in the extremely rare case that two recons happen at once,
   1622  * we know for which device were requesting a spare table
   1623  * XXX
   1624  *
   1625  * XXX This code is not currently used. GO
   1626  */
   1627 int
   1628 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
   1629 {
   1630 	int     retcode;
   1631 
   1632 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1633 	req->next = rf_sparet_wait_queue;
   1634 	rf_sparet_wait_queue = req;
   1635 	wakeup(&rf_sparet_wait_queue);
   1636 
   1637 	/* mpsleep unlocks the mutex */
   1638 	while (!rf_sparet_resp_queue) {
   1639 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1640 		    "raidframe getsparetable", 0);
   1641 	}
   1642 	req = rf_sparet_resp_queue;
   1643 	rf_sparet_resp_queue = req->next;
   1644 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1645 
   1646 	retcode = req->fcol;
   1647 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1648 					 * alloc'd */
   1649 	return (retcode);
   1650 }
   1651 #endif
   1652 
   1653 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1654  * bp & passes it down.
   1655  * any calls originating in the kernel must use non-blocking I/O
   1656  * do some extra sanity checking to return "appropriate" error values for
   1657  * certain conditions (to make some standard utilities work)
   1658  *
   1659  * Formerly known as: rf_DoAccessKernel
   1660  */
   1661 void
   1662 raidstart(RF_Raid_t *raidPtr)
   1663 {
   1664 	RF_SectorCount_t num_blocks, pb, sum;
   1665 	RF_RaidAddr_t raid_addr;
   1666 	struct partition *pp;
   1667 	daddr_t blocknum;
   1668 	int     unit;
   1669 	struct raid_softc *rs;
   1670 	int     do_async;
   1671 	struct buf *bp;
   1672 
   1673 	unit = raidPtr->raidid;
   1674 	rs = &raid_softc[unit];
   1675 
   1676 	/* quick check to see if anything has died recently */
   1677 	RF_LOCK_MUTEX(raidPtr->mutex);
   1678 	if (raidPtr->numNewFailures > 0) {
   1679 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1680 		rf_update_component_labels(raidPtr,
   1681 					   RF_NORMAL_COMPONENT_UPDATE);
   1682 		RF_LOCK_MUTEX(raidPtr->mutex);
   1683 		raidPtr->numNewFailures--;
   1684 	}
   1685 
   1686 	/* Check to see if we're at the limit... */
   1687 	while (raidPtr->openings > 0) {
   1688 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1689 
   1690 		/* get the next item, if any, from the queue */
   1691 		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
   1692 			/* nothing more to do */
   1693 			return;
   1694 		}
   1695 
   1696 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1697 		 * partition.. Need to make it absolute to the underlying
   1698 		 * device.. */
   1699 
   1700 		blocknum = bp->b_blkno;
   1701 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1702 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1703 			blocknum += pp->p_offset;
   1704 		}
   1705 
   1706 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1707 			    (int) blocknum));
   1708 
   1709 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1710 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1711 
   1712 		/* *THIS* is where we adjust what block we're going to...
   1713 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1714 		raid_addr = blocknum;
   1715 
   1716 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1717 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1718 		sum = raid_addr + num_blocks + pb;
   1719 		if (1 || rf_debugKernelAccess) {
   1720 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1721 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1722 				    (int) pb, (int) bp->b_resid));
   1723 		}
   1724 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1725 		    || (sum < num_blocks) || (sum < pb)) {
   1726 			bp->b_error = ENOSPC;
   1727 			bp->b_flags |= B_ERROR;
   1728 			bp->b_resid = bp->b_bcount;
   1729 			biodone(bp);
   1730 			RF_LOCK_MUTEX(raidPtr->mutex);
   1731 			continue;
   1732 		}
   1733 		/*
   1734 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1735 		 */
   1736 
   1737 		if (bp->b_bcount & raidPtr->sectorMask) {
   1738 			bp->b_error = EINVAL;
   1739 			bp->b_flags |= B_ERROR;
   1740 			bp->b_resid = bp->b_bcount;
   1741 			biodone(bp);
   1742 			RF_LOCK_MUTEX(raidPtr->mutex);
   1743 			continue;
   1744 
   1745 		}
   1746 		db1_printf(("Calling DoAccess..\n"));
   1747 
   1748 
   1749 		RF_LOCK_MUTEX(raidPtr->mutex);
   1750 		raidPtr->openings--;
   1751 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1752 
   1753 		/*
   1754 		 * Everything is async.
   1755 		 */
   1756 		do_async = 1;
   1757 
   1758 		disk_busy(&rs->sc_dkdev);
   1759 
   1760 		/* XXX we're still at splbio() here... do we *really*
   1761 		   need to be? */
   1762 
   1763 		/* don't ever condition on bp->b_flags & B_WRITE.
   1764 		 * always condition on B_READ instead */
   1765 
   1766 		bp->b_error = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1767 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1768 				      do_async, raid_addr, num_blocks,
   1769 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1770 
   1771 		if (bp->b_error) {
   1772 			bp->b_flags |= B_ERROR;
   1773 		}
   1774 
   1775 		RF_LOCK_MUTEX(raidPtr->mutex);
   1776 	}
   1777 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1778 }
   1779 
   1780 
   1781 
   1782 
   1783 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1784 
   1785 int
   1786 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
   1787 {
   1788 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1789 	struct buf *bp;
   1790 	struct raidbuf *raidbp = NULL;
   1791 
   1792 	req->queue = queue;
   1793 
   1794 #if DIAGNOSTIC
   1795 	if (queue->raidPtr->raidid >= numraid) {
   1796 		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
   1797 		    numraid);
   1798 		panic("Invalid Unit number in rf_DispatchKernelIO");
   1799 	}
   1800 #endif
   1801 
   1802 	bp = req->bp;
   1803 #if 1
   1804 	/* XXX when there is a physical disk failure, someone is passing us a
   1805 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1806 	 * without taking a performance hit... (not sure where the real bug
   1807 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1808 
   1809 	if (bp->b_flags & B_ERROR) {
   1810 		bp->b_flags &= ~B_ERROR;
   1811 	}
   1812 	if (bp->b_error != 0) {
   1813 		bp->b_error = 0;
   1814 	}
   1815 #endif
   1816 	raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
   1817 	if (raidbp == NULL) {
   1818 		bp->b_flags |= B_ERROR;
   1819 		bp->b_error = ENOMEM;
   1820 		return (ENOMEM);
   1821 	}
   1822 	BUF_INIT(&raidbp->rf_buf);
   1823 
   1824 	/*
   1825 	 * context for raidiodone
   1826 	 */
   1827 	raidbp->rf_obp = bp;
   1828 	raidbp->req = req;
   1829 
   1830 	switch (req->type) {
   1831 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1832 		/* XXX need to do something extra here.. */
   1833 		/* I'm leaving this in, as I've never actually seen it used,
   1834 		 * and I'd like folks to report it... GO */
   1835 		printf(("WAKEUP CALLED\n"));
   1836 		queue->numOutstanding++;
   1837 
   1838 		/* XXX need to glue the original buffer into this??  */
   1839 
   1840 		KernelWakeupFunc(&raidbp->rf_buf);
   1841 		break;
   1842 
   1843 	case RF_IO_TYPE_READ:
   1844 	case RF_IO_TYPE_WRITE:
   1845 
   1846 		if (req->tracerec) {
   1847 			RF_ETIMER_START(req->tracerec->timer);
   1848 		}
   1849 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1850 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1851 		    req->sectorOffset, req->numSector,
   1852 		    req->buf, KernelWakeupFunc, (void *) req,
   1853 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1854 
   1855 		if (rf_debugKernelAccess) {
   1856 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1857 				(long) bp->b_blkno));
   1858 		}
   1859 		queue->numOutstanding++;
   1860 		queue->last_deq_sector = req->sectorOffset;
   1861 		/* acc wouldn't have been let in if there were any pending
   1862 		 * reqs at any other priority */
   1863 		queue->curPriority = req->priority;
   1864 
   1865 		db1_printf(("Going for %c to unit %d col %d\n",
   1866 			    req->type, queue->raidPtr->raidid,
   1867 			    queue->col));
   1868 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1869 			(int) req->sectorOffset, (int) req->numSector,
   1870 			(int) (req->numSector <<
   1871 			    queue->raidPtr->logBytesPerSector),
   1872 			(int) queue->raidPtr->logBytesPerSector));
   1873 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1874 			raidbp->rf_buf.b_vp->v_numoutput++;
   1875 		}
   1876 		VOP_STRATEGY(&raidbp->rf_buf);
   1877 
   1878 		break;
   1879 
   1880 	default:
   1881 		panic("bad req->type in rf_DispatchKernelIO");
   1882 	}
   1883 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1884 
   1885 	return (0);
   1886 }
   1887 /* this is the callback function associated with a I/O invoked from
   1888    kernel code.
   1889  */
   1890 static void
   1891 KernelWakeupFunc(struct buf *vbp)
   1892 {
   1893 	RF_DiskQueueData_t *req = NULL;
   1894 	RF_DiskQueue_t *queue;
   1895 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1896 	struct buf *bp;
   1897 	int s;
   1898 
   1899 	s = splbio();
   1900 	db1_printf(("recovering the request queue:\n"));
   1901 	req = raidbp->req;
   1902 
   1903 	bp = raidbp->rf_obp;
   1904 
   1905 	queue = (RF_DiskQueue_t *) req->queue;
   1906 
   1907 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1908 		bp->b_flags |= B_ERROR;
   1909 		bp->b_error = raidbp->rf_buf.b_error ?
   1910 		    raidbp->rf_buf.b_error : EIO;
   1911 	}
   1912 
   1913 	/* XXX methinks this could be wrong... */
   1914 #if 1
   1915 	bp->b_resid = raidbp->rf_buf.b_resid;
   1916 #endif
   1917 
   1918 	if (req->tracerec) {
   1919 		RF_ETIMER_STOP(req->tracerec->timer);
   1920 		RF_ETIMER_EVAL(req->tracerec->timer);
   1921 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1922 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1923 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1924 		req->tracerec->num_phys_ios++;
   1925 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1926 	}
   1927 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1928 
   1929 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1930 	 * ballistic, and mark the component as hosed... */
   1931 
   1932 	if (bp->b_flags & B_ERROR) {
   1933 		/* Mark the disk as dead */
   1934 		/* but only mark it once... */
   1935 		if (queue->raidPtr->Disks[queue->col].status ==
   1936 		    rf_ds_optimal) {
   1937 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1938 			       queue->raidPtr->raidid,
   1939 			       queue->raidPtr->Disks[queue->col].devname);
   1940 			queue->raidPtr->Disks[queue->col].status =
   1941 			    rf_ds_failed;
   1942 			queue->raidPtr->status = rf_rs_degraded;
   1943 			queue->raidPtr->numFailures++;
   1944 			queue->raidPtr->numNewFailures++;
   1945 		} else {	/* Disk is already dead... */
   1946 			/* printf("Disk already marked as dead!\n"); */
   1947 		}
   1948 
   1949 	}
   1950 
   1951 	pool_put(&raidframe_cbufpool, raidbp);
   1952 
   1953 	/* Fill in the error value */
   1954 
   1955 	req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
   1956 
   1957 	simple_lock(&queue->raidPtr->iodone_lock);
   1958 
   1959 	/* Drop this one on the "finished" queue... */
   1960 	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
   1961 
   1962 	/* Let the raidio thread know there is work to be done. */
   1963 	wakeup(&(queue->raidPtr->iodone));
   1964 
   1965 	simple_unlock(&queue->raidPtr->iodone_lock);
   1966 
   1967 	splx(s);
   1968 }
   1969 
   1970 
   1971 
   1972 /*
   1973  * initialize a buf structure for doing an I/O in the kernel.
   1974  */
   1975 static void
   1976 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
   1977        RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t buf,
   1978        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
   1979        struct proc *b_proc)
   1980 {
   1981 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1982 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1983 	bp->b_bcount = numSect << logBytesPerSector;
   1984 	bp->b_bufsize = bp->b_bcount;
   1985 	bp->b_error = 0;
   1986 	bp->b_dev = dev;
   1987 	bp->b_data = buf;
   1988 	bp->b_blkno = startSect;
   1989 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1990 	if (bp->b_bcount == 0) {
   1991 		panic("bp->b_bcount is zero in InitBP!!");
   1992 	}
   1993 	bp->b_proc = b_proc;
   1994 	bp->b_iodone = cbFunc;
   1995 	bp->b_vp = b_vp;
   1996 
   1997 }
   1998 
   1999 static void
   2000 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
   2001 		    struct disklabel *lp)
   2002 {
   2003 	memset(lp, 0, sizeof(*lp));
   2004 
   2005 	/* fabricate a label... */
   2006 	lp->d_secperunit = raidPtr->totalSectors;
   2007 	lp->d_secsize = raidPtr->bytesPerSector;
   2008 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   2009 	lp->d_ntracks = 4 * raidPtr->numCol;
   2010 	lp->d_ncylinders = raidPtr->totalSectors /
   2011 		(lp->d_nsectors * lp->d_ntracks);
   2012 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   2013 
   2014 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2015 	lp->d_type = DTYPE_RAID;
   2016 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2017 	lp->d_rpm = 3600;
   2018 	lp->d_interleave = 1;
   2019 	lp->d_flags = 0;
   2020 
   2021 	lp->d_partitions[RAW_PART].p_offset = 0;
   2022 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2023 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2024 	lp->d_npartitions = RAW_PART + 1;
   2025 
   2026 	lp->d_magic = DISKMAGIC;
   2027 	lp->d_magic2 = DISKMAGIC;
   2028 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2029 
   2030 }
   2031 /*
   2032  * Read the disklabel from the raid device.  If one is not present, fake one
   2033  * up.
   2034  */
   2035 static void
   2036 raidgetdisklabel(dev_t dev)
   2037 {
   2038 	int     unit = raidunit(dev);
   2039 	struct raid_softc *rs = &raid_softc[unit];
   2040 	const char   *errstring;
   2041 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2042 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2043 	RF_Raid_t *raidPtr;
   2044 
   2045 	db1_printf(("Getting the disklabel...\n"));
   2046 
   2047 	memset(clp, 0, sizeof(*clp));
   2048 
   2049 	raidPtr = raidPtrs[unit];
   2050 
   2051 	raidgetdefaultlabel(raidPtr, rs, lp);
   2052 
   2053 	/*
   2054 	 * Call the generic disklabel extraction routine.
   2055 	 */
   2056 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2057 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2058 	if (errstring)
   2059 		raidmakedisklabel(rs);
   2060 	else {
   2061 		int     i;
   2062 		struct partition *pp;
   2063 
   2064 		/*
   2065 		 * Sanity check whether the found disklabel is valid.
   2066 		 *
   2067 		 * This is necessary since total size of the raid device
   2068 		 * may vary when an interleave is changed even though exactly
   2069 		 * same componets are used, and old disklabel may used
   2070 		 * if that is found.
   2071 		 */
   2072 		if (lp->d_secperunit != rs->sc_size)
   2073 			printf("raid%d: WARNING: %s: "
   2074 			    "total sector size in disklabel (%d) != "
   2075 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2076 			    lp->d_secperunit, (long) rs->sc_size);
   2077 		for (i = 0; i < lp->d_npartitions; i++) {
   2078 			pp = &lp->d_partitions[i];
   2079 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2080 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2081 				       "exceeds the size of raid (%ld)\n",
   2082 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2083 		}
   2084 	}
   2085 
   2086 }
   2087 /*
   2088  * Take care of things one might want to take care of in the event
   2089  * that a disklabel isn't present.
   2090  */
   2091 static void
   2092 raidmakedisklabel(struct raid_softc *rs)
   2093 {
   2094 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2095 	db1_printf(("Making a label..\n"));
   2096 
   2097 	/*
   2098 	 * For historical reasons, if there's no disklabel present
   2099 	 * the raw partition must be marked FS_BSDFFS.
   2100 	 */
   2101 
   2102 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2103 
   2104 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2105 
   2106 	lp->d_checksum = dkcksum(lp);
   2107 }
   2108 /*
   2109  * Lookup the provided name in the filesystem.  If the file exists,
   2110  * is a valid block device, and isn't being used by anyone else,
   2111  * set *vpp to the file's vnode.
   2112  * You'll find the original of this in ccd.c
   2113  */
   2114 int
   2115 raidlookup(char *path, struct proc *p, struct vnode **vpp)
   2116 {
   2117 	struct nameidata nd;
   2118 	struct vnode *vp;
   2119 	struct vattr va;
   2120 	int     error;
   2121 
   2122 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2123 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2124 		return (error);
   2125 	}
   2126 	vp = nd.ni_vp;
   2127 	if (vp->v_usecount > 1) {
   2128 		VOP_UNLOCK(vp, 0);
   2129 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2130 		return (EBUSY);
   2131 	}
   2132 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2133 		VOP_UNLOCK(vp, 0);
   2134 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2135 		return (error);
   2136 	}
   2137 	/* XXX: eventually we should handle VREG, too. */
   2138 	if (va.va_type != VBLK) {
   2139 		VOP_UNLOCK(vp, 0);
   2140 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2141 		return (ENOTBLK);
   2142 	}
   2143 	VOP_UNLOCK(vp, 0);
   2144 	*vpp = vp;
   2145 	return (0);
   2146 }
   2147 /*
   2148  * Wait interruptibly for an exclusive lock.
   2149  *
   2150  * XXX
   2151  * Several drivers do this; it should be abstracted and made MP-safe.
   2152  * (Hmm... where have we seen this warning before :->  GO )
   2153  */
   2154 static int
   2155 raidlock(struct raid_softc *rs)
   2156 {
   2157 	int     error;
   2158 
   2159 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2160 		rs->sc_flags |= RAIDF_WANTED;
   2161 		if ((error =
   2162 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2163 			return (error);
   2164 	}
   2165 	rs->sc_flags |= RAIDF_LOCKED;
   2166 	return (0);
   2167 }
   2168 /*
   2169  * Unlock and wake up any waiters.
   2170  */
   2171 static void
   2172 raidunlock(struct raid_softc *rs)
   2173 {
   2174 
   2175 	rs->sc_flags &= ~RAIDF_LOCKED;
   2176 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2177 		rs->sc_flags &= ~RAIDF_WANTED;
   2178 		wakeup(rs);
   2179 	}
   2180 }
   2181 
   2182 
   2183 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
   2184 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2185 
   2186 int
   2187 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2188 {
   2189 	RF_ComponentLabel_t clabel;
   2190 	raidread_component_label(dev, b_vp, &clabel);
   2191 	clabel.mod_counter = mod_counter;
   2192 	clabel.clean = RF_RAID_CLEAN;
   2193 	raidwrite_component_label(dev, b_vp, &clabel);
   2194 	return(0);
   2195 }
   2196 
   2197 
   2198 int
   2199 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2200 {
   2201 	RF_ComponentLabel_t clabel;
   2202 	raidread_component_label(dev, b_vp, &clabel);
   2203 	clabel.mod_counter = mod_counter;
   2204 	clabel.clean = RF_RAID_DIRTY;
   2205 	raidwrite_component_label(dev, b_vp, &clabel);
   2206 	return(0);
   2207 }
   2208 
   2209 /* ARGSUSED */
   2210 int
   2211 raidread_component_label(dev_t dev, struct vnode *b_vp,
   2212 			 RF_ComponentLabel_t *clabel)
   2213 {
   2214 	struct buf *bp;
   2215 	const struct bdevsw *bdev;
   2216 	int error;
   2217 
   2218 	/* XXX should probably ensure that we don't try to do this if
   2219 	   someone has changed rf_protected_sectors. */
   2220 
   2221 	if (b_vp == NULL) {
   2222 		/* For whatever reason, this component is not valid.
   2223 		   Don't try to read a component label from it. */
   2224 		return(EINVAL);
   2225 	}
   2226 
   2227 	/* get a block of the appropriate size... */
   2228 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2229 	bp->b_dev = dev;
   2230 
   2231 	/* get our ducks in a row for the read */
   2232 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2233 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2234 	bp->b_flags |= B_READ;
   2235  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2236 
   2237 	bdev = bdevsw_lookup(bp->b_dev);
   2238 	if (bdev == NULL)
   2239 		return (ENXIO);
   2240 	(*bdev->d_strategy)(bp);
   2241 
   2242 	error = biowait(bp);
   2243 
   2244 	if (!error) {
   2245 		memcpy(clabel, bp->b_data,
   2246 		       sizeof(RF_ComponentLabel_t));
   2247         }
   2248 
   2249 	brelse(bp);
   2250 	return(error);
   2251 }
   2252 /* ARGSUSED */
   2253 int
   2254 raidwrite_component_label(dev_t dev, struct vnode *b_vp,
   2255 			  RF_ComponentLabel_t *clabel)
   2256 {
   2257 	struct buf *bp;
   2258 	const struct bdevsw *bdev;
   2259 	int error;
   2260 
   2261 	/* get a block of the appropriate size... */
   2262 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2263 	bp->b_dev = dev;
   2264 
   2265 	/* get our ducks in a row for the write */
   2266 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2267 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2268 	bp->b_flags |= B_WRITE;
   2269  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2270 
   2271 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2272 
   2273 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2274 
   2275 	bdev = bdevsw_lookup(bp->b_dev);
   2276 	if (bdev == NULL)
   2277 		return (ENXIO);
   2278 	(*bdev->d_strategy)(bp);
   2279 	error = biowait(bp);
   2280 	brelse(bp);
   2281 	if (error) {
   2282 #if 1
   2283 		printf("Failed to write RAID component info!\n");
   2284 #endif
   2285 	}
   2286 
   2287 	return(error);
   2288 }
   2289 
   2290 void
   2291 rf_markalldirty(RF_Raid_t *raidPtr)
   2292 {
   2293 	RF_ComponentLabel_t clabel;
   2294 	int sparecol;
   2295 	int c;
   2296 	int j;
   2297 	int scol = -1;
   2298 
   2299 	raidPtr->mod_counter++;
   2300 	for (c = 0; c < raidPtr->numCol; c++) {
   2301 		/* we don't want to touch (at all) a disk that has
   2302 		   failed */
   2303 		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
   2304 			raidread_component_label(
   2305 						 raidPtr->Disks[c].dev,
   2306 						 raidPtr->raid_cinfo[c].ci_vp,
   2307 						 &clabel);
   2308 			if (clabel.status == rf_ds_spared) {
   2309 				/* XXX do something special...
   2310 				   but whatever you do, don't
   2311 				   try to access it!! */
   2312 			} else {
   2313 				raidmarkdirty(
   2314 					      raidPtr->Disks[c].dev,
   2315 					      raidPtr->raid_cinfo[c].ci_vp,
   2316 					      raidPtr->mod_counter);
   2317 			}
   2318 		}
   2319 	}
   2320 
   2321 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2322 		sparecol = raidPtr->numCol + c;
   2323 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2324 			/*
   2325 
   2326 			   we claim this disk is "optimal" if it's
   2327 			   rf_ds_used_spare, as that means it should be
   2328 			   directly substitutable for the disk it replaced.
   2329 			   We note that too...
   2330 
   2331 			 */
   2332 
   2333 			for(j=0;j<raidPtr->numCol;j++) {
   2334 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2335 					scol = j;
   2336 					break;
   2337 				}
   2338 			}
   2339 
   2340 			raidread_component_label(
   2341 				 raidPtr->Disks[sparecol].dev,
   2342 				 raidPtr->raid_cinfo[sparecol].ci_vp,
   2343 				 &clabel);
   2344 			/* make sure status is noted */
   2345 
   2346 			raid_init_component_label(raidPtr, &clabel);
   2347 
   2348 			clabel.row = 0;
   2349 			clabel.column = scol;
   2350 			/* Note: we *don't* change status from rf_ds_used_spare
   2351 			   to rf_ds_optimal */
   2352 			/* clabel.status = rf_ds_optimal; */
   2353 
   2354 			raidmarkdirty(raidPtr->Disks[sparecol].dev,
   2355 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2356 				      raidPtr->mod_counter);
   2357 		}
   2358 	}
   2359 }
   2360 
   2361 
   2362 void
   2363 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
   2364 {
   2365 	RF_ComponentLabel_t clabel;
   2366 	int sparecol;
   2367 	int c;
   2368 	int j;
   2369 	int scol;
   2370 
   2371 	scol = -1;
   2372 
   2373 	/* XXX should do extra checks to make sure things really are clean,
   2374 	   rather than blindly setting the clean bit... */
   2375 
   2376 	raidPtr->mod_counter++;
   2377 
   2378 	for (c = 0; c < raidPtr->numCol; c++) {
   2379 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
   2380 			raidread_component_label(
   2381 						 raidPtr->Disks[c].dev,
   2382 						 raidPtr->raid_cinfo[c].ci_vp,
   2383 						 &clabel);
   2384 				/* make sure status is noted */
   2385 			clabel.status = rf_ds_optimal;
   2386 				/* bump the counter */
   2387 			clabel.mod_counter = raidPtr->mod_counter;
   2388 
   2389 			raidwrite_component_label(
   2390 						  raidPtr->Disks[c].dev,
   2391 						  raidPtr->raid_cinfo[c].ci_vp,
   2392 						  &clabel);
   2393 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2394 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2395 					raidmarkclean(
   2396 						      raidPtr->Disks[c].dev,
   2397 						      raidPtr->raid_cinfo[c].ci_vp,
   2398 						      raidPtr->mod_counter);
   2399 				}
   2400 			}
   2401 		}
   2402 		/* else we don't touch it.. */
   2403 	}
   2404 
   2405 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2406 		sparecol = raidPtr->numCol + c;
   2407 		/* Need to ensure that the reconstruct actually completed! */
   2408 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   2409 			/*
   2410 
   2411 			   we claim this disk is "optimal" if it's
   2412 			   rf_ds_used_spare, as that means it should be
   2413 			   directly substitutable for the disk it replaced.
   2414 			   We note that too...
   2415 
   2416 			 */
   2417 
   2418 			for(j=0;j<raidPtr->numCol;j++) {
   2419 				if (raidPtr->Disks[j].spareCol == sparecol) {
   2420 					scol = j;
   2421 					break;
   2422 				}
   2423 			}
   2424 
   2425 			/* XXX shouldn't *really* need this... */
   2426 			raidread_component_label(
   2427 				      raidPtr->Disks[sparecol].dev,
   2428 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2429 				      &clabel);
   2430 			/* make sure status is noted */
   2431 
   2432 			raid_init_component_label(raidPtr, &clabel);
   2433 
   2434 			clabel.mod_counter = raidPtr->mod_counter;
   2435 			clabel.column = scol;
   2436 			clabel.status = rf_ds_optimal;
   2437 
   2438 			raidwrite_component_label(
   2439 				      raidPtr->Disks[sparecol].dev,
   2440 				      raidPtr->raid_cinfo[sparecol].ci_vp,
   2441 				      &clabel);
   2442 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2443 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2444 					raidmarkclean( raidPtr->Disks[sparecol].dev,
   2445 						       raidPtr->raid_cinfo[sparecol].ci_vp,
   2446 						       raidPtr->mod_counter);
   2447 				}
   2448 			}
   2449 		}
   2450 	}
   2451 }
   2452 
   2453 void
   2454 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
   2455 {
   2456 	struct proc *p;
   2457 
   2458 	p = raidPtr->engine_thread;
   2459 
   2460 	if (vp != NULL) {
   2461 		if (auto_configured == 1) {
   2462 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2463 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2464 			vput(vp);
   2465 
   2466 		} else {
   2467 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2468 		}
   2469 	}
   2470 }
   2471 
   2472 
   2473 void
   2474 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
   2475 {
   2476 	int r,c;
   2477 	struct vnode *vp;
   2478 	int acd;
   2479 
   2480 
   2481 	/* We take this opportunity to close the vnodes like we should.. */
   2482 
   2483 	for (c = 0; c < raidPtr->numCol; c++) {
   2484 		vp = raidPtr->raid_cinfo[c].ci_vp;
   2485 		acd = raidPtr->Disks[c].auto_configured;
   2486 		rf_close_component(raidPtr, vp, acd);
   2487 		raidPtr->raid_cinfo[c].ci_vp = NULL;
   2488 		raidPtr->Disks[c].auto_configured = 0;
   2489 	}
   2490 
   2491 	for (r = 0; r < raidPtr->numSpare; r++) {
   2492 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
   2493 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
   2494 		rf_close_component(raidPtr, vp, acd);
   2495 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
   2496 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
   2497 	}
   2498 }
   2499 
   2500 
   2501 void
   2502 rf_ReconThread(struct rf_recon_req *req)
   2503 {
   2504 	int     s;
   2505 	RF_Raid_t *raidPtr;
   2506 
   2507 	s = splbio();
   2508 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2509 	raidPtr->recon_in_progress = 1;
   2510 
   2511 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
   2512 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2513 
   2514 	/* XXX get rid of this! we don't need it at all.. */
   2515 	RF_Free(req, sizeof(*req));
   2516 
   2517 	raidPtr->recon_in_progress = 0;
   2518 	splx(s);
   2519 
   2520 	/* That's all... */
   2521 	kthread_exit(0);        /* does not return */
   2522 }
   2523 
   2524 void
   2525 rf_RewriteParityThread(RF_Raid_t *raidPtr)
   2526 {
   2527 	int retcode;
   2528 	int s;
   2529 
   2530 	raidPtr->parity_rewrite_in_progress = 1;
   2531 	s = splbio();
   2532 	retcode = rf_RewriteParity(raidPtr);
   2533 	splx(s);
   2534 	if (retcode) {
   2535 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2536 	} else {
   2537 		/* set the clean bit!  If we shutdown correctly,
   2538 		   the clean bit on each component label will get
   2539 		   set */
   2540 		raidPtr->parity_good = RF_RAID_CLEAN;
   2541 	}
   2542 	raidPtr->parity_rewrite_in_progress = 0;
   2543 
   2544 	/* Anyone waiting for us to stop?  If so, inform them... */
   2545 	if (raidPtr->waitShutdown) {
   2546 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2547 	}
   2548 
   2549 	/* That's all... */
   2550 	kthread_exit(0);        /* does not return */
   2551 }
   2552 
   2553 
   2554 void
   2555 rf_CopybackThread(RF_Raid_t *raidPtr)
   2556 {
   2557 	int s;
   2558 
   2559 	raidPtr->copyback_in_progress = 1;
   2560 	s = splbio();
   2561 	rf_CopybackReconstructedData(raidPtr);
   2562 	splx(s);
   2563 	raidPtr->copyback_in_progress = 0;
   2564 
   2565 	/* That's all... */
   2566 	kthread_exit(0);        /* does not return */
   2567 }
   2568 
   2569 
   2570 void
   2571 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
   2572 {
   2573 	int s;
   2574 	RF_Raid_t *raidPtr;
   2575 
   2576 	s = splbio();
   2577 	raidPtr = req->raidPtr;
   2578 	raidPtr->recon_in_progress = 1;
   2579 	rf_ReconstructInPlace(raidPtr, req->col);
   2580 	RF_Free(req, sizeof(*req));
   2581 	raidPtr->recon_in_progress = 0;
   2582 	splx(s);
   2583 
   2584 	/* That's all... */
   2585 	kthread_exit(0);        /* does not return */
   2586 }
   2587 
   2588 RF_AutoConfig_t *
   2589 rf_find_raid_components()
   2590 {
   2591 	struct vnode *vp;
   2592 	struct disklabel label;
   2593 	struct device *dv;
   2594 	dev_t dev;
   2595 	int bmajor;
   2596 	int error;
   2597 	int i;
   2598 	int good_one;
   2599 	RF_ComponentLabel_t *clabel;
   2600 	RF_AutoConfig_t *ac_list;
   2601 	RF_AutoConfig_t *ac;
   2602 
   2603 
   2604 	/* initialize the AutoConfig list */
   2605 	ac_list = NULL;
   2606 
   2607 	/* we begin by trolling through *all* the devices on the system */
   2608 
   2609 	for (dv = alldevs.tqh_first; dv != NULL;
   2610 	     dv = dv->dv_list.tqe_next) {
   2611 
   2612 		/* we are only interested in disks... */
   2613 		if (dv->dv_class != DV_DISK)
   2614 			continue;
   2615 
   2616 		/* we don't care about floppies... */
   2617 		if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
   2618 			continue;
   2619 		}
   2620 
   2621 		/* we don't care about CD's... */
   2622 		if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
   2623 			continue;
   2624 		}
   2625 
   2626 		/* hdfd is the Atari/Hades floppy driver */
   2627 		if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
   2628 			continue;
   2629 		}
   2630 		/* fdisa is the Atari/Milan floppy driver */
   2631 		if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
   2632 			continue;
   2633 		}
   2634 
   2635 		/* need to find the device_name_to_block_device_major stuff */
   2636 		bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
   2637 
   2638 		/* get a vnode for the raw partition of this disk */
   2639 
   2640 		dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
   2641 		if (bdevvp(dev, &vp))
   2642 			panic("RAID can't alloc vnode");
   2643 
   2644 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2645 
   2646 		if (error) {
   2647 			/* "Who cares."  Continue looking
   2648 			   for something that exists*/
   2649 			vput(vp);
   2650 			continue;
   2651 		}
   2652 
   2653 		/* Ok, the disk exists.  Go get the disklabel. */
   2654 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
   2655 		if (error) {
   2656 			/*
   2657 			 * XXX can't happen - open() would
   2658 			 * have errored out (or faked up one)
   2659 			 */
   2660 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2661 			       dv->dv_xname, 'a' + RAW_PART, error);
   2662 		}
   2663 
   2664 		/* don't need this any more.  We'll allocate it again
   2665 		   a little later if we really do... */
   2666 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2667 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2668 		vput(vp);
   2669 
   2670 		for (i=0; i < label.d_npartitions; i++) {
   2671 			/* We only support partitions marked as RAID */
   2672 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2673 				continue;
   2674 
   2675 			dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
   2676 			if (bdevvp(dev, &vp))
   2677 				panic("RAID can't alloc vnode");
   2678 
   2679 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2680 			if (error) {
   2681 				/* Whatever... */
   2682 				vput(vp);
   2683 				continue;
   2684 			}
   2685 
   2686 			good_one = 0;
   2687 
   2688 			clabel = (RF_ComponentLabel_t *)
   2689 				malloc(sizeof(RF_ComponentLabel_t),
   2690 				       M_RAIDFRAME, M_NOWAIT);
   2691 			if (clabel == NULL) {
   2692 				/* XXX CLEANUP HERE */
   2693 				printf("RAID auto config: out of memory!\n");
   2694 				return(NULL); /* XXX probably should panic? */
   2695 			}
   2696 
   2697 			if (!raidread_component_label(dev, vp, clabel)) {
   2698 				/* Got the label.  Does it look reasonable? */
   2699 				if (rf_reasonable_label(clabel) &&
   2700 				    (clabel->partitionSize <=
   2701 				     label.d_partitions[i].p_size)) {
   2702 #if DEBUG
   2703 					printf("Component on: %s%c: %d\n",
   2704 					       dv->dv_xname, 'a'+i,
   2705 					       label.d_partitions[i].p_size);
   2706 					rf_print_component_label(clabel);
   2707 #endif
   2708 					/* if it's reasonable, add it,
   2709 					   else ignore it. */
   2710 					ac = (RF_AutoConfig_t *)
   2711 						malloc(sizeof(RF_AutoConfig_t),
   2712 						       M_RAIDFRAME,
   2713 						       M_NOWAIT);
   2714 					if (ac == NULL) {
   2715 						/* XXX should panic?? */
   2716 						return(NULL);
   2717 					}
   2718 
   2719 					sprintf(ac->devname, "%s%c",
   2720 						dv->dv_xname, 'a'+i);
   2721 					ac->dev = dev;
   2722 					ac->vp = vp;
   2723 					ac->clabel = clabel;
   2724 					ac->next = ac_list;
   2725 					ac_list = ac;
   2726 					good_one = 1;
   2727 				}
   2728 			}
   2729 			if (!good_one) {
   2730 				/* cleanup */
   2731 				free(clabel, M_RAIDFRAME);
   2732 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2733 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2734 				vput(vp);
   2735 			}
   2736 		}
   2737 	}
   2738 	return(ac_list);
   2739 }
   2740 
   2741 static int
   2742 rf_reasonable_label(RF_ComponentLabel_t *clabel)
   2743 {
   2744 
   2745 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2746 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2747 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2748 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2749 	    clabel->row >=0 &&
   2750 	    clabel->column >= 0 &&
   2751 	    clabel->num_rows > 0 &&
   2752 	    clabel->num_columns > 0 &&
   2753 	    clabel->row < clabel->num_rows &&
   2754 	    clabel->column < clabel->num_columns &&
   2755 	    clabel->blockSize > 0 &&
   2756 	    clabel->numBlocks > 0) {
   2757 		/* label looks reasonable enough... */
   2758 		return(1);
   2759 	}
   2760 	return(0);
   2761 }
   2762 
   2763 
   2764 #if DEBUG
   2765 void
   2766 rf_print_component_label(RF_ComponentLabel_t *clabel)
   2767 {
   2768 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2769 	       clabel->row, clabel->column,
   2770 	       clabel->num_rows, clabel->num_columns);
   2771 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2772 	       clabel->version, clabel->serial_number,
   2773 	       clabel->mod_counter);
   2774 	printf("   Clean: %s Status: %d\n",
   2775 	       clabel->clean ? "Yes" : "No", clabel->status );
   2776 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2777 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2778 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2779 	       (char) clabel->parityConfig, clabel->blockSize,
   2780 	       clabel->numBlocks);
   2781 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2782 	printf("   Contains root partition: %s\n",
   2783 	       clabel->root_partition ? "Yes" : "No" );
   2784 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2785 #if 0
   2786 	   printf("   Config order: %d\n", clabel->config_order);
   2787 #endif
   2788 
   2789 }
   2790 #endif
   2791 
   2792 RF_ConfigSet_t *
   2793 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
   2794 {
   2795 	RF_AutoConfig_t *ac;
   2796 	RF_ConfigSet_t *config_sets;
   2797 	RF_ConfigSet_t *cset;
   2798 	RF_AutoConfig_t *ac_next;
   2799 
   2800 
   2801 	config_sets = NULL;
   2802 
   2803 	/* Go through the AutoConfig list, and figure out which components
   2804 	   belong to what sets.  */
   2805 	ac = ac_list;
   2806 	while(ac!=NULL) {
   2807 		/* we're going to putz with ac->next, so save it here
   2808 		   for use at the end of the loop */
   2809 		ac_next = ac->next;
   2810 
   2811 		if (config_sets == NULL) {
   2812 			/* will need at least this one... */
   2813 			config_sets = (RF_ConfigSet_t *)
   2814 				malloc(sizeof(RF_ConfigSet_t),
   2815 				       M_RAIDFRAME, M_NOWAIT);
   2816 			if (config_sets == NULL) {
   2817 				panic("rf_create_auto_sets: No memory!");
   2818 			}
   2819 			/* this one is easy :) */
   2820 			config_sets->ac = ac;
   2821 			config_sets->next = NULL;
   2822 			config_sets->rootable = 0;
   2823 			ac->next = NULL;
   2824 		} else {
   2825 			/* which set does this component fit into? */
   2826 			cset = config_sets;
   2827 			while(cset!=NULL) {
   2828 				if (rf_does_it_fit(cset, ac)) {
   2829 					/* looks like it matches... */
   2830 					ac->next = cset->ac;
   2831 					cset->ac = ac;
   2832 					break;
   2833 				}
   2834 				cset = cset->next;
   2835 			}
   2836 			if (cset==NULL) {
   2837 				/* didn't find a match above... new set..*/
   2838 				cset = (RF_ConfigSet_t *)
   2839 					malloc(sizeof(RF_ConfigSet_t),
   2840 					       M_RAIDFRAME, M_NOWAIT);
   2841 				if (cset == NULL) {
   2842 					panic("rf_create_auto_sets: No memory!");
   2843 				}
   2844 				cset->ac = ac;
   2845 				ac->next = NULL;
   2846 				cset->next = config_sets;
   2847 				cset->rootable = 0;
   2848 				config_sets = cset;
   2849 			}
   2850 		}
   2851 		ac = ac_next;
   2852 	}
   2853 
   2854 
   2855 	return(config_sets);
   2856 }
   2857 
   2858 static int
   2859 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
   2860 {
   2861 	RF_ComponentLabel_t *clabel1, *clabel2;
   2862 
   2863 	/* If this one matches the *first* one in the set, that's good
   2864 	   enough, since the other members of the set would have been
   2865 	   through here too... */
   2866 	/* note that we are not checking partitionSize here..
   2867 
   2868 	   Note that we are also not checking the mod_counters here.
   2869 	   If everything else matches execpt the mod_counter, that's
   2870 	   good enough for this test.  We will deal with the mod_counters
   2871 	   a little later in the autoconfiguration process.
   2872 
   2873 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2874 
   2875 	   The reason we don't check for this is that failed disks
   2876 	   will have lower modification counts.  If those disks are
   2877 	   not added to the set they used to belong to, then they will
   2878 	   form their own set, which may result in 2 different sets,
   2879 	   for example, competing to be configured at raid0, and
   2880 	   perhaps competing to be the root filesystem set.  If the
   2881 	   wrong ones get configured, or both attempt to become /,
   2882 	   weird behaviour and or serious lossage will occur.  Thus we
   2883 	   need to bring them into the fold here, and kick them out at
   2884 	   a later point.
   2885 
   2886 	*/
   2887 
   2888 	clabel1 = cset->ac->clabel;
   2889 	clabel2 = ac->clabel;
   2890 	if ((clabel1->version == clabel2->version) &&
   2891 	    (clabel1->serial_number == clabel2->serial_number) &&
   2892 	    (clabel1->num_rows == clabel2->num_rows) &&
   2893 	    (clabel1->num_columns == clabel2->num_columns) &&
   2894 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2895 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2896 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2897 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2898 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2899 	    (clabel1->blockSize == clabel2->blockSize) &&
   2900 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2901 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2902 	    (clabel1->root_partition == clabel2->root_partition) &&
   2903 	    (clabel1->last_unit == clabel2->last_unit) &&
   2904 	    (clabel1->config_order == clabel2->config_order)) {
   2905 		/* if it get's here, it almost *has* to be a match */
   2906 	} else {
   2907 		/* it's not consistent with somebody in the set..
   2908 		   punt */
   2909 		return(0);
   2910 	}
   2911 	/* all was fine.. it must fit... */
   2912 	return(1);
   2913 }
   2914 
   2915 int
   2916 rf_have_enough_components(RF_ConfigSet_t *cset)
   2917 {
   2918 	RF_AutoConfig_t *ac;
   2919 	RF_AutoConfig_t *auto_config;
   2920 	RF_ComponentLabel_t *clabel;
   2921 	int c;
   2922 	int num_cols;
   2923 	int num_missing;
   2924 	int mod_counter;
   2925 	int mod_counter_found;
   2926 	int even_pair_failed;
   2927 	char parity_type;
   2928 
   2929 
   2930 	/* check to see that we have enough 'live' components
   2931 	   of this set.  If so, we can configure it if necessary */
   2932 
   2933 	num_cols = cset->ac->clabel->num_columns;
   2934 	parity_type = cset->ac->clabel->parityConfig;
   2935 
   2936 	/* XXX Check for duplicate components!?!?!? */
   2937 
   2938 	/* Determine what the mod_counter is supposed to be for this set. */
   2939 
   2940 	mod_counter_found = 0;
   2941 	mod_counter = 0;
   2942 	ac = cset->ac;
   2943 	while(ac!=NULL) {
   2944 		if (mod_counter_found==0) {
   2945 			mod_counter = ac->clabel->mod_counter;
   2946 			mod_counter_found = 1;
   2947 		} else {
   2948 			if (ac->clabel->mod_counter > mod_counter) {
   2949 				mod_counter = ac->clabel->mod_counter;
   2950 			}
   2951 		}
   2952 		ac = ac->next;
   2953 	}
   2954 
   2955 	num_missing = 0;
   2956 	auto_config = cset->ac;
   2957 
   2958 	even_pair_failed = 0;
   2959 	for(c=0; c<num_cols; c++) {
   2960 		ac = auto_config;
   2961 		while(ac!=NULL) {
   2962 			if ((ac->clabel->column == c) &&
   2963 			    (ac->clabel->mod_counter == mod_counter)) {
   2964 				/* it's this one... */
   2965 #if DEBUG
   2966 				printf("Found: %s at %d\n",
   2967 				       ac->devname,c);
   2968 #endif
   2969 				break;
   2970 			}
   2971 			ac=ac->next;
   2972 		}
   2973 		if (ac==NULL) {
   2974 				/* Didn't find one here! */
   2975 				/* special case for RAID 1, especially
   2976 				   where there are more than 2
   2977 				   components (where RAIDframe treats
   2978 				   things a little differently :( ) */
   2979 			if (parity_type == '1') {
   2980 				if (c%2 == 0) { /* even component */
   2981 					even_pair_failed = 1;
   2982 				} else { /* odd component.  If
   2983 					    we're failed, and
   2984 					    so is the even
   2985 					    component, it's
   2986 					    "Good Night, Charlie" */
   2987 					if (even_pair_failed == 1) {
   2988 						return(0);
   2989 					}
   2990 				}
   2991 			} else {
   2992 				/* normal accounting */
   2993 				num_missing++;
   2994 			}
   2995 		}
   2996 		if ((parity_type == '1') && (c%2 == 1)) {
   2997 				/* Just did an even component, and we didn't
   2998 				   bail.. reset the even_pair_failed flag,
   2999 				   and go on to the next component.... */
   3000 			even_pair_failed = 0;
   3001 		}
   3002 	}
   3003 
   3004 	clabel = cset->ac->clabel;
   3005 
   3006 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3007 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3008 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3009 		/* XXX this needs to be made *much* more general */
   3010 		/* Too many failures */
   3011 		return(0);
   3012 	}
   3013 	/* otherwise, all is well, and we've got enough to take a kick
   3014 	   at autoconfiguring this set */
   3015 	return(1);
   3016 }
   3017 
   3018 void
   3019 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
   3020 			RF_Raid_t *raidPtr)
   3021 {
   3022 	RF_ComponentLabel_t *clabel;
   3023 	int i;
   3024 
   3025 	clabel = ac->clabel;
   3026 
   3027 	/* 1. Fill in the common stuff */
   3028 	config->numRow = clabel->num_rows = 1;
   3029 	config->numCol = clabel->num_columns;
   3030 	config->numSpare = 0; /* XXX should this be set here? */
   3031 	config->sectPerSU = clabel->sectPerSU;
   3032 	config->SUsPerPU = clabel->SUsPerPU;
   3033 	config->SUsPerRU = clabel->SUsPerRU;
   3034 	config->parityConfig = clabel->parityConfig;
   3035 	/* XXX... */
   3036 	strcpy(config->diskQueueType,"fifo");
   3037 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3038 	config->layoutSpecificSize = 0; /* XXX ?? */
   3039 
   3040 	while(ac!=NULL) {
   3041 		/* row/col values will be in range due to the checks
   3042 		   in reasonable_label() */
   3043 		strcpy(config->devnames[0][ac->clabel->column],
   3044 		       ac->devname);
   3045 		ac = ac->next;
   3046 	}
   3047 
   3048 	for(i=0;i<RF_MAXDBGV;i++) {
   3049 		config->debugVars[i][0] = 0;
   3050 	}
   3051 }
   3052 
   3053 int
   3054 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
   3055 {
   3056 	RF_ComponentLabel_t clabel;
   3057 	struct vnode *vp;
   3058 	dev_t dev;
   3059 	int column;
   3060 	int sparecol;
   3061 
   3062 	raidPtr->autoconfigure = new_value;
   3063 
   3064 	for(column=0; column<raidPtr->numCol; column++) {
   3065 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3066 			dev = raidPtr->Disks[column].dev;
   3067 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3068 			raidread_component_label(dev, vp, &clabel);
   3069 			clabel.autoconfigure = new_value;
   3070 			raidwrite_component_label(dev, vp, &clabel);
   3071 		}
   3072 	}
   3073 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3074 		sparecol = raidPtr->numCol + column;
   3075 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3076 			dev = raidPtr->Disks[sparecol].dev;
   3077 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3078 			raidread_component_label(dev, vp, &clabel);
   3079 			clabel.autoconfigure = new_value;
   3080 			raidwrite_component_label(dev, vp, &clabel);
   3081 		}
   3082 	}
   3083 	return(new_value);
   3084 }
   3085 
   3086 int
   3087 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
   3088 {
   3089 	RF_ComponentLabel_t clabel;
   3090 	struct vnode *vp;
   3091 	dev_t dev;
   3092 	int column;
   3093 	int sparecol;
   3094 
   3095 	raidPtr->root_partition = new_value;
   3096 	for(column=0; column<raidPtr->numCol; column++) {
   3097 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
   3098 			dev = raidPtr->Disks[column].dev;
   3099 			vp = raidPtr->raid_cinfo[column].ci_vp;
   3100 			raidread_component_label(dev, vp, &clabel);
   3101 			clabel.root_partition = new_value;
   3102 			raidwrite_component_label(dev, vp, &clabel);
   3103 		}
   3104 	}
   3105 	for(column = 0; column < raidPtr->numSpare ; column++) {
   3106 		sparecol = raidPtr->numCol + column;
   3107 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
   3108 			dev = raidPtr->Disks[sparecol].dev;
   3109 			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
   3110 			raidread_component_label(dev, vp, &clabel);
   3111 			clabel.root_partition = new_value;
   3112 			raidwrite_component_label(dev, vp, &clabel);
   3113 		}
   3114 	}
   3115 	return(new_value);
   3116 }
   3117 
   3118 void
   3119 rf_release_all_vps(RF_ConfigSet_t *cset)
   3120 {
   3121 	RF_AutoConfig_t *ac;
   3122 
   3123 	ac = cset->ac;
   3124 	while(ac!=NULL) {
   3125 		/* Close the vp, and give it back */
   3126 		if (ac->vp) {
   3127 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3128 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3129 			vput(ac->vp);
   3130 			ac->vp = NULL;
   3131 		}
   3132 		ac = ac->next;
   3133 	}
   3134 }
   3135 
   3136 
   3137 void
   3138 rf_cleanup_config_set(RF_ConfigSet_t *cset)
   3139 {
   3140 	RF_AutoConfig_t *ac;
   3141 	RF_AutoConfig_t *next_ac;
   3142 
   3143 	ac = cset->ac;
   3144 	while(ac!=NULL) {
   3145 		next_ac = ac->next;
   3146 		/* nuke the label */
   3147 		free(ac->clabel, M_RAIDFRAME);
   3148 		/* cleanup the config structure */
   3149 		free(ac, M_RAIDFRAME);
   3150 		/* "next.." */
   3151 		ac = next_ac;
   3152 	}
   3153 	/* and, finally, nuke the config set */
   3154 	free(cset, M_RAIDFRAME);
   3155 }
   3156 
   3157 
   3158 void
   3159 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
   3160 {
   3161 	/* current version number */
   3162 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3163 	clabel->serial_number = raidPtr->serial_number;
   3164 	clabel->mod_counter = raidPtr->mod_counter;
   3165 	clabel->num_rows = 1;
   3166 	clabel->num_columns = raidPtr->numCol;
   3167 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3168 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3169 
   3170 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3171 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3172 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3173 
   3174 	clabel->blockSize = raidPtr->bytesPerSector;
   3175 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3176 
   3177 	/* XXX not portable */
   3178 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3179 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3180 	clabel->autoconfigure = raidPtr->autoconfigure;
   3181 	clabel->root_partition = raidPtr->root_partition;
   3182 	clabel->last_unit = raidPtr->raidid;
   3183 	clabel->config_order = raidPtr->config_order;
   3184 }
   3185 
   3186 int
   3187 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
   3188 {
   3189 	RF_Raid_t *raidPtr;
   3190 	RF_Config_t *config;
   3191 	int raidID;
   3192 	int retcode;
   3193 
   3194 #if DEBUG
   3195 	printf("RAID autoconfigure\n");
   3196 #endif
   3197 
   3198 	retcode = 0;
   3199 	*unit = -1;
   3200 
   3201 	/* 1. Create a config structure */
   3202 
   3203 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3204 				       M_RAIDFRAME,
   3205 				       M_NOWAIT);
   3206 	if (config==NULL) {
   3207 		printf("Out of mem!?!?\n");
   3208 				/* XXX do something more intelligent here. */
   3209 		return(1);
   3210 	}
   3211 
   3212 	memset(config, 0, sizeof(RF_Config_t));
   3213 
   3214 	/*
   3215 	   2. Figure out what RAID ID this one is supposed to live at
   3216 	   See if we can get the same RAID dev that it was configured
   3217 	   on last time..
   3218 	*/
   3219 
   3220 	raidID = cset->ac->clabel->last_unit;
   3221 	if ((raidID < 0) || (raidID >= numraid)) {
   3222 		/* let's not wander off into lala land. */
   3223 		raidID = numraid - 1;
   3224 	}
   3225 	if (raidPtrs[raidID]->valid != 0) {
   3226 
   3227 		/*
   3228 		   Nope... Go looking for an alternative...
   3229 		   Start high so we don't immediately use raid0 if that's
   3230 		   not taken.
   3231 		*/
   3232 
   3233 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3234 			if (raidPtrs[raidID]->valid == 0) {
   3235 				/* can use this one! */
   3236 				break;
   3237 			}
   3238 		}
   3239 	}
   3240 
   3241 	if (raidID < 0) {
   3242 		/* punt... */
   3243 		printf("Unable to auto configure this set!\n");
   3244 		printf("(Out of RAID devs!)\n");
   3245 		return(1);
   3246 	}
   3247 
   3248 #if DEBUG
   3249 	printf("Configuring raid%d:\n",raidID);
   3250 #endif
   3251 
   3252 	raidPtr = raidPtrs[raidID];
   3253 
   3254 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3255 	raidPtr->raidid = raidID;
   3256 	raidPtr->openings = RAIDOUTSTANDING;
   3257 
   3258 	/* 3. Build the configuration structure */
   3259 	rf_create_configuration(cset->ac, config, raidPtr);
   3260 
   3261 	/* 4. Do the configuration */
   3262 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3263 
   3264 	if (retcode == 0) {
   3265 
   3266 		raidinit(raidPtrs[raidID]);
   3267 
   3268 		rf_markalldirty(raidPtrs[raidID]);
   3269 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3270 		if (cset->ac->clabel->root_partition==1) {
   3271 			/* everything configured just fine.  Make a note
   3272 			   that this set is eligible to be root. */
   3273 			cset->rootable = 1;
   3274 			/* XXX do this here? */
   3275 			raidPtrs[raidID]->root_partition = 1;
   3276 		}
   3277 	}
   3278 
   3279 	/* 5. Cleanup */
   3280 	free(config, M_RAIDFRAME);
   3281 
   3282 	*unit = raidID;
   3283 	return(retcode);
   3284 }
   3285 
   3286 void
   3287 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
   3288 {
   3289 	struct buf *bp;
   3290 
   3291 	bp = (struct buf *)desc->bp;
   3292 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3293 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
   3294 }
   3295