Home | History | Annotate | Line # | Download | only in raidframe
rf_netbsdkintf.c revision 1.104.2.14
      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.104.2.14 2002/08/13 02:19:53 nathanw Exp $	*/
      2 /*-
      3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster; Jason R. Thorpe.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1988 University of Utah.
     40  * Copyright (c) 1990, 1993
     41  *      The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * the Systems Programming Group of the University of Utah Computer
     45  * Science Department.
     46  *
     47  * Redistribution and use in source and binary forms, with or without
     48  * modification, are permitted provided that the following conditions
     49  * are met:
     50  * 1. Redistributions of source code must retain the above copyright
     51  *    notice, this list of conditions and the following disclaimer.
     52  * 2. Redistributions in binary form must reproduce the above copyright
     53  *    notice, this list of conditions and the following disclaimer in the
     54  *    documentation and/or other materials provided with the distribution.
     55  * 3. All advertising materials mentioning features or use of this software
     56  *    must display the following acknowledgement:
     57  *      This product includes software developed by the University of
     58  *      California, Berkeley and its contributors.
     59  * 4. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
     76  *
     77  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
     78  */
     79 
     80 
     81 
     82 
     83 /*
     84  * Copyright (c) 1995 Carnegie-Mellon University.
     85  * All rights reserved.
     86  *
     87  * Authors: Mark Holland, Jim Zelenka
     88  *
     89  * Permission to use, copy, modify and distribute this software and
     90  * its documentation is hereby granted, provided that both the copyright
     91  * notice and this permission notice appear in all copies of the
     92  * software, derivative works or modified versions, and any portions
     93  * thereof, and that both notices appear in supporting documentation.
     94  *
     95  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     96  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     97  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     98  *
     99  * Carnegie Mellon requests users of this software to return to
    100  *
    101  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
    102  *  School of Computer Science
    103  *  Carnegie Mellon University
    104  *  Pittsburgh PA 15213-3890
    105  *
    106  * any improvements or extensions that they make and grant Carnegie the
    107  * rights to redistribute these changes.
    108  */
    109 
    110 /***********************************************************
    111  *
    112  * rf_kintf.c -- the kernel interface routines for RAIDframe
    113  *
    114  ***********************************************************/
    115 
    116 #include <sys/cdefs.h>
    117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.104.2.14 2002/08/13 02:19:53 nathanw Exp $");
    118 
    119 #include <sys/param.h>
    120 #include <sys/errno.h>
    121 #include <sys/pool.h>
    122 #include <sys/proc.h>
    123 #include <sys/queue.h>
    124 #include <sys/disk.h>
    125 #include <sys/device.h>
    126 #include <sys/stat.h>
    127 #include <sys/ioctl.h>
    128 #include <sys/fcntl.h>
    129 #include <sys/systm.h>
    130 #include <sys/namei.h>
    131 #include <sys/vnode.h>
    132 #include <sys/disklabel.h>
    133 #include <sys/conf.h>
    134 #include <sys/lock.h>
    135 #include <sys/buf.h>
    136 #include <sys/user.h>
    137 #include <sys/reboot.h>
    138 
    139 #include <dev/raidframe/raidframevar.h>
    140 #include <dev/raidframe/raidframeio.h>
    141 #include "raid.h"
    142 #include "opt_raid_autoconfig.h"
    143 #include "rf_raid.h"
    144 #include "rf_copyback.h"
    145 #include "rf_dag.h"
    146 #include "rf_dagflags.h"
    147 #include "rf_desc.h"
    148 #include "rf_diskqueue.h"
    149 #include "rf_etimer.h"
    150 #include "rf_general.h"
    151 #include "rf_kintf.h"
    152 #include "rf_options.h"
    153 #include "rf_driver.h"
    154 #include "rf_parityscan.h"
    155 #include "rf_threadstuff.h"
    156 
/* Debug verbosity for this file; db1_printf() output appears when > 0. */
int     rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf argument list (hence
 * the double parentheses at every call site).  Both variants expand to a
 * single do { } while (0) statement so the macro can be used as the body
 * of an unbraced if/else without capturing the else or producing a stray
 * empty statement.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
    164 
/*
 * Table of per-unit RAID descriptors, indexed by unit number.  The table
 * and each entry are allocated in raidattach().
 */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/* Initialized with rf_mutex_init() in raidattach(). */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

/* Both queues start out empty (NULL); see raidattach(). */
static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
    173 
/* prototypes */

/* Helpers private to this file. */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
static void raidinit(RF_Raid_t *);

/* Attach routine and the device entry points defined in this file. */
void raidattach(int);
int raidsize(dev_t);
int raidopen(dev_t, int, int, struct proc *);
int raidclose(dev_t, int, int, struct proc *);
int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
int raidwrite(dev_t, struct uio *, int);
int raidread(dev_t, struct uio *, int);
void raidstrategy(struct buf *);
int raiddump(dev_t, daddr_t, caddr_t, size_t);
    192 
    193 /*
    194  * Pilfered from ccd.c
    195  */
    196 
/*
 * Wrapper around a struct buf that also records the original buf and the
 * RAIDframe request it belongs to.  Objects of this size are what the
 * component buffer pool (raidframe_cbufpool) hands out.
 *
 * rf_buf must stay the first member (see the note on the field itself).
 * NOTE(review): presumably so a struct buf pointer can be converted back
 * to the enclosing raidbuf -- confirm against the completion path.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
    203 
/* component buffer pool */
/* Set up in raidattach() with an item size of sizeof(struct raidbuf). */
struct pool raidframe_cbufpool;

/*
 * Get / return one item from the component buffer pool.  The `rs'
 * argument is accepted but not used by either macro.  RAIDGETBUF asks
 * with PR_NOWAIT.
 * NOTE(review): presumably that means it can return NULL instead of
 * sleeping -- callers should check; confirm against pool(9).
 */
#define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
    209 
    210 /* XXX Not sure if the following should be replacing the raidPtrs above,
    211    or if it should be used in conjunction with that...
    212 */
    213 
/*
 * Per-unit software state.  raidattach() allocates one zeroed entry per
 * unit in the raid_softc array; the entry points below look a unit up
 * with raid_softc[raidunit(dev)].
 */
struct raid_softc {
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;        /* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Unit number encoded in a dev_t. */
#define	raidunit(x)	DISKUNIT(x)
/*
 * Upper bound the entry points check a unit number against.  Set by
 * raidattach() to the number of units requested, or lowered there when a
 * per-unit descriptor cannot be allocated.
 */
int numraid = 0;
    231 
    232 /*
    233  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
    234  * Be aware that large numbers can allow the driver to consume a lot of
    235  * kernel memory, especially on writes, and in degraded mode reads.
    236  *
    237  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
    238  * a single 64K write will typically require 64K for the old data,
    239  * 64K for the old parity, and 64K for the new parity, for a total
    240  * of 192K (if the parity buffer is not re-used immediately).
     241  * Even if it is used immediately, that's still 128K, which when multiplied
    242  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
    243  *
    244  * Now in degraded mode, for example, a 64K read on the above setup may
    245  * require data reconstruction, which will require *all* of the 4 remaining
    246  * disks to participate -- 4 * 32K/disk == 128K again.
    247  */
    248 
/*
 * Default number of simultaneous I/Os per RAID device; only defined here
 * when nothing defined it earlier.  raidioctl() stores it in
 * raidPtr->openings after a successful configuration.
 */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* dev_t of the raw partition (RAW_PART) on the same major/unit as `dev'. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
    255 
/* declared here, and made public, for the benefit of KVM stuff.. */
/* Array of per-unit state, allocated and zeroed in raidattach(). */
struct raid_softc *raid_softc;

/* Disklabel helpers. */
static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
				     struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

/* Per-unit lock, taken around open/close processing. */
static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
void rf_mountroot_hook(struct device *);

/*
 * One struct device per unit, filled in by raidattach();
 * rf_buildroothack() may point booted_device at one of them.
 */
struct device *raidrootdev;

void rf_ReconThread(struct rf_recon_req *);
/* XXX what I want is: */
/*void rf_ReconThread(RF_Raid_t *raidPtr);  */
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
void rf_buildroothack(void *);

/* Autoconfiguration support. */
RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);

/* raidattach() forces this to 1 when RAID_AUTOCONFIG is defined. */
static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
				  allow autoconfig to take place.
			          Note that this is overridden by having
			          RAID_AUTOCONFIG as an option in the
			          kernel config file.  */
    297 
    298 void
    299 raidattach(num)
    300 	int     num;
    301 {
    302 	int raidID;
    303 	int i, rc;
    304 	RF_AutoConfig_t *ac_list; /* autoconfig list */
    305 	RF_ConfigSet_t *config_sets;
    306 
    307 #ifdef DEBUG
    308 	printf("raidattach: Asked for %d units\n", num);
    309 #endif
    310 
    311 	if (num <= 0) {
    312 #ifdef DIAGNOSTIC
    313 		panic("raidattach: count <= 0");
    314 #endif
    315 		return;
    316 	}
    317 	/* This is where all the initialization stuff gets done. */
    318 
    319 	numraid = num;
    320 
    321 	/* Make some space for requested number of units... */
    322 
    323 	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    324 	if (raidPtrs == NULL) {
    325 		panic("raidPtrs is NULL!!\n");
    326 	}
    327 
    328 	/* Initialize the component buffer pool. */
    329 	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
    330 	    0, 0, "raidpl", NULL);
    331 
    332 	rc = rf_mutex_init(&rf_sparet_wait_mutex);
    333 	if (rc) {
    334 		RF_PANIC();
    335 	}
    336 
    337 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
    338 
    339 	for (i = 0; i < num; i++)
    340 		raidPtrs[i] = NULL;
    341 	rc = rf_BootRaidframe();
    342 	if (rc == 0)
    343 		printf("Kernelized RAIDframe activated\n");
    344 	else
    345 		panic("Serious error booting RAID!!\n");
    346 
    347 	/* put together some datastructures like the CCD device does.. This
    348 	 * lets us lock the device and what-not when it gets opened. */
    349 
    350 	raid_softc = (struct raid_softc *)
    351 		malloc(num * sizeof(struct raid_softc),
    352 		       M_RAIDFRAME, M_NOWAIT);
    353 	if (raid_softc == NULL) {
    354 		printf("WARNING: no memory for RAIDframe driver\n");
    355 		return;
    356 	}
    357 
    358 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
    359 
    360 	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
    361 					      M_RAIDFRAME, M_NOWAIT);
    362 	if (raidrootdev == NULL) {
    363 		panic("No memory for RAIDframe driver!!?!?!\n");
    364 	}
    365 
    366 	for (raidID = 0; raidID < num; raidID++) {
    367 		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
    368 
    369 		raidrootdev[raidID].dv_class  = DV_DISK;
    370 		raidrootdev[raidID].dv_cfdata = NULL;
    371 		raidrootdev[raidID].dv_unit   = raidID;
    372 		raidrootdev[raidID].dv_parent = NULL;
    373 		raidrootdev[raidID].dv_flags  = 0;
    374 		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
    375 
    376 		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
    377 			  (RF_Raid_t *));
    378 		if (raidPtrs[raidID] == NULL) {
    379 			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
    380 			numraid = raidID;
    381 			return;
    382 		}
    383 	}
    384 
    385 #ifdef RAID_AUTOCONFIG
    386 	raidautoconfig = 1;
    387 #endif
    388 
    389 if (raidautoconfig) {
    390 	/* 1. locate all RAID components on the system */
    391 
    392 #if DEBUG
    393 	printf("Searching for raid components...\n");
    394 #endif
    395 	ac_list = rf_find_raid_components();
    396 
    397 	/* 2. sort them into their respective sets */
    398 
    399 	config_sets = rf_create_auto_sets(ac_list);
    400 
    401 	/* 3. evaluate each set and configure the valid ones
    402 	   This gets done in rf_buildroothack() */
    403 
    404 	/* schedule the creation of the thread to do the
    405 	   "/ on RAID" stuff */
    406 
    407 	kthread_create(rf_buildroothack,config_sets);
    408 
    409 #if 0
    410 	mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
    411 #endif
    412 }
    413 
    414 }
    415 
    416 void
    417 rf_buildroothack(arg)
    418 	void *arg;
    419 {
    420 	RF_ConfigSet_t *config_sets = arg;
    421 	RF_ConfigSet_t *cset;
    422 	RF_ConfigSet_t *next_cset;
    423 	int retcode;
    424 	int raidID;
    425 	int rootID;
    426 	int num_root;
    427 
    428 	rootID = 0;
    429 	num_root = 0;
    430 	cset = config_sets;
    431 	while(cset != NULL ) {
    432 		next_cset = cset->next;
    433 		if (rf_have_enough_components(cset) &&
    434 		    cset->ac->clabel->autoconfigure==1) {
    435 			retcode = rf_auto_config_set(cset,&raidID);
    436 			if (!retcode) {
    437 				if (cset->rootable) {
    438 					rootID = raidID;
    439 					num_root++;
    440 				}
    441 			} else {
    442 				/* The autoconfig didn't work :( */
    443 #if DEBUG
    444 				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
    445 #endif
    446 				rf_release_all_vps(cset);
    447 			}
    448 		} else {
    449 			/* we're not autoconfiguring this set...
    450 			   release the associated resources */
    451 			rf_release_all_vps(cset);
    452 		}
    453 		/* cleanup */
    454 		rf_cleanup_config_set(cset);
    455 		cset = next_cset;
    456 	}
    457 
    458 	/* we found something bootable... */
    459 
    460 	if (num_root == 1) {
    461 		booted_device = &raidrootdev[rootID];
    462 	} else if (num_root > 1) {
    463 		/* we can't guess.. require the user to answer... */
    464 		boothowto |= RB_ASKNAME;
    465 	}
    466 }
    467 
    468 
    469 int
    470 raidsize(dev)
    471 	dev_t   dev;
    472 {
    473 	struct raid_softc *rs;
    474 	struct disklabel *lp;
    475 	int     part, unit, omask, size;
    476 
    477 	unit = raidunit(dev);
    478 	if (unit >= numraid)
    479 		return (-1);
    480 	rs = &raid_softc[unit];
    481 
    482 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    483 		return (-1);
    484 
    485 	part = DISKPART(dev);
    486 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
    487 	lp = rs->sc_dkdev.dk_label;
    488 
    489 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
    490 		return (-1);
    491 
    492 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
    493 		size = -1;
    494 	else
    495 		size = lp->d_partitions[part].p_size *
    496 		    (lp->d_secsize / DEV_BSIZE);
    497 
    498 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
    499 		return (-1);
    500 
    501 	return (size);
    502 
    503 }
    504 
    505 int
    506 raiddump(dev, blkno, va, size)
    507 	dev_t   dev;
    508 	daddr_t blkno;
    509 	caddr_t va;
    510 	size_t  size;
    511 {
    512 	/* Not implemented. */
    513 	return ENXIO;
    514 }
    515 /* ARGSUSED */
    516 int
    517 raidopen(dev, flags, fmt, p)
    518 	dev_t   dev;
    519 	int     flags, fmt;
    520 	struct proc *p;
    521 {
    522 	int     unit = raidunit(dev);
    523 	struct raid_softc *rs;
    524 	struct disklabel *lp;
    525 	int     part, pmask;
    526 	int     error = 0;
    527 
    528 	if (unit >= numraid)
    529 		return (ENXIO);
    530 	rs = &raid_softc[unit];
    531 
    532 	if ((error = raidlock(rs)) != 0)
    533 		return (error);
    534 	lp = rs->sc_dkdev.dk_label;
    535 
    536 	part = DISKPART(dev);
    537 	pmask = (1 << part);
    538 
    539 	db1_printf(("Opening raid device number: %d partition: %d\n",
    540 		unit, part));
    541 
    542 
    543 	if ((rs->sc_flags & RAIDF_INITED) &&
    544 	    (rs->sc_dkdev.dk_openmask == 0))
    545 		raidgetdisklabel(dev);
    546 
    547 	/* make sure that this partition exists */
    548 
    549 	if (part != RAW_PART) {
    550 		db1_printf(("Not a raw partition..\n"));
    551 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
    552 		    ((part >= lp->d_npartitions) ||
    553 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
    554 			error = ENXIO;
    555 			raidunlock(rs);
    556 			db1_printf(("Bailing out...\n"));
    557 			return (error);
    558 		}
    559 	}
    560 	/* Prevent this unit from being unconfigured while open. */
    561 	switch (fmt) {
    562 	case S_IFCHR:
    563 		rs->sc_dkdev.dk_copenmask |= pmask;
    564 		break;
    565 
    566 	case S_IFBLK:
    567 		rs->sc_dkdev.dk_bopenmask |= pmask;
    568 		break;
    569 	}
    570 
    571 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    572 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    573 		/* First one... mark things as dirty... Note that we *MUST*
    574 		 have done a configure before this.  I DO NOT WANT TO BE
    575 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
    576 		 THAT THEY BELONG TOGETHER!!!!! */
    577 		/* XXX should check to see if we're only open for reading
    578 		   here... If so, we needn't do this, but then need some
    579 		   other way of keeping track of what's happened.. */
    580 
    581 		rf_markalldirty( raidPtrs[unit] );
    582 	}
    583 
    584 
    585 	rs->sc_dkdev.dk_openmask =
    586 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    587 
    588 	raidunlock(rs);
    589 
    590 	return (error);
    591 
    592 
    593 }
    594 /* ARGSUSED */
    595 int
    596 raidclose(dev, flags, fmt, p)
    597 	dev_t   dev;
    598 	int     flags, fmt;
    599 	struct proc *p;
    600 {
    601 	int     unit = raidunit(dev);
    602 	struct raid_softc *rs;
    603 	int     error = 0;
    604 	int     part;
    605 
    606 	if (unit >= numraid)
    607 		return (ENXIO);
    608 	rs = &raid_softc[unit];
    609 
    610 	if ((error = raidlock(rs)) != 0)
    611 		return (error);
    612 
    613 	part = DISKPART(dev);
    614 
    615 	/* ...that much closer to allowing unconfiguration... */
    616 	switch (fmt) {
    617 	case S_IFCHR:
    618 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
    619 		break;
    620 
    621 	case S_IFBLK:
    622 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
    623 		break;
    624 	}
    625 	rs->sc_dkdev.dk_openmask =
    626 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
    627 
    628 	if ((rs->sc_dkdev.dk_openmask == 0) &&
    629 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
    630 		/* Last one... device is not unconfigured yet.
    631 		   Device shutdown has taken care of setting the
    632 		   clean bits if RAIDF_INITED is not set
    633 		   mark things as clean... */
    634 #if 0
    635 		printf("Last one on raid%d.  Updating status.\n",unit);
    636 #endif
    637 		rf_update_component_labels(raidPtrs[unit],
    638 						 RF_FINAL_COMPONENT_UPDATE);
    639 		if (doing_shutdown) {
    640 			/* last one, and we're going down, so
    641 			   lights out for this RAID set too. */
    642 			error = rf_Shutdown(raidPtrs[unit]);
    643 
    644 			/* It's no longer initialized... */
    645 			rs->sc_flags &= ~RAIDF_INITED;
    646 
    647 			/* Detach the disk. */
    648 			disk_detach(&rs->sc_dkdev);
    649 		}
    650 	}
    651 
    652 	raidunlock(rs);
    653 	return (0);
    654 
    655 }
    656 
    657 void
    658 raidstrategy(bp)
    659 	struct buf *bp;
    660 {
    661 	int s;
    662 
    663 	unsigned int raidID = raidunit(bp->b_dev);
    664 	RF_Raid_t *raidPtr;
    665 	struct raid_softc *rs = &raid_softc[raidID];
    666 	struct disklabel *lp;
    667 	int     wlabel;
    668 
    669 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
    670 		bp->b_error = ENXIO;
    671 		bp->b_flags |= B_ERROR;
    672 		bp->b_resid = bp->b_bcount;
    673 		biodone(bp);
    674 		return;
    675 	}
    676 	if (raidID >= numraid || !raidPtrs[raidID]) {
    677 		bp->b_error = ENODEV;
    678 		bp->b_flags |= B_ERROR;
    679 		bp->b_resid = bp->b_bcount;
    680 		biodone(bp);
    681 		return;
    682 	}
    683 	raidPtr = raidPtrs[raidID];
    684 	if (!raidPtr->valid) {
    685 		bp->b_error = ENODEV;
    686 		bp->b_flags |= B_ERROR;
    687 		bp->b_resid = bp->b_bcount;
    688 		biodone(bp);
    689 		return;
    690 	}
    691 	if (bp->b_bcount == 0) {
    692 		db1_printf(("b_bcount is zero..\n"));
    693 		biodone(bp);
    694 		return;
    695 	}
    696 	lp = rs->sc_dkdev.dk_label;
    697 
    698 	/*
    699 	 * Do bounds checking and adjust transfer.  If there's an
    700 	 * error, the bounds check will flag that for us.
    701 	 */
    702 
    703 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    704 	if (DISKPART(bp->b_dev) != RAW_PART)
    705 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
    706 			db1_printf(("Bounds check failed!!:%d %d\n",
    707 				(int) bp->b_blkno, (int) wlabel));
    708 			biodone(bp);
    709 			return;
    710 		}
    711 	s = splbio();
    712 
    713 	bp->b_resid = 0;
    714 
    715 	/* stuff it onto our queue */
    716 	BUFQ_PUT(&rs->buf_queue, bp);
    717 
    718 	raidstart(raidPtrs[raidID]);
    719 
    720 	splx(s);
    721 }
    722 /* ARGSUSED */
    723 int
    724 raidread(dev, uio, flags)
    725 	dev_t   dev;
    726 	struct uio *uio;
    727 	int     flags;
    728 {
    729 	int     unit = raidunit(dev);
    730 	struct raid_softc *rs;
    731 	int     part;
    732 
    733 	if (unit >= numraid)
    734 		return (ENXIO);
    735 	rs = &raid_softc[unit];
    736 
    737 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    738 		return (ENXIO);
    739 	part = DISKPART(dev);
    740 
    741 	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
    742 
    743 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
    744 
    745 }
    746 /* ARGSUSED */
    747 int
    748 raidwrite(dev, uio, flags)
    749 	dev_t   dev;
    750 	struct uio *uio;
    751 	int     flags;
    752 {
    753 	int     unit = raidunit(dev);
    754 	struct raid_softc *rs;
    755 
    756 	if (unit >= numraid)
    757 		return (ENXIO);
    758 	rs = &raid_softc[unit];
    759 
    760 	if ((rs->sc_flags & RAIDF_INITED) == 0)
    761 		return (ENXIO);
    762 	db1_printf(("raidwrite\n"));
    763 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
    764 
    765 }
    766 
    767 int
    768 raidioctl(dev, cmd, data, flag, p)
    769 	dev_t   dev;
    770 	u_long  cmd;
    771 	caddr_t data;
    772 	int     flag;
    773 	struct proc *p;
    774 {
    775 	int     unit = raidunit(dev);
    776 	int     error = 0;
    777 	int     part, pmask;
    778 	struct raid_softc *rs;
    779 	RF_Config_t *k_cfg, *u_cfg;
    780 	RF_Raid_t *raidPtr;
    781 	RF_RaidDisk_t *diskPtr;
    782 	RF_AccTotals_t *totals;
    783 	RF_DeviceConfig_t *d_cfg, **ucfgp;
    784 	u_char *specific_buf;
    785 	int retcode = 0;
    786 	int row;
    787 	int column;
    788 	int raidid;
    789 	struct rf_recon_req *rrcopy, *rr;
    790 	RF_ComponentLabel_t *clabel;
    791 	RF_ComponentLabel_t ci_label;
    792 	RF_ComponentLabel_t **clabel_ptr;
    793 	RF_SingleComponent_t *sparePtr,*componentPtr;
    794 	RF_SingleComponent_t hot_spare;
    795 	RF_SingleComponent_t component;
    796 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    797 	int i, j, d;
    798 #ifdef __HAVE_OLD_DISKLABEL
    799 	struct disklabel newlabel;
    800 #endif
    801 
    802 	if (unit >= numraid)
    803 		return (ENXIO);
    804 	rs = &raid_softc[unit];
    805 	raidPtr = raidPtrs[unit];
    806 
    807 	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
    808 		(int) DISKPART(dev), (int) unit, (int) cmd));
    809 
    810 	/* Must be open for writes for these commands... */
    811 	switch (cmd) {
    812 	case DIOCSDINFO:
    813 	case DIOCWDINFO:
    814 #ifdef __HAVE_OLD_DISKLABEL
    815 	case ODIOCWDINFO:
    816 	case ODIOCSDINFO:
    817 #endif
    818 	case DIOCWLABEL:
    819 		if ((flag & FWRITE) == 0)
    820 			return (EBADF);
    821 	}
    822 
    823 	/* Must be initialized for these... */
    824 	switch (cmd) {
    825 	case DIOCGDINFO:
    826 	case DIOCSDINFO:
    827 	case DIOCWDINFO:
    828 #ifdef __HAVE_OLD_DISKLABEL
    829 	case ODIOCGDINFO:
    830 	case ODIOCWDINFO:
    831 	case ODIOCSDINFO:
    832 	case ODIOCGDEFLABEL:
    833 #endif
    834 	case DIOCGPART:
    835 	case DIOCWLABEL:
    836 	case DIOCGDEFLABEL:
    837 	case RAIDFRAME_SHUTDOWN:
    838 	case RAIDFRAME_REWRITEPARITY:
    839 	case RAIDFRAME_GET_INFO:
    840 	case RAIDFRAME_RESET_ACCTOTALS:
    841 	case RAIDFRAME_GET_ACCTOTALS:
    842 	case RAIDFRAME_KEEP_ACCTOTALS:
    843 	case RAIDFRAME_GET_SIZE:
    844 	case RAIDFRAME_FAIL_DISK:
    845 	case RAIDFRAME_COPYBACK:
    846 	case RAIDFRAME_CHECK_RECON_STATUS:
    847 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
    848 	case RAIDFRAME_GET_COMPONENT_LABEL:
    849 	case RAIDFRAME_SET_COMPONENT_LABEL:
    850 	case RAIDFRAME_ADD_HOT_SPARE:
    851 	case RAIDFRAME_REMOVE_HOT_SPARE:
    852 	case RAIDFRAME_INIT_LABELS:
    853 	case RAIDFRAME_REBUILD_IN_PLACE:
    854 	case RAIDFRAME_CHECK_PARITY:
    855 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
    856 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
    857 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
    858 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
    859 	case RAIDFRAME_SET_AUTOCONFIG:
    860 	case RAIDFRAME_SET_ROOT:
    861 	case RAIDFRAME_DELETE_COMPONENT:
    862 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
    863 		if ((rs->sc_flags & RAIDF_INITED) == 0)
    864 			return (ENXIO);
    865 	}
    866 
    867 	switch (cmd) {
    868 
    869 		/* configure the system */
    870 	case RAIDFRAME_CONFIGURE:
    871 
    872 		if (raidPtr->valid) {
    873 			/* There is a valid RAID set running on this unit! */
    874 			printf("raid%d: Device already configured!\n",unit);
    875 			return(EINVAL);
    876 		}
    877 
    878 		/* copy-in the configuration information */
    879 		/* data points to a pointer to the configuration structure */
    880 
    881 		u_cfg = *((RF_Config_t **) data);
    882 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
    883 		if (k_cfg == NULL) {
    884 			return (ENOMEM);
    885 		}
    886 		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
    887 		    sizeof(RF_Config_t));
    888 		if (retcode) {
    889 			RF_Free(k_cfg, sizeof(RF_Config_t));
    890 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
    891 				retcode));
    892 			return (retcode);
    893 		}
    894 		/* allocate a buffer for the layout-specific data, and copy it
    895 		 * in */
    896 		if (k_cfg->layoutSpecificSize) {
    897 			if (k_cfg->layoutSpecificSize > 10000) {
    898 				/* sanity check */
    899 				RF_Free(k_cfg, sizeof(RF_Config_t));
    900 				return (EINVAL);
    901 			}
    902 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
    903 			    (u_char *));
    904 			if (specific_buf == NULL) {
    905 				RF_Free(k_cfg, sizeof(RF_Config_t));
    906 				return (ENOMEM);
    907 			}
    908 			retcode = copyin(k_cfg->layoutSpecific,
    909 			    (caddr_t) specific_buf,
    910 			    k_cfg->layoutSpecificSize);
    911 			if (retcode) {
    912 				RF_Free(k_cfg, sizeof(RF_Config_t));
    913 				RF_Free(specific_buf,
    914 					k_cfg->layoutSpecificSize);
    915 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
    916 					retcode));
    917 				return (retcode);
    918 			}
    919 		} else
    920 			specific_buf = NULL;
    921 		k_cfg->layoutSpecific = specific_buf;
    922 
    923 		/* should do some kind of sanity check on the configuration.
    924 		 * Store the sum of all the bytes in the last byte? */
    925 
    926 		/* configure the system */
    927 
    928 		/*
    929 		 * Clear the entire RAID descriptor, just to make sure
    930 		 *  there is no stale data left in the case of a
    931 		 *  reconfiguration
    932 		 */
    933 		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
    934 		raidPtr->raidid = unit;
    935 
    936 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
    937 
    938 		if (retcode == 0) {
    939 
    940 			/* allow this many simultaneous IO's to
    941 			   this RAID device */
    942 			raidPtr->openings = RAIDOUTSTANDING;
    943 
    944 			raidinit(raidPtr);
    945 			rf_markalldirty(raidPtr);
    946 		}
    947 		/* free the buffers.  No return code here. */
    948 		if (k_cfg->layoutSpecificSize) {
    949 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
    950 		}
    951 		RF_Free(k_cfg, sizeof(RF_Config_t));
    952 
    953 		return (retcode);
    954 
    955 		/* shutdown the system */
    956 	case RAIDFRAME_SHUTDOWN:
    957 
    958 		if ((error = raidlock(rs)) != 0)
    959 			return (error);
    960 
    961 		/*
    962 		 * If somebody has a partition mounted, we shouldn't
    963 		 * shutdown.
    964 		 */
    965 
    966 		part = DISKPART(dev);
    967 		pmask = (1 << part);
    968 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
    969 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
    970 			(rs->sc_dkdev.dk_copenmask & pmask))) {
    971 			raidunlock(rs);
    972 			return (EBUSY);
    973 		}
    974 
    975 		retcode = rf_Shutdown(raidPtr);
    976 
    977 		/* It's no longer initialized... */
    978 		rs->sc_flags &= ~RAIDF_INITED;
    979 
    980 		/* Detach the disk. */
    981 		disk_detach(&rs->sc_dkdev);
    982 
    983 		raidunlock(rs);
    984 
    985 		return (retcode);
    986 	case RAIDFRAME_GET_COMPONENT_LABEL:
    987 		clabel_ptr = (RF_ComponentLabel_t **) data;
    988 		/* need to read the component label for the disk indicated
    989 		   by row,column in clabel */
    990 
    991 		/* For practice, let's get it directly fromdisk, rather
    992 		   than from the in-core copy */
    993 		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
    994 			   (RF_ComponentLabel_t *));
    995 		if (clabel == NULL)
    996 			return (ENOMEM);
    997 
    998 		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
    999 
   1000 		retcode = copyin( *clabel_ptr, clabel,
   1001 				  sizeof(RF_ComponentLabel_t));
   1002 
   1003 		if (retcode) {
   1004 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1005 			return(retcode);
   1006 		}
   1007 
   1008 		row = clabel->row;
   1009 		column = clabel->column;
   1010 
   1011 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1012 		    (column < 0) || (column >= raidPtr->numCol +
   1013 				     raidPtr->numSpare)) {
   1014 			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1015 			return(EINVAL);
   1016 		}
   1017 
   1018 		raidread_component_label(raidPtr->Disks[row][column].dev,
   1019 				raidPtr->raid_cinfo[row][column].ci_vp,
   1020 				clabel );
   1021 
   1022 		retcode = copyout((caddr_t) clabel,
   1023 				  (caddr_t) *clabel_ptr,
   1024 				  sizeof(RF_ComponentLabel_t));
   1025 		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
   1026 		return (retcode);
   1027 
   1028 	case RAIDFRAME_SET_COMPONENT_LABEL:
   1029 		clabel = (RF_ComponentLabel_t *) data;
   1030 
   1031 		/* XXX check the label for valid stuff... */
   1032 		/* Note that some things *should not* get modified --
   1033 		   the user should be re-initing the labels instead of
   1034 		   trying to patch things.
   1035 		   */
   1036 
   1037 		raidid = raidPtr->raidid;
   1038 		printf("raid%d: Got component label:\n", raidid);
   1039 		printf("raid%d: Version: %d\n", raidid, clabel->version);
   1040 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
   1041 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
   1042 		printf("raid%d: Row: %d\n", raidid, clabel->row);
   1043 		printf("raid%d: Column: %d\n", raidid, clabel->column);
   1044 		printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
   1045 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
   1046 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
   1047 		printf("raid%d: Status: %d\n", raidid, clabel->status);
   1048 
   1049 		row = clabel->row;
   1050 		column = clabel->column;
   1051 
   1052 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1053 		    (column < 0) || (column >= raidPtr->numCol)) {
   1054 			return(EINVAL);
   1055 		}
   1056 
   1057 		/* XXX this isn't allowed to do anything for now :-) */
   1058 
   1059 		/* XXX and before it is, we need to fill in the rest
   1060 		   of the fields!?!?!?! */
   1061 #if 0
   1062 		raidwrite_component_label(
   1063                             raidPtr->Disks[row][column].dev,
   1064 			    raidPtr->raid_cinfo[row][column].ci_vp,
   1065 			    clabel );
   1066 #endif
   1067 		return (0);
   1068 
   1069 	case RAIDFRAME_INIT_LABELS:
   1070 		clabel = (RF_ComponentLabel_t *) data;
   1071 		/*
   1072 		   we only want the serial number from
   1073 		   the above.  We get all the rest of the information
   1074 		   from the config that was used to create this RAID
   1075 		   set.
   1076 		   */
   1077 
   1078 		raidPtr->serial_number = clabel->serial_number;
   1079 
   1080 		raid_init_component_label(raidPtr, &ci_label);
   1081 		ci_label.serial_number = clabel->serial_number;
   1082 
   1083 		for(row=0;row<raidPtr->numRow;row++) {
   1084 			ci_label.row = row;
   1085 			for(column=0;column<raidPtr->numCol;column++) {
   1086 				diskPtr = &raidPtr->Disks[row][column];
   1087 				if (!RF_DEAD_DISK(diskPtr->status)) {
   1088 					ci_label.partitionSize = diskPtr->partitionSize;
   1089 					ci_label.column = column;
   1090 					raidwrite_component_label(
   1091 					  raidPtr->Disks[row][column].dev,
   1092 					  raidPtr->raid_cinfo[row][column].ci_vp,
   1093 					  &ci_label );
   1094 				}
   1095 			}
   1096 		}
   1097 
   1098 		return (retcode);
   1099 	case RAIDFRAME_SET_AUTOCONFIG:
   1100 		d = rf_set_autoconfig(raidPtr, *(int *) data);
   1101 		printf("raid%d: New autoconfig value is: %d\n",
   1102 		       raidPtr->raidid, d);
   1103 		*(int *) data = d;
   1104 		return (retcode);
   1105 
   1106 	case RAIDFRAME_SET_ROOT:
   1107 		d = rf_set_rootpartition(raidPtr, *(int *) data);
   1108 		printf("raid%d: New rootpartition value is: %d\n",
   1109 		       raidPtr->raidid, d);
   1110 		*(int *) data = d;
   1111 		return (retcode);
   1112 
   1113 		/* initialize all parity */
   1114 	case RAIDFRAME_REWRITEPARITY:
   1115 
   1116 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1117 			/* Parity for RAID 0 is trivially correct */
   1118 			raidPtr->parity_good = RF_RAID_CLEAN;
   1119 			return(0);
   1120 		}
   1121 
   1122 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1123 			/* Re-write is already in progress! */
   1124 			return(EINVAL);
   1125 		}
   1126 
   1127 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
   1128 					   rf_RewriteParityThread,
   1129 					   raidPtr,"raid_parity");
   1130 		return (retcode);
   1131 
   1132 
   1133 	case RAIDFRAME_ADD_HOT_SPARE:
   1134 		sparePtr = (RF_SingleComponent_t *) data;
   1135 		memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
   1136 		retcode = rf_add_hot_spare(raidPtr, &hot_spare);
   1137 		return(retcode);
   1138 
   1139 	case RAIDFRAME_REMOVE_HOT_SPARE:
   1140 		return(retcode);
   1141 
   1142 	case RAIDFRAME_DELETE_COMPONENT:
   1143 		componentPtr = (RF_SingleComponent_t *)data;
   1144 		memcpy( &component, componentPtr,
   1145 			sizeof(RF_SingleComponent_t));
   1146 		retcode = rf_delete_component(raidPtr, &component);
   1147 		return(retcode);
   1148 
   1149 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
   1150 		componentPtr = (RF_SingleComponent_t *)data;
   1151 		memcpy( &component, componentPtr,
   1152 			sizeof(RF_SingleComponent_t));
   1153 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
   1154 		return(retcode);
   1155 
   1156 	case RAIDFRAME_REBUILD_IN_PLACE:
   1157 
   1158 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1159 			/* Can't do this on a RAID 0!! */
   1160 			return(EINVAL);
   1161 		}
   1162 
   1163 		if (raidPtr->recon_in_progress == 1) {
   1164 			/* a reconstruct is already in progress! */
   1165 			return(EINVAL);
   1166 		}
   1167 
   1168 		componentPtr = (RF_SingleComponent_t *) data;
   1169 		memcpy( &component, componentPtr,
   1170 			sizeof(RF_SingleComponent_t));
   1171 		row = component.row;
   1172 		column = component.column;
   1173 		printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
   1174 		       row, column);
   1175 		if ((row < 0) || (row >= raidPtr->numRow) ||
   1176 		    (column < 0) || (column >= raidPtr->numCol)) {
   1177 			return(EINVAL);
   1178 		}
   1179 
   1180 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1181 		if (rrcopy == NULL)
   1182 			return(ENOMEM);
   1183 
   1184 		rrcopy->raidPtr = (void *) raidPtr;
   1185 		rrcopy->row = row;
   1186 		rrcopy->col = column;
   1187 
   1188 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1189 					   rf_ReconstructInPlaceThread,
   1190 					   rrcopy,"raid_reconip");
   1191 		return(retcode);
   1192 
   1193 	case RAIDFRAME_GET_INFO:
   1194 		if (!raidPtr->valid)
   1195 			return (ENODEV);
   1196 		ucfgp = (RF_DeviceConfig_t **) data;
   1197 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
   1198 			  (RF_DeviceConfig_t *));
   1199 		if (d_cfg == NULL)
   1200 			return (ENOMEM);
   1201 		memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
   1202 		d_cfg->rows = raidPtr->numRow;
   1203 		d_cfg->cols = raidPtr->numCol;
   1204 		d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
   1205 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
   1206 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1207 			return (ENOMEM);
   1208 		}
   1209 		d_cfg->nspares = raidPtr->numSpare;
   1210 		if (d_cfg->nspares >= RF_MAX_DISKS) {
   1211 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1212 			return (ENOMEM);
   1213 		}
   1214 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
   1215 		d = 0;
   1216 		for (i = 0; i < d_cfg->rows; i++) {
   1217 			for (j = 0; j < d_cfg->cols; j++) {
   1218 				d_cfg->devs[d] = raidPtr->Disks[i][j];
   1219 				d++;
   1220 			}
   1221 		}
   1222 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
   1223 			d_cfg->spares[i] = raidPtr->Disks[0][j];
   1224 		}
   1225 		retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
   1226 				  sizeof(RF_DeviceConfig_t));
   1227 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
   1228 
   1229 		return (retcode);
   1230 
   1231 	case RAIDFRAME_CHECK_PARITY:
   1232 		*(int *) data = raidPtr->parity_good;
   1233 		return (0);
   1234 
   1235 	case RAIDFRAME_RESET_ACCTOTALS:
   1236 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
   1237 		return (0);
   1238 
   1239 	case RAIDFRAME_GET_ACCTOTALS:
   1240 		totals = (RF_AccTotals_t *) data;
   1241 		*totals = raidPtr->acc_totals;
   1242 		return (0);
   1243 
   1244 	case RAIDFRAME_KEEP_ACCTOTALS:
   1245 		raidPtr->keep_acc_totals = *(int *)data;
   1246 		return (0);
   1247 
   1248 	case RAIDFRAME_GET_SIZE:
   1249 		*(int *) data = raidPtr->totalSectors;
   1250 		return (0);
   1251 
   1252 		/* fail a disk & optionally start reconstruction */
   1253 	case RAIDFRAME_FAIL_DISK:
   1254 
   1255 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1256 			/* Can't do this on a RAID 0!! */
   1257 			return(EINVAL);
   1258 		}
   1259 
   1260 		rr = (struct rf_recon_req *) data;
   1261 
   1262 		if (rr->row < 0 || rr->row >= raidPtr->numRow
   1263 		    || rr->col < 0 || rr->col >= raidPtr->numCol)
   1264 			return (EINVAL);
   1265 
   1266 		printf("raid%d: Failing the disk: row: %d col: %d\n",
   1267 		       unit, rr->row, rr->col);
   1268 
   1269 		/* make a copy of the recon request so that we don't rely on
   1270 		 * the user's buffer */
   1271 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
   1272 		if (rrcopy == NULL)
   1273 			return(ENOMEM);
   1274 		memcpy(rrcopy, rr, sizeof(*rr));
   1275 		rrcopy->raidPtr = (void *) raidPtr;
   1276 
   1277 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
   1278 					   rf_ReconThread,
   1279 					   rrcopy,"raid_recon");
   1280 		return (0);
   1281 
   1282 		/* invoke a copyback operation after recon on whatever disk
   1283 		 * needs it, if any */
   1284 	case RAIDFRAME_COPYBACK:
   1285 
   1286 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1287 			/* This makes no sense on a RAID 0!! */
   1288 			return(EINVAL);
   1289 		}
   1290 
   1291 		if (raidPtr->copyback_in_progress == 1) {
   1292 			/* Copyback is already in progress! */
   1293 			return(EINVAL);
   1294 		}
   1295 
   1296 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
   1297 					   rf_CopybackThread,
   1298 					   raidPtr,"raid_copyback");
   1299 		return (retcode);
   1300 
   1301 		/* return the percentage completion of reconstruction */
   1302 	case RAIDFRAME_CHECK_RECON_STATUS:
   1303 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1304 			/* This makes no sense on a RAID 0, so tell the
   1305 			   user it's done. */
   1306 			*(int *) data = 100;
   1307 			return(0);
   1308 		}
   1309 		row = 0; /* XXX we only consider a single row... */
   1310 		if (raidPtr->status[row] != rf_rs_reconstructing)
   1311 			*(int *) data = 100;
   1312 		else
   1313 			*(int *) data = raidPtr->reconControl[row]->percentComplete;
   1314 		return (0);
   1315 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
   1316 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1317 		row = 0; /* XXX we only consider a single row... */
   1318 		if (raidPtr->status[row] != rf_rs_reconstructing) {
   1319 			progressInfo.remaining = 0;
   1320 			progressInfo.completed = 100;
   1321 			progressInfo.total = 100;
   1322 		} else {
   1323 			progressInfo.total =
   1324 				raidPtr->reconControl[row]->numRUsTotal;
   1325 			progressInfo.completed =
   1326 				raidPtr->reconControl[row]->numRUsComplete;
   1327 			progressInfo.remaining = progressInfo.total -
   1328 				progressInfo.completed;
   1329 		}
   1330 		retcode = copyout((caddr_t) &progressInfo,
   1331 				  (caddr_t) *progressInfoPtr,
   1332 				  sizeof(RF_ProgressInfo_t));
   1333 		return (retcode);
   1334 
   1335 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
   1336 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1337 			/* This makes no sense on a RAID 0, so tell the
   1338 			   user it's done. */
   1339 			*(int *) data = 100;
   1340 			return(0);
   1341 		}
   1342 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1343 			*(int *) data = 100 *
   1344 				raidPtr->parity_rewrite_stripes_done /
   1345 				raidPtr->Layout.numStripe;
   1346 		} else {
   1347 			*(int *) data = 100;
   1348 		}
   1349 		return (0);
   1350 
   1351 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
   1352 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1353 		if (raidPtr->parity_rewrite_in_progress == 1) {
   1354 			progressInfo.total = raidPtr->Layout.numStripe;
   1355 			progressInfo.completed =
   1356 				raidPtr->parity_rewrite_stripes_done;
   1357 			progressInfo.remaining = progressInfo.total -
   1358 				progressInfo.completed;
   1359 		} else {
   1360 			progressInfo.remaining = 0;
   1361 			progressInfo.completed = 100;
   1362 			progressInfo.total = 100;
   1363 		}
   1364 		retcode = copyout((caddr_t) &progressInfo,
   1365 				  (caddr_t) *progressInfoPtr,
   1366 				  sizeof(RF_ProgressInfo_t));
   1367 		return (retcode);
   1368 
   1369 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
   1370 		if (raidPtr->Layout.map->faultsTolerated == 0) {
   1371 			/* This makes no sense on a RAID 0 */
   1372 			*(int *) data = 100;
   1373 			return(0);
   1374 		}
   1375 		if (raidPtr->copyback_in_progress == 1) {
   1376 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
   1377 				raidPtr->Layout.numStripe;
   1378 		} else {
   1379 			*(int *) data = 100;
   1380 		}
   1381 		return (0);
   1382 
   1383 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
   1384 		progressInfoPtr = (RF_ProgressInfo_t **) data;
   1385 		if (raidPtr->copyback_in_progress == 1) {
   1386 			progressInfo.total = raidPtr->Layout.numStripe;
   1387 			progressInfo.completed =
   1388 				raidPtr->copyback_stripes_done;
   1389 			progressInfo.remaining = progressInfo.total -
   1390 				progressInfo.completed;
   1391 		} else {
   1392 			progressInfo.remaining = 0;
   1393 			progressInfo.completed = 100;
   1394 			progressInfo.total = 100;
   1395 		}
   1396 		retcode = copyout((caddr_t) &progressInfo,
   1397 				  (caddr_t) *progressInfoPtr,
   1398 				  sizeof(RF_ProgressInfo_t));
   1399 		return (retcode);
   1400 
   1401 		/* the sparetable daemon calls this to wait for the kernel to
   1402 		 * need a spare table. this ioctl does not return until a
   1403 		 * spare table is needed. XXX -- calling mpsleep here in the
   1404 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
   1405 		 * -- I should either compute the spare table in the kernel,
   1406 		 * or have a different -- XXX XXX -- interface (a different
   1407 		 * character device) for delivering the table     -- XXX */
   1408 #if 0
   1409 	case RAIDFRAME_SPARET_WAIT:
   1410 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1411 		while (!rf_sparet_wait_queue)
   1412 			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
   1413 		waitreq = rf_sparet_wait_queue;
   1414 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
   1415 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1416 
   1417 		/* structure assignment */
   1418 		*((RF_SparetWait_t *) data) = *waitreq;
   1419 
   1420 		RF_Free(waitreq, sizeof(*waitreq));
   1421 		return (0);
   1422 
   1423 		/* wakes up a process waiting on SPARET_WAIT and puts an error
   1424 		 * code in it that will cause the dameon to exit */
   1425 	case RAIDFRAME_ABORT_SPARET_WAIT:
   1426 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1427 		waitreq->fcol = -1;
   1428 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1429 		waitreq->next = rf_sparet_wait_queue;
   1430 		rf_sparet_wait_queue = waitreq;
   1431 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1432 		wakeup(&rf_sparet_wait_queue);
   1433 		return (0);
   1434 
   1435 		/* used by the spare table daemon to deliver a spare table
   1436 		 * into the kernel */
   1437 	case RAIDFRAME_SEND_SPARET:
   1438 
   1439 		/* install the spare table */
   1440 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
   1441 
   1442 		/* respond to the requestor.  the return status of the spare
   1443 		 * table installation is passed in the "fcol" field */
   1444 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
   1445 		waitreq->fcol = retcode;
   1446 		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1447 		waitreq->next = rf_sparet_resp_queue;
   1448 		rf_sparet_resp_queue = waitreq;
   1449 		wakeup(&rf_sparet_resp_queue);
   1450 		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1451 
   1452 		return (retcode);
   1453 #endif
   1454 
   1455 	default:
   1456 		break; /* fall through to the os-specific code below */
   1457 
   1458 	}
   1459 
   1460 	if (!raidPtr->valid)
   1461 		return (EINVAL);
   1462 
   1463 	/*
   1464 	 * Add support for "regular" device ioctls here.
   1465 	 */
   1466 
   1467 	switch (cmd) {
   1468 	case DIOCGDINFO:
   1469 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
   1470 		break;
   1471 #ifdef __HAVE_OLD_DISKLABEL
   1472 	case ODIOCGDINFO:
   1473 		newlabel = *(rs->sc_dkdev.dk_label);
   1474 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1475 			return ENOTTY;
   1476 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1477 		break;
   1478 #endif
   1479 
   1480 	case DIOCGPART:
   1481 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
   1482 		((struct partinfo *) data)->part =
   1483 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
   1484 		break;
   1485 
   1486 	case DIOCWDINFO:
   1487 	case DIOCSDINFO:
   1488 #ifdef __HAVE_OLD_DISKLABEL
   1489 	case ODIOCWDINFO:
   1490 	case ODIOCSDINFO:
   1491 #endif
   1492 	{
   1493 		struct disklabel *lp;
   1494 #ifdef __HAVE_OLD_DISKLABEL
   1495 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
   1496 			memset(&newlabel, 0, sizeof newlabel);
   1497 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
   1498 			lp = &newlabel;
   1499 		} else
   1500 #endif
   1501 		lp = (struct disklabel *)data;
   1502 
   1503 		if ((error = raidlock(rs)) != 0)
   1504 			return (error);
   1505 
   1506 		rs->sc_flags |= RAIDF_LABELLING;
   1507 
   1508 		error = setdisklabel(rs->sc_dkdev.dk_label,
   1509 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
   1510 		if (error == 0) {
   1511 			if (cmd == DIOCWDINFO
   1512 #ifdef __HAVE_OLD_DISKLABEL
   1513 			    || cmd == ODIOCWDINFO
   1514 #endif
   1515 			   )
   1516 				error = writedisklabel(RAIDLABELDEV(dev),
   1517 				    raidstrategy, rs->sc_dkdev.dk_label,
   1518 				    rs->sc_dkdev.dk_cpulabel);
   1519 		}
   1520 		rs->sc_flags &= ~RAIDF_LABELLING;
   1521 
   1522 		raidunlock(rs);
   1523 
   1524 		if (error)
   1525 			return (error);
   1526 		break;
   1527 	}
   1528 
   1529 	case DIOCWLABEL:
   1530 		if (*(int *) data != 0)
   1531 			rs->sc_flags |= RAIDF_WLABEL;
   1532 		else
   1533 			rs->sc_flags &= ~RAIDF_WLABEL;
   1534 		break;
   1535 
   1536 	case DIOCGDEFLABEL:
   1537 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
   1538 		break;
   1539 
   1540 #ifdef __HAVE_OLD_DISKLABEL
   1541 	case ODIOCGDEFLABEL:
   1542 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
   1543 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
   1544 			return ENOTTY;
   1545 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
   1546 		break;
   1547 #endif
   1548 
   1549 	default:
   1550 		retcode = ENOTTY;
   1551 	}
   1552 	return (retcode);
   1553 
   1554 }
   1555 
   1556 
   1557 /* raidinit -- complete the rest of the initialization for the
   1558    RAIDframe device.  */
   1559 
   1560 
   1561 static void
   1562 raidinit(raidPtr)
   1563 	RF_Raid_t *raidPtr;
   1564 {
   1565 	struct raid_softc *rs;
   1566 	int     unit;
   1567 
   1568 	unit = raidPtr->raidid;
   1569 
   1570 	rs = &raid_softc[unit];
   1571 
   1572 	/* XXX should check return code first... */
   1573 	rs->sc_flags |= RAIDF_INITED;
   1574 
   1575 	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */
   1576 
   1577 	rs->sc_dkdev.dk_name = rs->sc_xname;
   1578 
   1579 	/* disk_attach actually creates space for the CPU disklabel, among
   1580 	 * other things, so it's critical to call this *BEFORE* we try putzing
   1581 	 * with disklabels. */
   1582 
   1583 	disk_attach(&rs->sc_dkdev);
   1584 
   1585 	/* XXX There may be a weird interaction here between this, and
   1586 	 * protectedSectors, as used in RAIDframe.  */
   1587 
   1588 	rs->sc_size = raidPtr->totalSectors;
   1589 
   1590 }
   1591 
   1592 /* wake up the daemon & tell it to get us a spare table
   1593  * XXX
   1594  * the entries in the queues should be tagged with the raidPtr
   1595  * so that in the extremely rare case that two recons happen at once,
   1596  * we know for which device were requesting a spare table
   1597  * XXX
   1598  *
   1599  * XXX This code is not currently used. GO
   1600  */
   1601 int
   1602 rf_GetSpareTableFromDaemon(req)
   1603 	RF_SparetWait_t *req;
   1604 {
   1605 	int     retcode;
   1606 
   1607 	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
   1608 	req->next = rf_sparet_wait_queue;
   1609 	rf_sparet_wait_queue = req;
   1610 	wakeup(&rf_sparet_wait_queue);
   1611 
   1612 	/* mpsleep unlocks the mutex */
   1613 	while (!rf_sparet_resp_queue) {
   1614 		tsleep(&rf_sparet_resp_queue, PRIBIO,
   1615 		    "raidframe getsparetable", 0);
   1616 	}
   1617 	req = rf_sparet_resp_queue;
   1618 	rf_sparet_resp_queue = req->next;
   1619 	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
   1620 
   1621 	retcode = req->fcol;
   1622 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
   1623 					 * alloc'd */
   1624 	return (retcode);
   1625 }
   1626 
   1627 /* a wrapper around rf_DoAccess that extracts appropriate info from the
   1628  * bp & passes it down.
   1629  * any calls originating in the kernel must use non-blocking I/O
   1630  * do some extra sanity checking to return "appropriate" error values for
   1631  * certain conditions (to make some standard utilities work)
   1632  *
   1633  * Formerly known as: rf_DoAccessKernel
   1634  */
   1635 void
   1636 raidstart(raidPtr)
   1637 	RF_Raid_t *raidPtr;
   1638 {
   1639 	RF_SectorCount_t num_blocks, pb, sum;
   1640 	RF_RaidAddr_t raid_addr;
   1641 	int     retcode;
   1642 	struct partition *pp;
   1643 	daddr_t blocknum;
   1644 	int     unit;
   1645 	struct raid_softc *rs;
   1646 	int     do_async;
   1647 	struct buf *bp;
   1648 
   1649 	unit = raidPtr->raidid;
   1650 	rs = &raid_softc[unit];
   1651 
   1652 	/* quick check to see if anything has died recently */
   1653 	RF_LOCK_MUTEX(raidPtr->mutex);
   1654 	if (raidPtr->numNewFailures > 0) {
   1655 		rf_update_component_labels(raidPtr,
   1656 					   RF_NORMAL_COMPONENT_UPDATE);
   1657 		raidPtr->numNewFailures--;
   1658 	}
   1659 
   1660 	/* Check to see if we're at the limit... */
   1661 	while (raidPtr->openings > 0) {
   1662 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1663 
   1664 		/* get the next item, if any, from the queue */
   1665 		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
   1666 			/* nothing more to do */
   1667 			return;
   1668 		}
   1669 
   1670 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
   1671 		 * partition.. Need to make it absolute to the underlying
   1672 		 * device.. */
   1673 
   1674 		blocknum = bp->b_blkno;
   1675 		if (DISKPART(bp->b_dev) != RAW_PART) {
   1676 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
   1677 			blocknum += pp->p_offset;
   1678 		}
   1679 
   1680 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
   1681 			    (int) blocknum));
   1682 
   1683 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
   1684 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
   1685 
   1686 		/* *THIS* is where we adjust what block we're going to...
   1687 		 * but DO NOT TOUCH bp->b_blkno!!! */
   1688 		raid_addr = blocknum;
   1689 
   1690 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
   1691 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
   1692 		sum = raid_addr + num_blocks + pb;
   1693 		if (1 || rf_debugKernelAccess) {
   1694 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
   1695 				    (int) raid_addr, (int) sum, (int) num_blocks,
   1696 				    (int) pb, (int) bp->b_resid));
   1697 		}
   1698 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
   1699 		    || (sum < num_blocks) || (sum < pb)) {
   1700 			bp->b_error = ENOSPC;
   1701 			bp->b_flags |= B_ERROR;
   1702 			bp->b_resid = bp->b_bcount;
   1703 			biodone(bp);
   1704 			RF_LOCK_MUTEX(raidPtr->mutex);
   1705 			continue;
   1706 		}
   1707 		/*
   1708 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
   1709 		 */
   1710 
   1711 		if (bp->b_bcount & raidPtr->sectorMask) {
   1712 			bp->b_error = EINVAL;
   1713 			bp->b_flags |= B_ERROR;
   1714 			bp->b_resid = bp->b_bcount;
   1715 			biodone(bp);
   1716 			RF_LOCK_MUTEX(raidPtr->mutex);
   1717 			continue;
   1718 
   1719 		}
   1720 		db1_printf(("Calling DoAccess..\n"));
   1721 
   1722 
   1723 		RF_LOCK_MUTEX(raidPtr->mutex);
   1724 		raidPtr->openings--;
   1725 		RF_UNLOCK_MUTEX(raidPtr->mutex);
   1726 
   1727 		/*
   1728 		 * Everything is async.
   1729 		 */
   1730 		do_async = 1;
   1731 
   1732 		disk_busy(&rs->sc_dkdev);
   1733 
   1734 		/* XXX we're still at splbio() here... do we *really*
   1735 		   need to be? */
   1736 
   1737 		/* don't ever condition on bp->b_flags & B_WRITE.
   1738 		 * always condition on B_READ instead */
   1739 
   1740 		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
   1741 				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
   1742 				      do_async, raid_addr, num_blocks,
   1743 				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
   1744 
   1745 		RF_LOCK_MUTEX(raidPtr->mutex);
   1746 	}
   1747 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1748 }
   1749 
   1750 
   1751 
   1752 
   1753 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
   1754 
   1755 int
   1756 rf_DispatchKernelIO(queue, req)
   1757 	RF_DiskQueue_t *queue;
   1758 	RF_DiskQueueData_t *req;
   1759 {
   1760 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
   1761 	struct buf *bp;
   1762 	struct raidbuf *raidbp = NULL;
   1763 	struct raid_softc *rs;
   1764 	int     unit;
   1765 	int s;
   1766 
   1767 	s=0;
   1768 	/* s = splbio();*/ /* want to test this */
   1769 	/* XXX along with the vnode, we also need the softc associated with
   1770 	 * this device.. */
   1771 
   1772 	req->queue = queue;
   1773 
   1774 	unit = queue->raidPtr->raidid;
   1775 
   1776 	db1_printf(("DispatchKernelIO unit: %d\n", unit));
   1777 
   1778 	if (unit >= numraid) {
   1779 		printf("Invalid unit number: %d %d\n", unit, numraid);
   1780 		panic("Invalid Unit number in rf_DispatchKernelIO\n");
   1781 	}
   1782 	rs = &raid_softc[unit];
   1783 
   1784 	bp = req->bp;
   1785 #if 1
   1786 	/* XXX when there is a physical disk failure, someone is passing us a
   1787 	 * buffer that contains old stuff!!  Attempt to deal with this problem
   1788 	 * without taking a performance hit... (not sure where the real bug
   1789 	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
   1790 
   1791 	if (bp->b_flags & B_ERROR) {
   1792 		bp->b_flags &= ~B_ERROR;
   1793 	}
   1794 	if (bp->b_error != 0) {
   1795 		bp->b_error = 0;
   1796 	}
   1797 #endif
   1798 	raidbp = RAIDGETBUF(rs);
   1799 
   1800 	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */
   1801 
   1802 	/*
   1803 	 * context for raidiodone
   1804 	 */
   1805 	raidbp->rf_obp = bp;
   1806 	raidbp->req = req;
   1807 
   1808 	LIST_INIT(&raidbp->rf_buf.b_dep);
   1809 
   1810 	switch (req->type) {
   1811 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
   1812 		/* XXX need to do something extra here.. */
   1813 		/* I'm leaving this in, as I've never actually seen it used,
   1814 		 * and I'd like folks to report it... GO */
   1815 		printf(("WAKEUP CALLED\n"));
   1816 		queue->numOutstanding++;
   1817 
   1818 		/* XXX need to glue the original buffer into this??  */
   1819 
   1820 		KernelWakeupFunc(&raidbp->rf_buf);
   1821 		break;
   1822 
   1823 	case RF_IO_TYPE_READ:
   1824 	case RF_IO_TYPE_WRITE:
   1825 
   1826 		if (req->tracerec) {
   1827 			RF_ETIMER_START(req->tracerec->timer);
   1828 		}
   1829 		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
   1830 		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
   1831 		    req->sectorOffset, req->numSector,
   1832 		    req->buf, KernelWakeupFunc, (void *) req,
   1833 		    queue->raidPtr->logBytesPerSector, req->b_proc);
   1834 
   1835 		if (rf_debugKernelAccess) {
   1836 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
   1837 				(long) bp->b_blkno));
   1838 		}
   1839 		queue->numOutstanding++;
   1840 		queue->last_deq_sector = req->sectorOffset;
   1841 		/* acc wouldn't have been let in if there were any pending
   1842 		 * reqs at any other priority */
   1843 		queue->curPriority = req->priority;
   1844 
   1845 		db1_printf(("Going for %c to unit %d row %d col %d\n",
   1846 			req->type, unit, queue->row, queue->col));
   1847 		db1_printf(("sector %d count %d (%d bytes) %d\n",
   1848 			(int) req->sectorOffset, (int) req->numSector,
   1849 			(int) (req->numSector <<
   1850 			    queue->raidPtr->logBytesPerSector),
   1851 			(int) queue->raidPtr->logBytesPerSector));
   1852 		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
   1853 			raidbp->rf_buf.b_vp->v_numoutput++;
   1854 		}
   1855 		VOP_STRATEGY(&raidbp->rf_buf);
   1856 
   1857 		break;
   1858 
   1859 	default:
   1860 		panic("bad req->type in rf_DispatchKernelIO");
   1861 	}
   1862 	db1_printf(("Exiting from DispatchKernelIO\n"));
   1863 	/* splx(s); */ /* want to test this */
   1864 	return (0);
   1865 }
   1866 /* this is the callback function associated with a I/O invoked from
   1867    kernel code.
   1868  */
   1869 static void
   1870 KernelWakeupFunc(vbp)
   1871 	struct buf *vbp;
   1872 {
   1873 	RF_DiskQueueData_t *req = NULL;
   1874 	RF_DiskQueue_t *queue;
   1875 	struct raidbuf *raidbp = (struct raidbuf *) vbp;
   1876 	struct buf *bp;
   1877 	struct raid_softc *rs;
   1878 	int     unit;
   1879 	int s;
   1880 
   1881 	s = splbio();
   1882 	db1_printf(("recovering the request queue:\n"));
   1883 	req = raidbp->req;
   1884 
   1885 	bp = raidbp->rf_obp;
   1886 
   1887 	queue = (RF_DiskQueue_t *) req->queue;
   1888 
   1889 	if (raidbp->rf_buf.b_flags & B_ERROR) {
   1890 		bp->b_flags |= B_ERROR;
   1891 		bp->b_error = raidbp->rf_buf.b_error ?
   1892 		    raidbp->rf_buf.b_error : EIO;
   1893 	}
   1894 
   1895 	/* XXX methinks this could be wrong... */
   1896 #if 1
   1897 	bp->b_resid = raidbp->rf_buf.b_resid;
   1898 #endif
   1899 
   1900 	if (req->tracerec) {
   1901 		RF_ETIMER_STOP(req->tracerec->timer);
   1902 		RF_ETIMER_EVAL(req->tracerec->timer);
   1903 		RF_LOCK_MUTEX(rf_tracing_mutex);
   1904 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1905 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
   1906 		req->tracerec->num_phys_ios++;
   1907 		RF_UNLOCK_MUTEX(rf_tracing_mutex);
   1908 	}
   1909 	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */
   1910 
   1911 	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */
   1912 
   1913 
   1914 	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
   1915 	 * ballistic, and mark the component as hosed... */
   1916 
   1917 	if (bp->b_flags & B_ERROR) {
   1918 		/* Mark the disk as dead */
   1919 		/* but only mark it once... */
   1920 		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
   1921 		    rf_ds_optimal) {
   1922 			printf("raid%d: IO Error.  Marking %s as failed.\n",
   1923 			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
   1924 			queue->raidPtr->Disks[queue->row][queue->col].status =
   1925 			    rf_ds_failed;
   1926 			queue->raidPtr->status[queue->row] = rf_rs_degraded;
   1927 			queue->raidPtr->numFailures++;
   1928 			queue->raidPtr->numNewFailures++;
   1929 		} else {	/* Disk is already dead... */
   1930 			/* printf("Disk already marked as dead!\n"); */
   1931 		}
   1932 
   1933 	}
   1934 
   1935 	rs = &raid_softc[unit];
   1936 	RAIDPUTBUF(rs, raidbp);
   1937 
   1938 	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
   1939 	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
   1940 
   1941 	splx(s);
   1942 }
   1943 
   1944 
   1945 
   1946 /*
   1947  * initialize a buf structure for doing an I/O in the kernel.
   1948  */
   1949 static void
   1950 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
   1951        logBytesPerSector, b_proc)
   1952 	struct buf *bp;
   1953 	struct vnode *b_vp;
   1954 	unsigned rw_flag;
   1955 	dev_t dev;
   1956 	RF_SectorNum_t startSect;
   1957 	RF_SectorCount_t numSect;
   1958 	caddr_t buf;
   1959 	void (*cbFunc) (struct buf *);
   1960 	void *cbArg;
   1961 	int logBytesPerSector;
   1962 	struct proc *b_proc;
   1963 {
   1964 	/* bp->b_flags       = B_PHYS | rw_flag; */
   1965 	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
   1966 	bp->b_bcount = numSect << logBytesPerSector;
   1967 	bp->b_bufsize = bp->b_bcount;
   1968 	bp->b_error = 0;
   1969 	bp->b_dev = dev;
   1970 	bp->b_data = buf;
   1971 	bp->b_blkno = startSect;
   1972 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
   1973 	if (bp->b_bcount == 0) {
   1974 		panic("bp->b_bcount is zero in InitBP!!\n");
   1975 	}
   1976 	bp->b_proc = b_proc;
   1977 	bp->b_iodone = cbFunc;
   1978 	bp->b_vp = b_vp;
   1979 
   1980 }
   1981 
   1982 static void
   1983 raidgetdefaultlabel(raidPtr, rs, lp)
   1984 	RF_Raid_t *raidPtr;
   1985 	struct raid_softc *rs;
   1986 	struct disklabel *lp;
   1987 {
   1988 	db1_printf(("Building a default label...\n"));
   1989 	memset(lp, 0, sizeof(*lp));
   1990 
   1991 	/* fabricate a label... */
   1992 	lp->d_secperunit = raidPtr->totalSectors;
   1993 	lp->d_secsize = raidPtr->bytesPerSector;
   1994 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
   1995 	lp->d_ntracks = 4 * raidPtr->numCol;
   1996 	lp->d_ncylinders = raidPtr->totalSectors /
   1997 		(lp->d_nsectors * lp->d_ntracks);
   1998 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
   1999 
   2000 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
   2001 	lp->d_type = DTYPE_RAID;
   2002 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
   2003 	lp->d_rpm = 3600;
   2004 	lp->d_interleave = 1;
   2005 	lp->d_flags = 0;
   2006 
   2007 	lp->d_partitions[RAW_PART].p_offset = 0;
   2008 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
   2009 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
   2010 	lp->d_npartitions = RAW_PART + 1;
   2011 
   2012 	lp->d_magic = DISKMAGIC;
   2013 	lp->d_magic2 = DISKMAGIC;
   2014 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
   2015 
   2016 }
   2017 /*
   2018  * Read the disklabel from the raid device.  If one is not present, fake one
   2019  * up.
   2020  */
   2021 static void
   2022 raidgetdisklabel(dev)
   2023 	dev_t   dev;
   2024 {
   2025 	int     unit = raidunit(dev);
   2026 	struct raid_softc *rs = &raid_softc[unit];
   2027 	char   *errstring;
   2028 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2029 	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
   2030 	RF_Raid_t *raidPtr;
   2031 
   2032 	db1_printf(("Getting the disklabel...\n"));
   2033 
   2034 	memset(clp, 0, sizeof(*clp));
   2035 
   2036 	raidPtr = raidPtrs[unit];
   2037 
   2038 	raidgetdefaultlabel(raidPtr, rs, lp);
   2039 
   2040 	/*
   2041 	 * Call the generic disklabel extraction routine.
   2042 	 */
   2043 	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
   2044 	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
   2045 	if (errstring)
   2046 		raidmakedisklabel(rs);
   2047 	else {
   2048 		int     i;
   2049 		struct partition *pp;
   2050 
   2051 		/*
   2052 		 * Sanity check whether the found disklabel is valid.
   2053 		 *
   2054 		 * This is necessary since total size of the raid device
   2055 		 * may vary when an interleave is changed even though exactly
   2056 		 * same componets are used, and old disklabel may used
   2057 		 * if that is found.
   2058 		 */
   2059 		if (lp->d_secperunit != rs->sc_size)
   2060 			printf("raid%d: WARNING: %s: "
   2061 			    "total sector size in disklabel (%d) != "
   2062 			    "the size of raid (%ld)\n", unit, rs->sc_xname,
   2063 			    lp->d_secperunit, (long) rs->sc_size);
   2064 		for (i = 0; i < lp->d_npartitions; i++) {
   2065 			pp = &lp->d_partitions[i];
   2066 			if (pp->p_offset + pp->p_size > rs->sc_size)
   2067 				printf("raid%d: WARNING: %s: end of partition `%c' "
   2068 				       "exceeds the size of raid (%ld)\n",
   2069 				       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
   2070 		}
   2071 	}
   2072 
   2073 }
   2074 /*
   2075  * Take care of things one might want to take care of in the event
   2076  * that a disklabel isn't present.
   2077  */
   2078 static void
   2079 raidmakedisklabel(rs)
   2080 	struct raid_softc *rs;
   2081 {
   2082 	struct disklabel *lp = rs->sc_dkdev.dk_label;
   2083 	db1_printf(("Making a label..\n"));
   2084 
   2085 	/*
   2086 	 * For historical reasons, if there's no disklabel present
   2087 	 * the raw partition must be marked FS_BSDFFS.
   2088 	 */
   2089 
   2090 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
   2091 
   2092 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
   2093 
   2094 	lp->d_checksum = dkcksum(lp);
   2095 }
   2096 /*
   2097  * Lookup the provided name in the filesystem.  If the file exists,
   2098  * is a valid block device, and isn't being used by anyone else,
   2099  * set *vpp to the file's vnode.
   2100  * You'll find the original of this in ccd.c
   2101  */
   2102 int
   2103 raidlookup(path, p, vpp)
   2104 	char   *path;
   2105 	struct proc *p;
   2106 	struct vnode **vpp;	/* result */
   2107 {
   2108 	struct nameidata nd;
   2109 	struct vnode *vp;
   2110 	struct vattr va;
   2111 	int     error;
   2112 
   2113 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
   2114 	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
   2115 #if 0
   2116 		printf("RAIDframe: vn_open returned %d\n", error);
   2117 #endif
   2118 		return (error);
   2119 	}
   2120 	vp = nd.ni_vp;
   2121 	if (vp->v_usecount > 1) {
   2122 		VOP_UNLOCK(vp, 0);
   2123 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2124 		return (EBUSY);
   2125 	}
   2126 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
   2127 		VOP_UNLOCK(vp, 0);
   2128 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2129 		return (error);
   2130 	}
   2131 	/* XXX: eventually we should handle VREG, too. */
   2132 	if (va.va_type != VBLK) {
   2133 		VOP_UNLOCK(vp, 0);
   2134 		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2135 		return (ENOTBLK);
   2136 	}
   2137 	VOP_UNLOCK(vp, 0);
   2138 	*vpp = vp;
   2139 	return (0);
   2140 }
   2141 /*
   2142  * Wait interruptibly for an exclusive lock.
   2143  *
   2144  * XXX
   2145  * Several drivers do this; it should be abstracted and made MP-safe.
   2146  * (Hmm... where have we seen this warning before :->  GO )
   2147  */
   2148 static int
   2149 raidlock(rs)
   2150 	struct raid_softc *rs;
   2151 {
   2152 	int     error;
   2153 
   2154 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
   2155 		rs->sc_flags |= RAIDF_WANTED;
   2156 		if ((error =
   2157 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
   2158 			return (error);
   2159 	}
   2160 	rs->sc_flags |= RAIDF_LOCKED;
   2161 	return (0);
   2162 }
   2163 /*
   2164  * Unlock and wake up any waiters.
   2165  */
   2166 static void
   2167 raidunlock(rs)
   2168 	struct raid_softc *rs;
   2169 {
   2170 
   2171 	rs->sc_flags &= ~RAIDF_LOCKED;
   2172 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
   2173 		rs->sc_flags &= ~RAIDF_WANTED;
   2174 		wakeup(rs);
   2175 	}
   2176 }
   2177 
   2178 
/*
 * Location and size of the on-disk component label.  The label I/O
 * routines below start the transfer at block
 * RF_COMPONENT_INFO_OFFSET / DEV_BSIZE of the component and move
 * RF_COMPONENT_INFO_SIZE bytes.
 */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
   2181 
   2182 int
   2183 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
   2184 {
   2185 	RF_ComponentLabel_t clabel;
   2186 	raidread_component_label(dev, b_vp, &clabel);
   2187 	clabel.mod_counter = mod_counter;
   2188 	clabel.clean = RF_RAID_CLEAN;
   2189 	raidwrite_component_label(dev, b_vp, &clabel);
   2190 	return(0);
   2191 }
   2192 
   2193 
   2194 int
   2195 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
   2196 {
   2197 	RF_ComponentLabel_t clabel;
   2198 	raidread_component_label(dev, b_vp, &clabel);
   2199 	clabel.mod_counter = mod_counter;
   2200 	clabel.clean = RF_RAID_DIRTY;
   2201 	raidwrite_component_label(dev, b_vp, &clabel);
   2202 	return(0);
   2203 }
   2204 
   2205 /* ARGSUSED */
   2206 int
   2207 raidread_component_label(dev, b_vp, clabel)
   2208 	dev_t dev;
   2209 	struct vnode *b_vp;
   2210 	RF_ComponentLabel_t *clabel;
   2211 {
   2212 	struct buf *bp;
   2213 	int error;
   2214 
   2215 	/* XXX should probably ensure that we don't try to do this if
   2216 	   someone has changed rf_protected_sectors. */
   2217 
   2218 	if (b_vp == NULL) {
   2219 		/* For whatever reason, this component is not valid.
   2220 		   Don't try to read a component label from it. */
   2221 		return(EINVAL);
   2222 	}
   2223 
   2224 	/* get a block of the appropriate size... */
   2225 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2226 	bp->b_dev = dev;
   2227 
   2228 	/* get our ducks in a row for the read */
   2229 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2230 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2231 	bp->b_flags |= B_READ;
   2232  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2233 
   2234 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2235 
   2236 	error = biowait(bp);
   2237 
   2238 	if (!error) {
   2239 		memcpy(clabel, bp->b_data,
   2240 		       sizeof(RF_ComponentLabel_t));
   2241 #if 0
   2242 		rf_print_component_label( clabel );
   2243 #endif
   2244         } else {
   2245 #if 0
   2246 		printf("Failed to read RAID component label!\n");
   2247 #endif
   2248 	}
   2249 
   2250 	brelse(bp);
   2251 	return(error);
   2252 }
   2253 /* ARGSUSED */
   2254 int
   2255 raidwrite_component_label(dev, b_vp, clabel)
   2256 	dev_t dev;
   2257 	struct vnode *b_vp;
   2258 	RF_ComponentLabel_t *clabel;
   2259 {
   2260 	struct buf *bp;
   2261 	int error;
   2262 
   2263 	/* get a block of the appropriate size... */
   2264 	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
   2265 	bp->b_dev = dev;
   2266 
   2267 	/* get our ducks in a row for the write */
   2268 	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
   2269 	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
   2270 	bp->b_flags |= B_WRITE;
   2271  	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
   2272 
   2273 	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
   2274 
   2275 	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
   2276 
   2277 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
   2278 	error = biowait(bp);
   2279 	brelse(bp);
   2280 	if (error) {
   2281 #if 1
   2282 		printf("Failed to write RAID component info!\n");
   2283 #endif
   2284 	}
   2285 
   2286 	return(error);
   2287 }
   2288 
   2289 void
   2290 rf_markalldirty(raidPtr)
   2291 	RF_Raid_t *raidPtr;
   2292 {
   2293 	RF_ComponentLabel_t clabel;
   2294 	int r,c;
   2295 
   2296 	raidPtr->mod_counter++;
   2297 	for (r = 0; r < raidPtr->numRow; r++) {
   2298 		for (c = 0; c < raidPtr->numCol; c++) {
   2299 			/* we don't want to touch (at all) a disk that has
   2300 			   failed */
   2301 			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
   2302 				raidread_component_label(
   2303 					raidPtr->Disks[r][c].dev,
   2304 					raidPtr->raid_cinfo[r][c].ci_vp,
   2305 					&clabel);
   2306 				if (clabel.status == rf_ds_spared) {
   2307 					/* XXX do something special...
   2308 					 but whatever you do, don't
   2309 					 try to access it!! */
   2310 				} else {
   2311 #if 0
   2312 				clabel.status =
   2313 					raidPtr->Disks[r][c].status;
   2314 				raidwrite_component_label(
   2315 					raidPtr->Disks[r][c].dev,
   2316 					raidPtr->raid_cinfo[r][c].ci_vp,
   2317 					&clabel);
   2318 #endif
   2319 				raidmarkdirty(
   2320 				       raidPtr->Disks[r][c].dev,
   2321 				       raidPtr->raid_cinfo[r][c].ci_vp,
   2322 				       raidPtr->mod_counter);
   2323 				}
   2324 			}
   2325 		}
   2326 	}
   2327 	/* printf("Component labels marked dirty.\n"); */
   2328 #if 0
   2329 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2330 		sparecol = raidPtr->numCol + c;
   2331 		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
   2332 			/*
   2333 
   2334 			   XXX this is where we get fancy and map this spare
   2335 			   into it's correct spot in the array.
   2336 
   2337 			 */
   2338 			/*
   2339 
   2340 			   we claim this disk is "optimal" if it's
   2341 			   rf_ds_used_spare, as that means it should be
   2342 			   directly substitutable for the disk it replaced.
   2343 			   We note that too...
   2344 
   2345 			 */
   2346 
   2347 			for(i=0;i<raidPtr->numRow;i++) {
   2348 				for(j=0;j<raidPtr->numCol;j++) {
   2349 					if ((raidPtr->Disks[i][j].spareRow ==
   2350 					     r) &&
   2351 					    (raidPtr->Disks[i][j].spareCol ==
   2352 					     sparecol)) {
   2353 						srow = r;
   2354 						scol = sparecol;
   2355 						break;
   2356 					}
   2357 				}
   2358 			}
   2359 
   2360 			raidread_component_label(
   2361 				      raidPtr->Disks[r][sparecol].dev,
   2362 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2363 				      &clabel);
   2364 			/* make sure status is noted */
   2365 			clabel.version = RF_COMPONENT_LABEL_VERSION;
   2366 			clabel.mod_counter = raidPtr->mod_counter;
   2367 			clabel.serial_number = raidPtr->serial_number;
   2368 			clabel.row = srow;
   2369 			clabel.column = scol;
   2370 			clabel.num_rows = raidPtr->numRow;
   2371 			clabel.num_columns = raidPtr->numCol;
   2372 			clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
   2373 			clabel.status = rf_ds_optimal;
   2374 			raidwrite_component_label(
   2375 				      raidPtr->Disks[r][sparecol].dev,
   2376 				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
   2377 				      &clabel);
   2378 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
   2379 			              raidPtr->raid_cinfo[r][sparecol].ci_vp);
   2380 		}
   2381 	}
   2382 
   2383 #endif
   2384 }
   2385 
   2386 
   2387 void
   2388 rf_update_component_labels(raidPtr, final)
   2389 	RF_Raid_t *raidPtr;
   2390 	int final;
   2391 {
   2392 	RF_ComponentLabel_t clabel;
   2393 	int sparecol;
   2394 	int r,c;
   2395 	int i,j;
   2396 	int srow, scol;
   2397 
   2398 	srow = -1;
   2399 	scol = -1;
   2400 
   2401 	/* XXX should do extra checks to make sure things really are clean,
   2402 	   rather than blindly setting the clean bit... */
   2403 
   2404 	raidPtr->mod_counter++;
   2405 
   2406 	for (r = 0; r < raidPtr->numRow; r++) {
   2407 		for (c = 0; c < raidPtr->numCol; c++) {
   2408 			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
   2409 				raidread_component_label(
   2410 					raidPtr->Disks[r][c].dev,
   2411 					raidPtr->raid_cinfo[r][c].ci_vp,
   2412 					&clabel);
   2413 				/* make sure status is noted */
   2414 				clabel.status = rf_ds_optimal;
   2415 				/* bump the counter */
   2416 				clabel.mod_counter = raidPtr->mod_counter;
   2417 
   2418 				raidwrite_component_label(
   2419 					raidPtr->Disks[r][c].dev,
   2420 					raidPtr->raid_cinfo[r][c].ci_vp,
   2421 					&clabel);
   2422 				if (final == RF_FINAL_COMPONENT_UPDATE) {
   2423 					if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2424 						raidmarkclean(
   2425 							      raidPtr->Disks[r][c].dev,
   2426 							      raidPtr->raid_cinfo[r][c].ci_vp,
   2427 							      raidPtr->mod_counter);
   2428 					}
   2429 				}
   2430 			}
   2431 			/* else we don't touch it.. */
   2432 		}
   2433 	}
   2434 
   2435 	for( c = 0; c < raidPtr->numSpare ; c++) {
   2436 		sparecol = raidPtr->numCol + c;
   2437 		/* Need to ensure that the reconstruct actually completed! */
   2438 		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
   2439 			/*
   2440 
   2441 			   we claim this disk is "optimal" if it's
   2442 			   rf_ds_used_spare, as that means it should be
   2443 			   directly substitutable for the disk it replaced.
   2444 			   We note that too...
   2445 
   2446 			 */
   2447 
   2448 			for(i=0;i<raidPtr->numRow;i++) {
   2449 				for(j=0;j<raidPtr->numCol;j++) {
   2450 					if ((raidPtr->Disks[i][j].spareRow ==
   2451 					     0) &&
   2452 					    (raidPtr->Disks[i][j].spareCol ==
   2453 					     sparecol)) {
   2454 						srow = i;
   2455 						scol = j;
   2456 						break;
   2457 					}
   2458 				}
   2459 			}
   2460 
   2461 			/* XXX shouldn't *really* need this... */
   2462 			raidread_component_label(
   2463 				      raidPtr->Disks[0][sparecol].dev,
   2464 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2465 				      &clabel);
   2466 			/* make sure status is noted */
   2467 
   2468 			raid_init_component_label(raidPtr, &clabel);
   2469 
   2470 			clabel.mod_counter = raidPtr->mod_counter;
   2471 			clabel.row = srow;
   2472 			clabel.column = scol;
   2473 			clabel.status = rf_ds_optimal;
   2474 
   2475 			raidwrite_component_label(
   2476 				      raidPtr->Disks[0][sparecol].dev,
   2477 				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2478 				      &clabel);
   2479 			if (final == RF_FINAL_COMPONENT_UPDATE) {
   2480 				if (raidPtr->parity_good == RF_RAID_CLEAN) {
   2481 					raidmarkclean( raidPtr->Disks[0][sparecol].dev,
   2482 						       raidPtr->raid_cinfo[0][sparecol].ci_vp,
   2483 						       raidPtr->mod_counter);
   2484 				}
   2485 			}
   2486 		}
   2487 	}
   2488 	/* 	printf("Component labels updated\n"); */
   2489 }
   2490 
   2491 void
   2492 rf_close_component(raidPtr, vp, auto_configured)
   2493 	RF_Raid_t *raidPtr;
   2494 	struct vnode *vp;
   2495 	int auto_configured;
   2496 {
   2497 	struct proc *p;
   2498 
   2499 	p = raidPtr->engine_thread;
   2500 
   2501 	if (vp != NULL) {
   2502 		if (auto_configured == 1) {
   2503 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2504 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2505 			vput(vp);
   2506 
   2507 		} else {
   2508 			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
   2509 		}
   2510 	} else {
   2511 #if 0
   2512 		printf("vnode was NULL\n");
   2513 #endif
   2514 	}
   2515 }
   2516 
   2517 
   2518 void
   2519 rf_UnconfigureVnodes(raidPtr)
   2520 	RF_Raid_t *raidPtr;
   2521 {
   2522 	int r,c;
   2523 	struct proc *p;
   2524 	struct vnode *vp;
   2525 	int acd;
   2526 
   2527 
   2528 	/* We take this opportunity to close the vnodes like we should.. */
   2529 
   2530 	p = raidPtr->engine_thread;
   2531 
   2532 	for (r = 0; r < raidPtr->numRow; r++) {
   2533 		for (c = 0; c < raidPtr->numCol; c++) {
   2534 #if 0
   2535 			printf("raid%d: Closing vnode for row: %d col: %d\n",
   2536 			       raidPtr->raidid, r, c);
   2537 #endif
   2538 			vp = raidPtr->raid_cinfo[r][c].ci_vp;
   2539 			acd = raidPtr->Disks[r][c].auto_configured;
   2540 			rf_close_component(raidPtr, vp, acd);
   2541 			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
   2542 			raidPtr->Disks[r][c].auto_configured = 0;
   2543 		}
   2544 	}
   2545 	for (r = 0; r < raidPtr->numSpare; r++) {
   2546 #if 0
   2547 		printf("raid%d: Closing vnode for spare: %d\n",
   2548 		       raidPtr->raidid, r);
   2549 #endif
   2550 		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
   2551 		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
   2552 		rf_close_component(raidPtr, vp, acd);
   2553 		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
   2554 		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
   2555 	}
   2556 }
   2557 
   2558 
   2559 void
   2560 rf_ReconThread(req)
   2561 	struct rf_recon_req *req;
   2562 {
   2563 	int     s;
   2564 	RF_Raid_t *raidPtr;
   2565 
   2566 	s = splbio();
   2567 	raidPtr = (RF_Raid_t *) req->raidPtr;
   2568 	raidPtr->recon_in_progress = 1;
   2569 
   2570 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
   2571 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
   2572 
   2573 	/* XXX get rid of this! we don't need it at all.. */
   2574 	RF_Free(req, sizeof(*req));
   2575 
   2576 	raidPtr->recon_in_progress = 0;
   2577 	splx(s);
   2578 
   2579 	/* That's all... */
   2580 	kthread_exit(0);        /* does not return */
   2581 }
   2582 
   2583 void
   2584 rf_RewriteParityThread(raidPtr)
   2585 	RF_Raid_t *raidPtr;
   2586 {
   2587 	int retcode;
   2588 	int s;
   2589 
   2590 	raidPtr->parity_rewrite_in_progress = 1;
   2591 	s = splbio();
   2592 	retcode = rf_RewriteParity(raidPtr);
   2593 	splx(s);
   2594 	if (retcode) {
   2595 		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
   2596 	} else {
   2597 		/* set the clean bit!  If we shutdown correctly,
   2598 		   the clean bit on each component label will get
   2599 		   set */
   2600 		raidPtr->parity_good = RF_RAID_CLEAN;
   2601 	}
   2602 	raidPtr->parity_rewrite_in_progress = 0;
   2603 
   2604 	/* Anyone waiting for us to stop?  If so, inform them... */
   2605 	if (raidPtr->waitShutdown) {
   2606 		wakeup(&raidPtr->parity_rewrite_in_progress);
   2607 	}
   2608 
   2609 	/* That's all... */
   2610 	kthread_exit(0);        /* does not return */
   2611 }
   2612 
   2613 
   2614 void
   2615 rf_CopybackThread(raidPtr)
   2616 	RF_Raid_t *raidPtr;
   2617 {
   2618 	int s;
   2619 
   2620 	raidPtr->copyback_in_progress = 1;
   2621 	s = splbio();
   2622 	rf_CopybackReconstructedData(raidPtr);
   2623 	splx(s);
   2624 	raidPtr->copyback_in_progress = 0;
   2625 
   2626 	/* That's all... */
   2627 	kthread_exit(0);        /* does not return */
   2628 }
   2629 
   2630 
   2631 void
   2632 rf_ReconstructInPlaceThread(req)
   2633 	struct rf_recon_req *req;
   2634 {
   2635 	int retcode;
   2636 	int s;
   2637 	RF_Raid_t *raidPtr;
   2638 
   2639 	s = splbio();
   2640 	raidPtr = req->raidPtr;
   2641 	raidPtr->recon_in_progress = 1;
   2642 	retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
   2643 	RF_Free(req, sizeof(*req));
   2644 	raidPtr->recon_in_progress = 0;
   2645 	splx(s);
   2646 
   2647 	/* That's all... */
   2648 	kthread_exit(0);        /* does not return */
   2649 }
   2650 
/*
 * Mount-root hook for the given device.  Intentionally empty: nothing
 * is done with `dev'.
 */
/* ARGSUSED */
void
rf_mountroot_hook(struct device *dev)
{
	/* nothing to do */
}
   2657 
   2658 
   2659 RF_AutoConfig_t *
   2660 rf_find_raid_components()
   2661 {
   2662 	struct devnametobdevmaj *dtobdm;
   2663 	struct vnode *vp;
   2664 	struct disklabel label;
   2665 	struct device *dv;
   2666 	char *cd_name;
   2667 	dev_t dev;
   2668 	int error;
   2669 	int i;
   2670 	int good_one;
   2671 	RF_ComponentLabel_t *clabel;
   2672 	RF_AutoConfig_t *ac_list;
   2673 	RF_AutoConfig_t *ac;
   2674 
   2675 
   2676 	/* initialize the AutoConfig list */
   2677 	ac_list = NULL;
   2678 
   2679 	/* we begin by trolling through *all* the devices on the system */
   2680 
   2681 	for (dv = alldevs.tqh_first; dv != NULL;
   2682 	     dv = dv->dv_list.tqe_next) {
   2683 
   2684 		/* we are only interested in disks... */
   2685 		if (dv->dv_class != DV_DISK)
   2686 			continue;
   2687 
   2688 		/* we don't care about floppies... */
   2689 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
   2690 			continue;
   2691 		}
   2692 
   2693 		/* we don't care about CD's... */
   2694 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"cd")) {
   2695 			continue;
   2696 		}
   2697 
   2698 		/* hdfd is the Atari/Hades floppy driver */
   2699 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
   2700 			continue;
   2701 		}
   2702 		/* fdisa is the Atari/Milan floppy driver */
   2703 		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
   2704 			continue;
   2705 		}
   2706 
   2707 		/* need to find the device_name_to_block_device_major stuff */
   2708 		cd_name = dv->dv_cfdata->cf_driver->cd_name;
   2709 		dtobdm = dev_name2blk;
   2710 		while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
   2711 			dtobdm++;
   2712 		}
   2713 
   2714 		/* get a vnode for the raw partition of this disk */
   2715 
   2716 		dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
   2717 		if (bdevvp(dev, &vp))
   2718 			panic("RAID can't alloc vnode");
   2719 
   2720 		error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2721 
   2722 		if (error) {
   2723 			/* "Who cares."  Continue looking
   2724 			   for something that exists*/
   2725 			vput(vp);
   2726 			continue;
   2727 		}
   2728 
   2729 		/* Ok, the disk exists.  Go get the disklabel. */
   2730 		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
   2731 				  FREAD, NOCRED, 0);
   2732 		if (error) {
   2733 			/*
   2734 			 * XXX can't happen - open() would
   2735 			 * have errored out (or faked up one)
   2736 			 */
   2737 			printf("can't get label for dev %s%c (%d)!?!?\n",
   2738 			       dv->dv_xname, 'a' + RAW_PART, error);
   2739 		}
   2740 
   2741 		/* don't need this any more.  We'll allocate it again
   2742 		   a little later if we really do... */
   2743 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2744 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2745 		vput(vp);
   2746 
   2747 		for (i=0; i < label.d_npartitions; i++) {
   2748 			/* We only support partitions marked as RAID */
   2749 			if (label.d_partitions[i].p_fstype != FS_RAID)
   2750 				continue;
   2751 
   2752 			dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
   2753 			if (bdevvp(dev, &vp))
   2754 				panic("RAID can't alloc vnode");
   2755 
   2756 			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
   2757 			if (error) {
   2758 				/* Whatever... */
   2759 				vput(vp);
   2760 				continue;
   2761 			}
   2762 
   2763 			good_one = 0;
   2764 
   2765 			clabel = (RF_ComponentLabel_t *)
   2766 				malloc(sizeof(RF_ComponentLabel_t),
   2767 				       M_RAIDFRAME, M_NOWAIT);
   2768 			if (clabel == NULL) {
   2769 				/* XXX CLEANUP HERE */
   2770 				printf("RAID auto config: out of memory!\n");
   2771 				return(NULL); /* XXX probably should panic? */
   2772 			}
   2773 
   2774 			if (!raidread_component_label(dev, vp, clabel)) {
   2775 				/* Got the label.  Does it look reasonable? */
   2776 				if (rf_reasonable_label(clabel) &&
   2777 				    (clabel->partitionSize <=
   2778 				     label.d_partitions[i].p_size)) {
   2779 #if DEBUG
   2780 					printf("Component on: %s%c: %d\n",
   2781 					       dv->dv_xname, 'a'+i,
   2782 					       label.d_partitions[i].p_size);
   2783 					rf_print_component_label(clabel);
   2784 #endif
   2785 					/* if it's reasonable, add it,
   2786 					   else ignore it. */
   2787 					ac = (RF_AutoConfig_t *)
   2788 						malloc(sizeof(RF_AutoConfig_t),
   2789 						       M_RAIDFRAME,
   2790 						       M_NOWAIT);
   2791 					if (ac == NULL) {
   2792 						/* XXX should panic?? */
   2793 						return(NULL);
   2794 					}
   2795 
   2796 					sprintf(ac->devname, "%s%c",
   2797 						dv->dv_xname, 'a'+i);
   2798 					ac->dev = dev;
   2799 					ac->vp = vp;
   2800 					ac->clabel = clabel;
   2801 					ac->next = ac_list;
   2802 					ac_list = ac;
   2803 					good_one = 1;
   2804 				}
   2805 			}
   2806 			if (!good_one) {
   2807 				/* cleanup */
   2808 				free(clabel, M_RAIDFRAME);
   2809 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   2810 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
   2811 				vput(vp);
   2812 			}
   2813 		}
   2814 	}
   2815 	return(ac_list);
   2816 }
   2817 
   2818 static int
   2819 rf_reasonable_label(clabel)
   2820 	RF_ComponentLabel_t *clabel;
   2821 {
   2822 
   2823 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
   2824 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
   2825 	    ((clabel->clean == RF_RAID_CLEAN) ||
   2826 	     (clabel->clean == RF_RAID_DIRTY)) &&
   2827 	    clabel->row >=0 &&
   2828 	    clabel->column >= 0 &&
   2829 	    clabel->num_rows > 0 &&
   2830 	    clabel->num_columns > 0 &&
   2831 	    clabel->row < clabel->num_rows &&
   2832 	    clabel->column < clabel->num_columns &&
   2833 	    clabel->blockSize > 0 &&
   2834 	    clabel->numBlocks > 0) {
   2835 		/* label looks reasonable enough... */
   2836 		return(1);
   2837 	}
   2838 	return(0);
   2839 }
   2840 
   2841 
   2842 void
   2843 rf_print_component_label(clabel)
   2844 	RF_ComponentLabel_t *clabel;
   2845 {
   2846 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
   2847 	       clabel->row, clabel->column,
   2848 	       clabel->num_rows, clabel->num_columns);
   2849 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
   2850 	       clabel->version, clabel->serial_number,
   2851 	       clabel->mod_counter);
   2852 	printf("   Clean: %s Status: %d\n",
   2853 	       clabel->clean ? "Yes" : "No", clabel->status );
   2854 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
   2855 	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
   2856 	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
   2857 	       (char) clabel->parityConfig, clabel->blockSize,
   2858 	       clabel->numBlocks);
   2859 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
   2860 	printf("   Contains root partition: %s\n",
   2861 	       clabel->root_partition ? "Yes" : "No" );
   2862 	printf("   Last configured as: raid%d\n", clabel->last_unit );
   2863 #if 0
   2864 	   printf("   Config order: %d\n", clabel->config_order);
   2865 #endif
   2866 
   2867 }
   2868 
   2869 RF_ConfigSet_t *
   2870 rf_create_auto_sets(ac_list)
   2871 	RF_AutoConfig_t *ac_list;
   2872 {
   2873 	RF_AutoConfig_t *ac;
   2874 	RF_ConfigSet_t *config_sets;
   2875 	RF_ConfigSet_t *cset;
   2876 	RF_AutoConfig_t *ac_next;
   2877 
   2878 
   2879 	config_sets = NULL;
   2880 
   2881 	/* Go through the AutoConfig list, and figure out which components
   2882 	   belong to what sets.  */
   2883 	ac = ac_list;
   2884 	while(ac!=NULL) {
   2885 		/* we're going to putz with ac->next, so save it here
   2886 		   for use at the end of the loop */
   2887 		ac_next = ac->next;
   2888 
   2889 		if (config_sets == NULL) {
   2890 			/* will need at least this one... */
   2891 			config_sets = (RF_ConfigSet_t *)
   2892 				malloc(sizeof(RF_ConfigSet_t),
   2893 				       M_RAIDFRAME, M_NOWAIT);
   2894 			if (config_sets == NULL) {
   2895 				panic("rf_create_auto_sets: No memory!\n");
   2896 			}
   2897 			/* this one is easy :) */
   2898 			config_sets->ac = ac;
   2899 			config_sets->next = NULL;
   2900 			config_sets->rootable = 0;
   2901 			ac->next = NULL;
   2902 		} else {
   2903 			/* which set does this component fit into? */
   2904 			cset = config_sets;
   2905 			while(cset!=NULL) {
   2906 				if (rf_does_it_fit(cset, ac)) {
   2907 					/* looks like it matches... */
   2908 					ac->next = cset->ac;
   2909 					cset->ac = ac;
   2910 					break;
   2911 				}
   2912 				cset = cset->next;
   2913 			}
   2914 			if (cset==NULL) {
   2915 				/* didn't find a match above... new set..*/
   2916 				cset = (RF_ConfigSet_t *)
   2917 					malloc(sizeof(RF_ConfigSet_t),
   2918 					       M_RAIDFRAME, M_NOWAIT);
   2919 				if (cset == NULL) {
   2920 					panic("rf_create_auto_sets: No memory!\n");
   2921 				}
   2922 				cset->ac = ac;
   2923 				ac->next = NULL;
   2924 				cset->next = config_sets;
   2925 				cset->rootable = 0;
   2926 				config_sets = cset;
   2927 			}
   2928 		}
   2929 		ac = ac_next;
   2930 	}
   2931 
   2932 
   2933 	return(config_sets);
   2934 }
   2935 
   2936 static int
   2937 rf_does_it_fit(cset, ac)
   2938 	RF_ConfigSet_t *cset;
   2939 	RF_AutoConfig_t *ac;
   2940 {
   2941 	RF_ComponentLabel_t *clabel1, *clabel2;
   2942 
   2943 	/* If this one matches the *first* one in the set, that's good
   2944 	   enough, since the other members of the set would have been
   2945 	   through here too... */
   2946 	/* note that we are not checking partitionSize here..
   2947 
   2948 	   Note that we are also not checking the mod_counters here.
   2949 	   If everything else matches execpt the mod_counter, that's
   2950 	   good enough for this test.  We will deal with the mod_counters
   2951 	   a little later in the autoconfiguration process.
   2952 
   2953 	    (clabel1->mod_counter == clabel2->mod_counter) &&
   2954 
   2955 	   The reason we don't check for this is that failed disks
   2956 	   will have lower modification counts.  If those disks are
   2957 	   not added to the set they used to belong to, then they will
   2958 	   form their own set, which may result in 2 different sets,
   2959 	   for example, competing to be configured at raid0, and
   2960 	   perhaps competing to be the root filesystem set.  If the
   2961 	   wrong ones get configured, or both attempt to become /,
   2962 	   weird behaviour and or serious lossage will occur.  Thus we
   2963 	   need to bring them into the fold here, and kick them out at
   2964 	   a later point.
   2965 
   2966 	*/
   2967 
   2968 	clabel1 = cset->ac->clabel;
   2969 	clabel2 = ac->clabel;
   2970 	if ((clabel1->version == clabel2->version) &&
   2971 	    (clabel1->serial_number == clabel2->serial_number) &&
   2972 	    (clabel1->num_rows == clabel2->num_rows) &&
   2973 	    (clabel1->num_columns == clabel2->num_columns) &&
   2974 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
   2975 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
   2976 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
   2977 	    (clabel1->parityConfig == clabel2->parityConfig) &&
   2978 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
   2979 	    (clabel1->blockSize == clabel2->blockSize) &&
   2980 	    (clabel1->numBlocks == clabel2->numBlocks) &&
   2981 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
   2982 	    (clabel1->root_partition == clabel2->root_partition) &&
   2983 	    (clabel1->last_unit == clabel2->last_unit) &&
   2984 	    (clabel1->config_order == clabel2->config_order)) {
   2985 		/* if it get's here, it almost *has* to be a match */
   2986 	} else {
   2987 		/* it's not consistent with somebody in the set..
   2988 		   punt */
   2989 		return(0);
   2990 	}
   2991 	/* all was fine.. it must fit... */
   2992 	return(1);
   2993 }
   2994 
   2995 int
   2996 rf_have_enough_components(cset)
   2997 	RF_ConfigSet_t *cset;
   2998 {
   2999 	RF_AutoConfig_t *ac;
   3000 	RF_AutoConfig_t *auto_config;
   3001 	RF_ComponentLabel_t *clabel;
   3002 	int r,c;
   3003 	int num_rows;
   3004 	int num_cols;
   3005 	int num_missing;
   3006 	int mod_counter;
   3007 	int mod_counter_found;
   3008 	int even_pair_failed;
   3009 	char parity_type;
   3010 
   3011 
   3012 	/* check to see that we have enough 'live' components
   3013 	   of this set.  If so, we can configure it if necessary */
   3014 
   3015 	num_rows = cset->ac->clabel->num_rows;
   3016 	num_cols = cset->ac->clabel->num_columns;
   3017 	parity_type = cset->ac->clabel->parityConfig;
   3018 
   3019 	/* XXX Check for duplicate components!?!?!? */
   3020 
   3021 	/* Determine what the mod_counter is supposed to be for this set. */
   3022 
   3023 	mod_counter_found = 0;
   3024 	mod_counter = 0;
   3025 	ac = cset->ac;
   3026 	while(ac!=NULL) {
   3027 		if (mod_counter_found==0) {
   3028 			mod_counter = ac->clabel->mod_counter;
   3029 			mod_counter_found = 1;
   3030 		} else {
   3031 			if (ac->clabel->mod_counter > mod_counter) {
   3032 				mod_counter = ac->clabel->mod_counter;
   3033 			}
   3034 		}
   3035 		ac = ac->next;
   3036 	}
   3037 
   3038 	num_missing = 0;
   3039 	auto_config = cset->ac;
   3040 
   3041 	for(r=0; r<num_rows; r++) {
   3042 		even_pair_failed = 0;
   3043 		for(c=0; c<num_cols; c++) {
   3044 			ac = auto_config;
   3045 			while(ac!=NULL) {
   3046 				if ((ac->clabel->row == r) &&
   3047 				    (ac->clabel->column == c) &&
   3048 				    (ac->clabel->mod_counter == mod_counter)) {
   3049 					/* it's this one... */
   3050 #if DEBUG
   3051 					printf("Found: %s at %d,%d\n",
   3052 					       ac->devname,r,c);
   3053 #endif
   3054 					break;
   3055 				}
   3056 				ac=ac->next;
   3057 			}
   3058 			if (ac==NULL) {
   3059 				/* Didn't find one here! */
   3060 				/* special case for RAID 1, especially
   3061 				   where there are more than 2
   3062 				   components (where RAIDframe treats
   3063 				   things a little differently :( ) */
   3064 				if (parity_type == '1') {
   3065 					if (c%2 == 0) { /* even component */
   3066 						even_pair_failed = 1;
   3067 					} else { /* odd component.  If
   3068                                                     we're failed, and
   3069                                                     so is the even
   3070                                                     component, it's
   3071                                                     "Good Night, Charlie" */
   3072 						if (even_pair_failed == 1) {
   3073 							return(0);
   3074 						}
   3075 					}
   3076 				} else {
   3077 					/* normal accounting */
   3078 					num_missing++;
   3079 				}
   3080 			}
   3081 			if ((parity_type == '1') && (c%2 == 1)) {
   3082 				/* Just did an even component, and we didn't
   3083 				   bail.. reset the even_pair_failed flag,
   3084 				   and go on to the next component.... */
   3085 				even_pair_failed = 0;
   3086 			}
   3087 		}
   3088 	}
   3089 
   3090 	clabel = cset->ac->clabel;
   3091 
   3092 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
   3093 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
   3094 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
   3095 		/* XXX this needs to be made *much* more general */
   3096 		/* Too many failures */
   3097 		return(0);
   3098 	}
   3099 	/* otherwise, all is well, and we've got enough to take a kick
   3100 	   at autoconfiguring this set */
   3101 	return(1);
   3102 }
   3103 
   3104 void
   3105 rf_create_configuration(ac,config,raidPtr)
   3106 	RF_AutoConfig_t *ac;
   3107 	RF_Config_t *config;
   3108 	RF_Raid_t *raidPtr;
   3109 {
   3110 	RF_ComponentLabel_t *clabel;
   3111 	int i;
   3112 
   3113 	clabel = ac->clabel;
   3114 
   3115 	/* 1. Fill in the common stuff */
   3116 	config->numRow = clabel->num_rows;
   3117 	config->numCol = clabel->num_columns;
   3118 	config->numSpare = 0; /* XXX should this be set here? */
   3119 	config->sectPerSU = clabel->sectPerSU;
   3120 	config->SUsPerPU = clabel->SUsPerPU;
   3121 	config->SUsPerRU = clabel->SUsPerRU;
   3122 	config->parityConfig = clabel->parityConfig;
   3123 	/* XXX... */
   3124 	strcpy(config->diskQueueType,"fifo");
   3125 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
   3126 	config->layoutSpecificSize = 0; /* XXX ?? */
   3127 
   3128 	while(ac!=NULL) {
   3129 		/* row/col values will be in range due to the checks
   3130 		   in reasonable_label() */
   3131 		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
   3132 		       ac->devname);
   3133 		ac = ac->next;
   3134 	}
   3135 
   3136 	for(i=0;i<RF_MAXDBGV;i++) {
   3137 		config->debugVars[i][0] = NULL;
   3138 	}
   3139 }
   3140 
   3141 int
   3142 rf_set_autoconfig(raidPtr, new_value)
   3143 	RF_Raid_t *raidPtr;
   3144 	int new_value;
   3145 {
   3146 	RF_ComponentLabel_t clabel;
   3147 	struct vnode *vp;
   3148 	dev_t dev;
   3149 	int row, column;
   3150 
   3151 	raidPtr->autoconfigure = new_value;
   3152 	for(row=0; row<raidPtr->numRow; row++) {
   3153 		for(column=0; column<raidPtr->numCol; column++) {
   3154 			if (raidPtr->Disks[row][column].status ==
   3155 			    rf_ds_optimal) {
   3156 				dev = raidPtr->Disks[row][column].dev;
   3157 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3158 				raidread_component_label(dev, vp, &clabel);
   3159 				clabel.autoconfigure = new_value;
   3160 				raidwrite_component_label(dev, vp, &clabel);
   3161 			}
   3162 		}
   3163 	}
   3164 	return(new_value);
   3165 }
   3166 
   3167 int
   3168 rf_set_rootpartition(raidPtr, new_value)
   3169 	RF_Raid_t *raidPtr;
   3170 	int new_value;
   3171 {
   3172 	RF_ComponentLabel_t clabel;
   3173 	struct vnode *vp;
   3174 	dev_t dev;
   3175 	int row, column;
   3176 
   3177 	raidPtr->root_partition = new_value;
   3178 	for(row=0; row<raidPtr->numRow; row++) {
   3179 		for(column=0; column<raidPtr->numCol; column++) {
   3180 			if (raidPtr->Disks[row][column].status ==
   3181 			    rf_ds_optimal) {
   3182 				dev = raidPtr->Disks[row][column].dev;
   3183 				vp = raidPtr->raid_cinfo[row][column].ci_vp;
   3184 				raidread_component_label(dev, vp, &clabel);
   3185 				clabel.root_partition = new_value;
   3186 				raidwrite_component_label(dev, vp, &clabel);
   3187 			}
   3188 		}
   3189 	}
   3190 	return(new_value);
   3191 }
   3192 
   3193 void
   3194 rf_release_all_vps(cset)
   3195 	RF_ConfigSet_t *cset;
   3196 {
   3197 	RF_AutoConfig_t *ac;
   3198 
   3199 	ac = cset->ac;
   3200 	while(ac!=NULL) {
   3201 		/* Close the vp, and give it back */
   3202 		if (ac->vp) {
   3203 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
   3204 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
   3205 			vput(ac->vp);
   3206 			ac->vp = NULL;
   3207 		}
   3208 		ac = ac->next;
   3209 	}
   3210 }
   3211 
   3212 
   3213 void
   3214 rf_cleanup_config_set(cset)
   3215 	RF_ConfigSet_t *cset;
   3216 {
   3217 	RF_AutoConfig_t *ac;
   3218 	RF_AutoConfig_t *next_ac;
   3219 
   3220 	ac = cset->ac;
   3221 	while(ac!=NULL) {
   3222 		next_ac = ac->next;
   3223 		/* nuke the label */
   3224 		free(ac->clabel, M_RAIDFRAME);
   3225 		/* cleanup the config structure */
   3226 		free(ac, M_RAIDFRAME);
   3227 		/* "next.." */
   3228 		ac = next_ac;
   3229 	}
   3230 	/* and, finally, nuke the config set */
   3231 	free(cset, M_RAIDFRAME);
   3232 }
   3233 
   3234 
   3235 void
   3236 raid_init_component_label(raidPtr, clabel)
   3237 	RF_Raid_t *raidPtr;
   3238 	RF_ComponentLabel_t *clabel;
   3239 {
   3240 	/* current version number */
   3241 	clabel->version = RF_COMPONENT_LABEL_VERSION;
   3242 	clabel->serial_number = raidPtr->serial_number;
   3243 	clabel->mod_counter = raidPtr->mod_counter;
   3244 	clabel->num_rows = raidPtr->numRow;
   3245 	clabel->num_columns = raidPtr->numCol;
   3246 	clabel->clean = RF_RAID_DIRTY; /* not clean */
   3247 	clabel->status = rf_ds_optimal; /* "It's good!" */
   3248 
   3249 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
   3250 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
   3251 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
   3252 
   3253 	clabel->blockSize = raidPtr->bytesPerSector;
   3254 	clabel->numBlocks = raidPtr->sectorsPerDisk;
   3255 
   3256 	/* XXX not portable */
   3257 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
   3258 	clabel->maxOutstanding = raidPtr->maxOutstanding;
   3259 	clabel->autoconfigure = raidPtr->autoconfigure;
   3260 	clabel->root_partition = raidPtr->root_partition;
   3261 	clabel->last_unit = raidPtr->raidid;
   3262 	clabel->config_order = raidPtr->config_order;
   3263 }
   3264 
   3265 int
   3266 rf_auto_config_set(cset,unit)
   3267 	RF_ConfigSet_t *cset;
   3268 	int *unit;
   3269 {
   3270 	RF_Raid_t *raidPtr;
   3271 	RF_Config_t *config;
   3272 	int raidID;
   3273 	int retcode;
   3274 
   3275 #if DEBUG
   3276 	printf("RAID autoconfigure\n");
   3277 #endif
   3278 
   3279 	retcode = 0;
   3280 	*unit = -1;
   3281 
   3282 	/* 1. Create a config structure */
   3283 
   3284 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
   3285 				       M_RAIDFRAME,
   3286 				       M_NOWAIT);
   3287 	if (config==NULL) {
   3288 		printf("Out of mem!?!?\n");
   3289 				/* XXX do something more intelligent here. */
   3290 		return(1);
   3291 	}
   3292 
   3293 	memset(config, 0, sizeof(RF_Config_t));
   3294 
   3295 	/* XXX raidID needs to be set correctly.. */
   3296 
   3297 	/*
   3298 	   2. Figure out what RAID ID this one is supposed to live at
   3299 	   See if we can get the same RAID dev that it was configured
   3300 	   on last time..
   3301 	*/
   3302 
   3303 	raidID = cset->ac->clabel->last_unit;
   3304 	if ((raidID < 0) || (raidID >= numraid)) {
   3305 		/* let's not wander off into lala land. */
   3306 		raidID = numraid - 1;
   3307 	}
   3308 	if (raidPtrs[raidID]->valid != 0) {
   3309 
   3310 		/*
   3311 		   Nope... Go looking for an alternative...
   3312 		   Start high so we don't immediately use raid0 if that's
   3313 		   not taken.
   3314 		*/
   3315 
   3316 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
   3317 			if (raidPtrs[raidID]->valid == 0) {
   3318 				/* can use this one! */
   3319 				break;
   3320 			}
   3321 		}
   3322 	}
   3323 
   3324 	if (raidID < 0) {
   3325 		/* punt... */
   3326 		printf("Unable to auto configure this set!\n");
   3327 		printf("(Out of RAID devs!)\n");
   3328 		return(1);
   3329 	}
   3330 
   3331 #if DEBUG
   3332 	printf("Configuring raid%d:\n",raidID);
   3333 #endif
   3334 
   3335 	raidPtr = raidPtrs[raidID];
   3336 
   3337 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
   3338 	raidPtr->raidid = raidID;
   3339 	raidPtr->openings = RAIDOUTSTANDING;
   3340 
   3341 	/* 3. Build the configuration structure */
   3342 	rf_create_configuration(cset->ac, config, raidPtr);
   3343 
   3344 	/* 4. Do the configuration */
   3345 	retcode = rf_Configure(raidPtr, config, cset->ac);
   3346 
   3347 	if (retcode == 0) {
   3348 
   3349 		raidinit(raidPtrs[raidID]);
   3350 
   3351 		rf_markalldirty(raidPtrs[raidID]);
   3352 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
   3353 		if (cset->ac->clabel->root_partition==1) {
   3354 			/* everything configured just fine.  Make a note
   3355 			   that this set is eligible to be root. */
   3356 			cset->rootable = 1;
   3357 			/* XXX do this here? */
   3358 			raidPtrs[raidID]->root_partition = 1;
   3359 		}
   3360 	}
   3361 
   3362 	/* 5. Cleanup */
   3363 	free(config, M_RAIDFRAME);
   3364 
   3365 	*unit = raidID;
   3366 	return(retcode);
   3367 }
   3368 
   3369 void
   3370 rf_disk_unbusy(desc)
   3371 	RF_RaidAccessDesc_t *desc;
   3372 {
   3373 	struct buf *bp;
   3374 
   3375 	bp = (struct buf *)desc->bp;
   3376 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
   3377 			    (bp->b_bcount - bp->b_resid));
   3378 }
   3379